Sasa Stojanovic stojsasa@etf.rs E#1: Hello world E#2: Vector addition E#3: Type mixing E#4: Addition of a constant and a vector E#5: Input/output control E#6: Conditional execution E#7: Moving average 1D E#8: Moving average 2D E#9: Array summation E#10: Optimization of E#9 2/x Example No. 1 Write a program that sends the “Hello World!” string to the MAX2 card, for the MAX2 card kernel to return it back to the host. To be learned through this example: ◦ How to make the configuration of the accelerator (MAX2 card) using Java: How to make a simple kernel (ops description) using Java (the only language), How to write the standard manager (config description based on kernel(s)) using Java, ◦ How to test the kernel using a test (code+data) written in Java, ◦ How to compile the Java code for MAX2, ◦ How to write a simple C code that runs on the host and triggers the kernel, How to write the C code that streams data to the kernel, How to write the C code that accepts data from the kernel, ◦ How to simulate and execute an application program in C that runs on the host and periodically calls the accelerator. 3/x Example No. 1 One or more kernel files, to define operations of the application: ◦ <app_name>Kernel[<additional_name>].java One (or more) Java files, for simulation of the kernel(s): ◦ <app_name>SimRunner.java One manager file for transforming the kernel(s) into the configuration of the MAX card (instantiation and connection of kernels): ◦ <app_name>Manager.java Simulator builder: ◦ <app_name>HostSimBuilder.java Hardware builder: ◦ <app_name>HWBuilder.java Application code that uses the MAX card accelerator: ◦ <app_name>HostCode.c Makefile: ◦ A script file that defines the compilation related commands 4/x Example No.
1 package ind.z1; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class helloKernel extends Kernel { public helloKernel(KernelParameters parameters) { super(parameters); // Input: HWVar x = io.input("x", hwInt(8)); It is possible to substitute the HWVar result = x; // Output: last three lines with: io.output("z", result, hwInt(8)); io.output("z", result, hwInt(8)); } } 5/x Example No. 1 package ind.z1; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class helloSimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager(“helloSim"); helloKernel k = new helloKernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } } 6/x Example No. 1 package ind.z1; import import import import static config.BoardModel.BOARDMODEL; com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; com.maxeler.maxcompiler.v1.managers.standard.Manager; com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class helloHostSimBuilder { public static void main(String[] args) { Manager m = new Manager(true,”helloHostSim", BOARDMODEL); Kernel k = new helloKernel(m.makeKernelParameters(“helloKernel")); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); } } 7/x Example No. 
1 package ind.z1; import import import import static config.BoardModel.BOARDMODEL; com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; com.maxeler.maxcompiler.v1.managers.standard.Manager; com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class helloHWBuilder { public static void main(String[] args) { Manager m = new Manager(“hello", BOARDMODEL); Kernel k = new helloKernel( m.makeKernelParameters() ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); m.build(); } } 8/x Example No. 1 #include <stdio.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; char data_in1[16] = "Hello world!"; char data_out[16]; printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_hello(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); 9/x Example No. 1 printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, 16 * sizeof(char)), max_output("z", data_out, 16 * sizeof(char)), max_runfor(“helloKernel", 16), max_end()); printf("Checking data read from FPGA.\n"); max_close_device(device); max_destroy(maxfile); } return 0; 10/x Example No. 1 # Root of the project directory tree BASEDIR=../../.. # Java package name PACKAGE=ind/z1 # Application name APP=example1 # Names of your maxfiles HWMAXFILE=$(APP).max HOSTSIMMAXFILE=$(APP)HostSim.max # Java application builders HWBUILDER=$(APP)HWBuilder.java HOSTSIMBUILDER=$(APP)HostSimBuilder.java SIMRUNNER=$(APP)SimRunner.java # C host code HOSTCODE=$(APP)HostCode.c # Target board BOARD_MODEL=23312 # Include the master makefile.include nullstring := space := $(nullstring) # comment MAXCOMPILERDIR_QUOTE:=$(subst $(space),\ ,$(MAXCOMPILERDIR)) include $(MAXCOMPILERDIR_QUOTE)/examples/common/Makefile.include 11/x Example No. 
1 package config; import com.maxeler.maxcompiler.v1.managers.MAX2BoardModel; public class BoardModel { public static final MAX2BoardModel BOARDMODEL = MAX2BoardModel.MAX2336B; } 12/x Types 13/x Types Floating point numbers - HWFloat: ◦ hwFloat(exponent_bits, mantissa_bits); ◦ float ~ hwFloat(8,24) ◦ double ~ hwFloat(11,53) Fixed point numbers - HWFix: ◦ hwFix(integer_bits, fractional_bits, sign_mode), where sign_mode is SignMode.UNSIGNED or SignMode.TWOSCOMPLEMENT Integers - HWFix: ◦ hwInt(bits) ~ hwFix(bits, 0, SignMode.TWOSCOMPLEMENT) Unsigned integers - HWFix: ◦ hwUInt(bits) ~ hwFix(bits, 0, SignMode.UNSIGNED) Boolean – HWFix: ◦ hwBool() ~ hwFix(1, 0, SignMode.UNSIGNED) ◦ 1 ~ true ◦ 0 ~ false Raw bits – HWRawBits: ◦ hwRawBits(width) 14/x Example No. 2 Write a program that adds two arrays of floating point numbers. Program reads the size of arrays, makes two arrays with an arbitrary content (test inputs), and adds them using a MAX card. 15/x Example No. 2 package ind.z2; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example2Kernel extends Kernel { public example2Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwFloat(8,24)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); } } 16/x Example No.
2 package ind.z2; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example2SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example2Sim"); example2Kernel k = new example2Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2, 3, 4, 5, 6, 7, 8, 9); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 5, 7, 9, 11, 13, 15, 17 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } } 17/x Example No. 2 package ind.z2; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example2HostSimBuilder { public static void main(String[] args) { Manager m = new Manager(true,"example2HostSim", BOARDMODEL); Kernel k = new example2Kernel( m.makeKernelParameters("example2Kernel") ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); } } m.build(); 18/x Example No. 2 package ind.z2; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example2HWBuilder { public static void main(String[] args) { Manager m = new Manager("example2", BOARDMODEL); Kernel k = new example2Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setIO(IOType.ALL_PCIE); } } m.build(); 19/x Example No. 2 #include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? 
argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_in2, *data_out; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_in2 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; data_in2[i] = i%3; } printf("Opening and configuring FPGA.\n"); 20/x Example No. 2 maxfile = max_maxfile_init_example2(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, N * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example2Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++) if (data_out[i] != i%10 + i%3){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+i%3), data_out[i]); break; } max_close_device(device); max_destroy(maxfile); return 0; } 21/x Example No. 3 Do the same as in the example no 2, with the following modification: one input array contains floating point numbers, and the other one contains integers. 22/x Example No. 3 Casting here means moving data from one form to another, without changing their essence. Type is: ◦ specified for inputs and outputs, ◦ propagated from inputs, down the dataflow graph to outputs, ◦ used to check that output stream has correct type. If conversion is needed, explicit conversion (cast) is required. How to do it? ◦ use the method cast in class HWVar. Additional hardware required (especially for conversion to or from floating point numbers): ◦ introduces additional latency. Cast between a floating point number and an integer number is done by rounding to the nearest integer! 23/x Example No.
3 package ind.z3; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example3Kernel extends Kernel { public example3Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwInt(32)); HWVar result = x + y.cast(hwFloat(8,24)); // Output io.output("z", result, hwFloat(8,24)); } } 24/x Example No. 3 package ind.z3; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example3SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example3Sim"); example3Kernel k = new example3Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2, 3, 4, 5, 6, 7, 8, 9); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 5, 7, 9, 11, 13, 15, 17 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } } 25/x Example No. 3 #include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out; int *data_in2; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_in2 = malloc(N * sizeof(int)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; data_in2[i] = i%3; } printf("Opening and configuring FPGA.\n"); 26/x Example No. 
3 maxfile = max_maxfile_init_example3(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, N * sizeof(int)), max_output("z", data_out, N * sizeof(float)), max_runfor("example3Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ if (data_out[i] != i%10 + i%3){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+i%3), data_out[i]); break; } } max_close_device(device); max_destroy(maxfile); return 0; } 27/x Generating Graph Command: ◦ maxRenderGraphs <build_dir> ◦ <build_dir> - directory where the design is compiled In the virtual machine, directory “Desktop/MaxCompiler-Builds” contains the build directories. Example for application “example2”: ◦ maxRenderGraphs example2HostSim ◦ Renders graphs for the resulting max file 28/x Generating Graph 29/x Generating Graph 30/x Example No. 4 Write a program that adds a constant to an array that contains floating point numbers. Program: ◦ reads the size of the array and the constant that will add to elements of the array, ◦ makes one array in an arbitrary way, and ◦ adds the constant to the array using the MAX card. 31/x Example No. 4 package ind.z4; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example4Kernel extends Kernel { public example4Kernel(KernelParameters parameters) { super(parameters); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.scalarInput("y", hwFloat(8,24)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); } } 32/x Example No. 
4 example4SimRunner.java: ◦ Before the kernel run, invoke: setScalarInput(“y”,2); example4HostCode.c: ◦ Read const from standard input, ◦ After the device is opened, but before run, set scalar inputs: max_set_scalar_input_f(device, “example4Kernel.y”, const_add, FPGA_A); max_upload_runtime_params(device, FPGA_A); 33/x Example No. 5 Do the same as in example no 4, with the following modification: use controlled inputs and counters. 34/x Example No. 5 package ind.z5; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example5Kernel extends Kernel { public example5Kernel(KernelParameters parameters) { super(parameters); HWVar ie = control.count.simpleCounter(32); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar y = io.input("y", hwFloat(8,24), ie.eq(0)); HWVar result = x + y; // Output io.output("z", result, hwFloat(8,24)); } } 35/x Example No. 5 package ind.z5; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example5SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example5Sim"); example5Kernel k = new example5Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8); m.setInputData("y", 2); m.setKernelCycles(8); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3, 4, 5, 6, 7, 8, 9, 10 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } } 36/x Example No. 5 #include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? 
argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, data_in2[2], *data_out; unsigned long N, i; printf("Enter size of array: "); scanf("%lu%f",&N, data_in2); data_in1 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++) data_in1[i] = i%10; printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example5(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); 37/x Example No. 5 printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_input("y", data_in2, 2 * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example5Kernel", N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ if (data_out[i] != i%10 + data_in2[0]){ printf("Error on element %d. Expected %f, but found %f.", i, (float)(i%10+data_in2[0]), data_out[i]); break; } } max_close_device(device); max_destroy(maxfile); return 0; } 38/x Example No. 6 Translate the following part of code for the Maxeler MAX2 card: for(int i=0; i<N; i++) if(a[i] != b[i]){ c[i] = b[i]-a[i]; d[i] = a[i]*b[i]/c[i]; }else { c[i] = a[i]; d[i] = a[i]+b[i]; } 39/x Example No. 6 package ind.z6; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example6Kernel extends Kernel { public example6Kernel(KernelParameters parameters) { super(parameters); // Input HWVar a = io.input("a", hwFloat(8,24)); HWVar b = io.input("b", hwFloat(8,24)); HWVar c = ~a.eq(b)?b-a:a; HWVar d = ~a.eq(b)?a*b/c:a+b; // Output io.output("c", c, hwFloat(8,24)); io.output("d", d, hwFloat(8,24)); } } 40/x Example No. 
6 package ind.z6; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example6SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example6Sim"); example6Kernel k = new example6Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("a", 1, 3); m.setInputData("b", 2, 3); m.setKernelCycles(2); m.runTest(); m.dumpOutput(); double expectedOutputc[] = { 1, 3 }; double expectedOutputd[] = { 2, 6 }; m.checkOutputData("c", expectedOutputc); m.checkOutputData("d", expectedOutputd); m.logMsg("Test passed OK!"); } } 41/x Example No. 7 Write a program that calculates the moving average over an array, where each output element is the average of three successive elements of the input array (two elements at the array boundaries): $$\mathrm{avg}[i] = \begin{cases} (a[0]+a[1])/2, & \text{for } i = 0; \\ (a[i-1]+a[i]+a[i+1])/3, & \text{for } 0 < i < n-1; \\ (a[n-2]+a[n-1])/2, & \text{for } i = n-1. \end{cases}$$ 42/x Example No. 7 package ind.z7; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example7Kernel extends Kernel { public example7Kernel(KernelParameters parameters) { super(parameters); HWVar N = io.scalarInput("N", hwUInt(64)); HWVar count = control.count.simpleCounter(64); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar result = ( (count>0?stream.offset(x,-1):0) + x + (count<N-1?stream.offset(x,1):0) )/ (count>0&count<N-1? constant.var(hwFloat(8,24),3):2); // Output io.output("z", result, hwFloat(8,24)); } } 43/x Example No. 8 Write a program that calculates moving average along a 2D matrix of the size MxN. Transfer the matrix to the MAX2 card through one stream, row by row. 44/x Example No.
8 package ind.z8; import import import import import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain; com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.Stream.OffsetExpr; com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; public class example8Kernel extends Kernel { public example8Kernel(KernelParameters parameters) { super(parameters); HWVar M = io.scalarInput("M", hwUInt(32)); OffsetExpr Nof = stream.makeOffsetParam("Nof", 3, 128); HWVar N = io.scalarInput("N", hwUInt(32)); CounterChain cc = control.count.makeCounterChain(); HWVar j = cc.addCounter(M,1); HWVar i = cc.addCounter(N,1); 45/x Example No. 8 // Input HWVar mat = io.input("mat", hwFloat(8,24)); // Extract 8 point window around current point HWVar window[] = new HWVar[9]; int ii = 0; for ( int x=-1; x<=1; x++) for ( int y= -1; y<=1; y++) window[ii++] = (i.cast(hwInt(33))+x>=0 & i.cast(hwInt(33))+x<= N.cast(hwInt(33))-1 & j.cast(hwInt(33))+y >= 0 & j.cast(hwInt(33))+y<=M.cast(hwInt(33))-1)?stream.offset(mat, y*Nof+x):0; // Sum points in window and divide by 9 to average HWVar sum = constant.var(hwFloat(8, 24), 0); for ( HWVar hwVar : window) { sum = sum + hwVar; } HWVar divider = i.eq(0)|i.eq(N-1)|j.eq(0)|j.eq(M-1)?((i.eq(0)|i.eq(N-1))&(j.eq(0)|j.eq(M1))?constant.var(hwFloat(8,24),4):6):9; HWVar result = sum / divider; // Output io.output("z", result, hwFloat(8,24)); } } 46/x Example No. 
8 package ind.z8; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example8SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example8Sim"); example8Kernel k = new example8Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("mat", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12, 13,14,15,16); m.setScalarInput("M", 4); m.setScalarInput("N", 4); m.setStreamOffsetParam("Nof",4); m.setKernelCycles(16); m.runTest(); m.dumpOutput(); double expectedOutput[] = { 3.5, 4, 5, 5.5, 5.5, 6, 7, 7.5, 9.5, 10, 11, 11.5, 11.5, 12, 13, 13.5 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } } 47/x Example No. 8 #include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_in2, *data_out; unsigned long M, N, i; printf("Enter size of matrix (MxN, max 1024x1024): "); scanf("%lu%lu",&M,&N); data_in1 = malloc(M*N * sizeof(float)); data_out = malloc(M*N * sizeof(float)); for(i = 0; i < M*N; i++){ data_in1[i] = i%10; } printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example8(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); 48/x Example No. 
8 max_set_scalar_input_f(device, "example8Kernel.M", M, FPGA_A); max_set_scalar_input_f(device, "example8Kernel.N", N, FPGA_A); max_set_runtime_param(device, "example8Kernel.Nof", N); max_upload_runtime_params(device, FPGA_A); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("mat", data_in1, M*N * sizeof(float)), max_output("z", data_out, M*N * sizeof(float)), max_runfor("example8Kernel", M*N), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < M*N; i++){ float expected=0, divider = 9; for (int ii = -1; ii<2; ii++) for(int jj = -1; jj<2; jj++) expected += i/N+ii>=0 && i/N+ii<M && i%N+jj>=0 && i%N+jj<N ?data_in1[i+ii*N+jj]:0; if (i/N==0 || i/N==M-1) divider = 6; if (i%N==0 || i%N==N-1) divider = divider == 6? 4:6; expected /= divider; if (data_out[i] != expected){ printf("Error on element %d. Expected %f, but found %f.", i, expected, data_out[i]); break; } } max_close_device(device); max_destroy(maxfile); return 0; } 49/x Example No. 9 Write a program that calculates the sum of n floating point numbers. 50/x Example No. 9 package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input Problem? HWVar x = io.input("x", hwFloat(8,24)); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -1); // Output io.output("z", result, hwFloat(8,24)); } } 51/x Example No. 9 52/x Example No. 
9 package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input HWVar x = io.input("x", hwFloat(8,24)); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>12?sum:0.0); [Slide note: Solution: new offset = depth of the pipeline loop] sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24)); } } 53/x Example No. 9 Still, we need to send 13 times more data than needed. package ind.z9; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example9SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example9Sim"); example9Kernel k = new example9Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 0, 0, 0, 3 , 0, 0, 0, 9 , 0, 0, 0, 0, 2 , 0, 0, 0, 3 , 0, 0, 0, 3 , 0, 0, 0, 0, 3); m.setKernelCycles(27); m.runTest(); [Slide notes on the input data: 12 unnecessary data elements; 12 unnecessary data elements] m.dumpOutput(); double expectedOutput[] = { 1, 3, 6 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } } 54/x Example No. 9 #include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * 13 * sizeof(float)); data_out = malloc(N * 13 * sizeof(float)); for(i = 0; i < N; i++) for( int j=0; j<13; j++) data_in1[13*i+j] = i%10; printf("Opening and configuring FPGA.\n"); 55/x Example No.
9 maxfile = max_maxfile_init_example9(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * 13 * sizeof(float)), max_output("z", data_out, N * 13* sizeof(float)), max_runfor("example9Kernel", N * 13), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ expected += !(i%13) ? i%10 : 0; if (data_out[i] != expected){ printf("Error on element %d. Expected %f, but found %f.", i, expected, data_out[i]); break; } } max_close_device(device); max_destroy(maxfile); return 0; } 56/x Example No. 9 package ind.z9; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain; public class example9Kernel extends Kernel { public example9Kernel(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); CounterChain cc = control.count.makeCounterChain(); HWVar cnt = cc.addCounter(1000000,1); HWVar depth = cc.addCounter(13,1); // Input HWVar x = io.input("x", hwFloat(8,24), depth.eq(0) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), depth.eq(0)); } } 57/x Example No. 9 package ind.z9; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example9SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example9Sim"); example9Kernel k = new example9Kernel( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2 , 3); m.setKernelCycles(27); We still need at least 27 cycles. 
m.runTest(); m.dumpOutput(); double expectedOutput[] = { 1, 3, 6 }; m.checkOutputData("z", expectedOutput); m.logMsg("Test passed OK!"); } } 58/x Example No. 9 #include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array: "); scanf("%lu",&N); data_in1 = malloc(N * sizeof(float)); data_out = malloc(N * sizeof(float)); for(i = 0; i < N; i++) data_in1[i] = i%10; printf("Opening and configuring FPGA.\n"); 59/x Example No. 9 maxfile = max_maxfile_init_example9(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_output("z", data_out, N * sizeof(float)), max_runfor("example9Kernel", N * 13 - 12), max_end()); printf("Checking data read from FPGA.\n"); for(i = 0; i < N; i++){ expected += i%10; if (data_out[i] != expected){ printf("Error on element %d. Expected %f, but found %f.", i, expected, data_out[i]); break; } } max_close_device(device); max_destroy(maxfile); return 0; } 60/x Example No. 10 Write an optimized program that calculates the sum of numbers in an input array First, calculate several parallel/partial sums; then, add them at the end 61/x Example No. 
10 package ind.z10; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; public class example10Kernel1 extends Kernel { public example10Kernel1(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); HWVar cnt = control.count.simpleCounter(64); // Input HWVar N = io.scalarInput("N", hwUInt(64)); HWVar x = io.input("x", hwFloat(8,24) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), cnt > N-14); } } 62/x Example No. 10 package ind.z10; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.kernelcompiler.KernelParameters; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWVar; import com.maxeler.maxcompiler.v1.kernelcompiler.types.base.HWType; import com.maxeler.maxcompiler.v1.kernelcompiler.stdlib.core.CounterChain; public class example10Kernel2 extends Kernel { public example10Kernel2(KernelParameters parameters) { super(parameters); final HWType scalarType = hwFloat(8,24); CounterChain cc = control.count.makeCounterChain(); HWVar cnt = cc.addCounter(14,1); HWVar depth = cc.addCounter(13,1); // Input HWVar x = io.input("x", hwFloat(8,24), depth.eq(0) ); HWVar sum = scalarType.newInstance(this); HWVar result = x + (cnt>0?sum:0.0); sum <== stream.offset(result, -13); // Output io.output("z", result, hwFloat(8,24), cnt.eq(12)); } } 63/x Example No. 
10 package ind.z10; import com.maxeler.maxcompiler.v1.managers.standard.SimulationManager; public class example10SimRunner { public static void main(String[] args) { SimulationManager m = new SimulationManager("example10Sim"); example10Kernel1 k = new example10Kernel1( m.makeKernelParameters() ); m.setKernel(k); m.setInputData("x", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26); m.setKernelCycles(26); m.runTest(); m.dumpOutput(); double exOutput[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39 }; m.checkOutputData("z", exOutput); m.logMsg("Test passed OK!"); } } 64/x Example No. 10 package ind.z10; import com.maxeler.maxcompiler.v1.managers.custom.blocks.KernelBlock; import com.maxeler.maxcompiler.v1.managers.custom.CustomManager; import com.maxeler.maxcompiler.v1.managers.MAXBoardModel; class example10Manager extends CustomManager { public example10Manager(boolean is_simulation, String name, MAXBoardModel board_model ){ super(is_simulation, board_model, name); KernelBlock kb1 = addKernel(new example10Kernel1(makeKernelParameters("example10Kernel1"))); KernelBlock kb2 = addKernel(new example10Kernel2(makeKernelParameters("example10Kernel2"))); } } kb1.getInput("x") <== addStreamFromHost("x"); kb2.getInput("x") <== kb1.getOutput("z"); addStreamToHost("z") <== kb2.getOutput("z"); 65/x Example No. 10 package ind.z10; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.managers.BuildConfig; import com.maxeler.maxcompiler.v1.managers.BuildConfig.Level; public class example10HostSimBuilder { public static void main(String[] args) { example10Manager m = new example10Manager(true,"example10HostSim", BOARDMODEL); m.setBuildConfig(new BuildConfig(Level.FULL_BUILD)); } } m.build(); 66/x Example No. 
10 package ind.z10; import static config.BoardModel.BOARDMODEL; import com.maxeler.maxcompiler.v1.kernelcompiler.Kernel; import com.maxeler.maxcompiler.v1.managers.standard.Manager; import com.maxeler.maxcompiler.v1.managers.standard.Manager.IOType; public class example10HWBuilder { public static void main(String[] args) { example10Manager m = new example10Manager(false,"example10HostSim", BOARDMODEL); m.setBuildConfig(new BuildConfig(Level.FULL_BUILD)); } } m.build(); 67/x Example No. 10 #include <stdio.h> #include <stdlib.h> #include <MaxCompilerRT.h> int main(int argc, char* argv[]) { char *device_name = (argc==2 ? argv[1] : "/dev/maxeler0"); max_maxfile_t* maxfile; max_device_handle_t* device; float *data_in1, *data_out, expected = 0; unsigned long N, i; printf("Enter size of array (it will be truncated to the firs lower number dividable with 13): "); scanf("%lu",&N); N /= 13; N *= 13; data_in1 = malloc(N * sizeof(float)); data_out = malloc(1 * sizeof(float)); for(i = 0; i < N; i++){ data_in1[i] = i%10; expected += data_in1[i]; } 68/x Example No. 10 printf("Opening and configuring FPGA.\n"); maxfile = max_maxfile_init_example10(); device = max_open_device(maxfile, device_name); max_set_terminate_on_error(device); max_set_scalar_input_f(device, "example10Kernel1.N", N, FPGA_A); max_upload_runtime_params(device, FPGA_A); printf("Streaming data to/from FPGA...\n"); max_run(device, max_input("x", data_in1, N * sizeof(float)), max_output("z", data_out, 2 * sizeof(float)), max_runfor("example10Kernel1", N), max_runfor("example10Kernel2", 13*12+2), max_end()); printf("Checking data read from FPGA.\n"); printf("Expected: %f, returned: %f\n", expected, *data_out); max_close_device(device); max_destroy(maxfile); return 0; } 69/x