diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/CMakeLists.txt new file mode 100755 index 0000000000..1aa7896147 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/CMakeLists.txt @@ -0,0 +1,20 @@ +if(UNIX) + # Direct CMake to use dpcpp rather than the default C++ compiler/linker + set(CMAKE_CXX_COMPILER dpcpp) +else() # Windows + # Force CMake to use dpcpp rather than the default C++ compiler/linker + # (needed on Windows only) + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (dpcpp IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.4) + +project(ACFixed CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +add_subdirectory (src) diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/License.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/License.txt new file mode 100755 index 0000000000..7c8b8a36c6 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/License.txt @@ -0,0 +1,23 @@ +Copyright Intel Corporation + +SPDX-License-Identifier: MIT +https://opensource.org/licenses/MIT + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/README.md b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/README.md new file mode 100755 index 0000000000..355d49e1e7 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/README.md @@ -0,0 +1,260 @@ +# Using the Algorithmic C Fixed Point Data-type 'ac_fixed' + +This FPGA tutorial demonstrates how to use the Algorithmic C (AC) Data-type `ac_fixed` and some best practices. + +***Documentation***: The [DPC++ FPGA Code Samples Guide](https://software.intel.com/content/www/us/en/develop/articles/explore-dpcpp-through-intel-fpga-code-samples.html) helps you to navigate the samples and build your knowledge of DPC++ for FPGA.
+The [oneAPI DPC++ FPGA Optimization Guide](https://software.intel.com/content/www/us/en/develop/documentation/oneapi-fpga-optimization-guide) is the reference manual for targeting FPGAs through DPC++.
+The [oneAPI Programming Guide](https://software.intel.com/en-us/oneapi-programming-guide) is a general resource for target-independent DPC++ programming. + +| Optimized for | Description +--- |--- +| OS | Linux* Ubuntu* 18.04/20.04, RHEL*/CentOS* 8, SUSE* 15; Windows* 10 +| Hardware | Intel® Programmable Acceleration Card (PAC) with Intel Arria® 10 GX FPGA
Intel® FPGA Programmable Acceleration Card (PAC) D5005 (with Intel Stratix® 10 SX)
Intel® FPGA 3rd party / custom platforms with oneAPI support
*__Note__: Intel® FPGA PAC hardware is only compatible with Ubuntu 18.04* +| Software | Intel® oneAPI DPC++ Compiler
Intel® FPGA Add-On for oneAPI Base Toolkit +| What you will learn | How different methods of `ac_fixed` number construction affect hardware resource utilization
Recommended method for constructing `ac_fixed` numbers in your kernel
Accessing and using the `ac_fixed` math library functions
Trading off accuracy of results for reduced resource usage on the FPGA +| Time to complete | 30 minutes + + + +## Purpose + +This FPGA tutorial shows how to use the `ac_fixed` type with some simple examples. + +This data-type can be used in place of native floating point types to generate area efficient and optimized designs for the FPGA. For example, operations which do not utilize all of the bits the native types are good candidates for replacement with `ac_fixed` type. + +This tutorials shows the recommended method for constructing an `ac_fixed` number, some examples of using the fixed point math library functions and how they can be used to reduce the area of the hardware generated by the compiler by trading-off accuracy of the mathematical operations. + +### Simple Code Example + +An `ac_fixed` number can be defined as follows: +```cpp +ac_fixed a; +``` +Here W specifies the width and S specifies the sign of the number. One of the W bits is used to store the sign information. The second parameter I is an integer that specifies the location of the fixed point relative to the most significant bit. + +The type also provides two more optional parameters for controlling the overflow and rounding modes. For more details on the type, rounding and overflow modes and the range of values supported with different width parameterization please refer to the file `ac_data_types_ref.pdf`. + +To use this type in your code, you must include the following header: + +```cpp +#include +``` + +Additionally, you must use the flag `-qactypes` in order to ensure that the headers are correctly included. + +### Recommended method for constructing ac_fixed numbers + +The compiler uses significant FPGA resources to convert double precision (and single precision) floating-point values to `ac_fixed` values. The kernel `ConstructFromFloat` constructs an `ac_fixed` object from an accessor to a native `float` type. + +In contrast, the kernel `ConstructFromACFixed` constructs an `ac_fixed` object from an accessor to another `ac_fixed` object. This consumes far less area than the previous kernel. See the section on examining the reports below to understand where to look for this difference within the optimization reports. + +### Using the ac_fixed math functions + +To use this type in your code, you must include the following header: + +```cpp +#include +``` + +The flag `-qactypes` will ensure that the compiler includes the header and links against the necessary libraries for emulation of the math library functions. + +This tutorial design contains two kernels `CalculateWithFloat` and `CalculateWithACFixed`. Both calculate the simple expression: +```cpp + square_root ( sine(x) * sine(x) + cosine(x) * cosine(x) ) +``` +for some input `x`. + +The kernel `CalculateWithFloat` uses floating point values and the standard math library while `CalculateWithACFixed` uses `ac_fixed` values and the `ac_fixed` math library. The `ac_fixed` inputs are instantiated with the following parameters: + +```cpp + W = 10, I = 3, S = true +``` + +Clearly, the `ac_fixed` numbers are smaller in size than floating point numbers. This results in reduction of the FPGA resources utilized by the functions at the expense of accuracy. To see the trade-offs between accuracy compare the numeric results of the operations. The area utilization differences will be discussed in the section on `Examining the reports`. + +When you use the `ac_fixed` library, keep the following points in mind: + +1. Input Bit Width and Input Value Range Limits + + The fixed-point math functions have bit width and input value range requirements. All bit width and input value range requirements are documented at the top of the ac_fixed_math.hpp file. For example, the `sin_fixed` and `cos_fixed` functions require the integer part's bit width to be 3, and the input value range to be within [-pi, pi]. + +2. Return Types + + For fixed-point functions, each function has a default return type. Assigning the result to a non-default return type triggers a type conversion and can cause an increase in logic use or a loss of accuracy in your results. All return types are documented at the top of the ac_fixed_math.hpp file. For example, for `sin_fixed` and `cos_fixed`, the input type is `ac_fixed`, and the output type is `ac_fixed`. + +3. Accuracy + - Floating point vs Fixed point + + The host program (`main()` function) for this tutorial gives you an estimate of the difference between the correct result and the result provided by the math library functions. The floating point version (which has a greater bit width in this case) generates a more accurate result. + + - Emulation vs Simulation for fixed point math operations + + Due to the differences in the internal math implementations, the results from `ac_fixed` math functions in simulation and emulation might not always be bit-accurate. In this example you can observe the difference between emulation and simulation. + +## Key Concepts + * Constructing an `ac_fixed` from a `float` or `double` value will be much more area intensive than constructing one from another `ac_fixed`. + * The `ac_fixed` math library provides a set of functions for various math operations. + * The functions can be used to trade off accuracy of results for reduced resource usage on the FPGA. + * When using these functions, one must be mindful of the widths of the input and return types and follow the parameterization laid out in the header file for optimal results. + +## License + +Code samples are licensed under the MIT license. See +[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. + + +## Building the `ac_fixed` Tutorial + +### Include Files + +The included header `dpc_common.hpp` is located at `%ONEAPI_ROOT%\dev-utilities\latest\include` on your development system. + +### Running Samples in DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the type of compute node and whether to run in batch or interactive mode. Compiles to FPGA are only supported on fpga_compile nodes. Executing programs on FPGA hardware is only supported on fpga_runtime nodes of the appropriate type, such as fpga_runtime:arria10 or fpga_runtime:stratix10. Neither compiling nor executing programs on FPGA hardware are supported on the login nodes. For more information, see the Intel® oneAPI Base Toolkit Get Started Guide ([https://devcloud.intel.com/oneapi/documentation/base-toolkit/](https://devcloud.intel.com/oneapi/documentation/base-toolkit/)). + +When compiling for FPGA hardware, it is recommended to increase the job timeout to 12h. + +### On a Linux* System + +1. Install the design in `build` directory from the design directory by running `cmake`: + + ```bash + mkdir build + cd build + ``` + + If you are compiling for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + + ```bash + cmake .. + ``` + + Alternatively, to compile for the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX), run `cmake` using the command: + + ```bash + cmake .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + You can also compile for a custom FPGA platform. Ensure that the board support package is installed on your system. Then run `cmake` using the command: + ```bash + cmake .. -DFPGA_BOARD=: + ``` + +2. Compile the design using the generated `Makefile`. The following four build targets are provided that match the recommended development flow: + + * Compile and run for emulation (fast compile time, targets emulates an FPGA device) using: + + ```bash + make fpga_emu + ``` + + * Generate HTML optimization reports using: + + ```bash + make report + ``` + + * Compile and run on FPGA hardware (longer compile time, targets an FPGA device) using: + + ```bash + make fpga + ``` + +3. (Optional) As the above hardware compile may take several hours to complete, FPGA precompiled binaries (compatible with Linux* Ubuntu* 18.04) can be downloaded here. + +### On a Windows* System + +1. Generate the `Makefile` by running `cmake`. + ``` + mkdir build + cd build + ``` + To compile for the Intel® PAC with Intel Arria® 10 GX FPGA, run `cmake` using the command: + ``` + cmake -G "NMake Makefiles" .. + ``` + Alternatively, to compile for the Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX), run `cmake` using the command: + + ``` + cmake -G "NMake Makefiles" .. -DFPGA_BOARD=intel_s10sx_pac:pac_s10 + ``` + You can also compile for a custom FPGA platform. Ensure that the board support package is installed on your system. Then run `cmake` using the command: + ``` + cmake -G "NMake Makefiles" .. -DFPGA_BOARD=: + ``` + +2. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + * Compile for emulation (fast compile time, targets emulated FPGA device): + ``` + nmake fpga_emu + ``` + * Generate the optimization report: + ``` + nmake report + ``` + * Compile for FPGA hardware (longer compile time, targets FPGA device): + ``` + nmake fpga + ``` + +*Note:* The Intel® PAC with Intel Arria® 10 GX FPGA and Intel® FPGA PAC D5005 (with Intel Stratix® 10 SX) do not support Windows*. Compiling to FPGA hardware on Windows* requires a third-party or custom Board Support Package (BSP) with Windows* support. + +### In Third-Party Integrated Development Environments (IDEs) + +You can compile and run this tutorial in the Eclipse* IDE (in Linux*) and the Visual Studio* IDE (in Windows*). +For instructions, refer to the following link: [Intel® oneAPI DPC++ FPGA Workflows on Third-Party IDEs](https://software.intel.com/en-us/articles/intel-oneapi-dpcpp-fpga-workflow-on-ide) + +## Examining the Reports + +Locate the pair of `report.html` files in either: + +* **Report-only compile**: `ac_fixed_report.prj` +* **FPGA hardware compile**: `ac_fixed.prj` + +Scroll down on the Summary page of the report and expand the section titled `Compile Estimated Kernel Resource Utilization Summary`. Observe how the kernel `ConstructFromACFixed` consumes lesser resources than the kernel named `ConstructFromFloat`. Similarly, observe how the kernel named `CaclulateWithACFixed` consumes lesser FPGA resources than `CalculateWithFloat`. + +## Running the Sample + +1. Run the sample on the FPGA emulator (the kernel executes on the CPU): + + ```bash + ./ac_fixed.fpga_emu # Linux + ac_fixed.fpga_emu.exe # Windows + ``` + +2. Run the sample on the FPGA device + + ```bash + ./ac_fixed.fpga # Linux + ``` + +### Example of Output + +```txt +Constructed from float: 3.6416015625 +Constructed from ac_fixed: 3.6416015625 + +MAX DIFF for ac_fixed<10, 3, true>: 0.0078125 +MAX DIFF for float: 9.53674e-07 + +result(fixed point): 1 +result(float): 1 + +result(fixed point): 0.992188 +result(float): 1 + +result(fixed point): 1 +result(float): 1 + +result(fixed point): 1 +result(float): 1 + +result(fixed point): 0.992188 +result(float): 1 + +PASSED +``` + +### Discussion of Results + +You will be able to obtain a smaller hardware footprint for your kernel by ensuring that the `ac_fixed` numbers are constructed from `float` or `double` numbers outside the kernel. Additionally, you can trade-off mathematical operation accuracy for a more resource efficient design by using the `ac_fixed` math library functions. \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_datatypes_ref.pdf b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_datatypes_ref.pdf new file mode 100644 index 0000000000..898ec876d3 Binary files /dev/null and b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_datatypes_ref.pdf differ diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_fixed.sln b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_fixed.sln new file mode 100755 index 0000000000..d88c5baaa2 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_fixed.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.705 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ac_fixed", "ac_fixed.vcxproj", "{73FCAD5C-4C93-4786-B662-A7273C515E22}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {73FCAD5C-4C93-4786-B662-A7273C515E22}.Debug|x64.ActiveCfg = Debug|x64 + {73FCAD5C-4C93-4786-B662-A7273C515E22}.Debug|x64.Build.0 = Debug|x64 + {73FCAD5C-4C93-4786-B662-A7273C515E22}.Release|x64.ActiveCfg = Release|x64 + {73FCAD5C-4C93-4786-B662-A7273C515E22}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {DE911CD1-4F98-4391-BD43-B02212357F5E} + EndGlobalSection +EndGlobal diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_fixed.vcxproj b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_fixed.vcxproj new file mode 100755 index 0000000000..18105790a7 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/ac_fixed.vcxproj @@ -0,0 +1,164 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + 15.0 + {73fcad5c-4c93-4786-b662-a7273c515e22} + Win32Proj + ac_fixed + $(WindowsSDKVersion.Replace("\","")) + + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + Application + true + Intel(R) oneAPI DPC++ Compiler + Unicode + + + Application + false + Intel(R) oneAPI DPC++ Compiler + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + Use + Level3 + Disabled + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + /Qactypes + + + + + Use + Level3 + Disabled + true + true + pch.h + true + -DFPGA_EMULATOR /Qactypes %(AdditionalOptions) + $(IntDir)ac_fixed.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + /Qactypes + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + /Qactypes + + + + + Use + Level3 + MaxSpeed + true + true + true + true + pch.h + true + -DFPGA_EMULATOR /Qactypes %(AdditionalOptions) + $(IntDir)ac_fixed.obj + $(ONEAPI_ROOT)dev-utilities\latest\include + + + Console + true + true + true + /Qactypes + + + + + + diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/sample.json b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/sample.json new file mode 100755 index 0000000000..6e90216930 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/sample.json @@ -0,0 +1,55 @@ +{ + "guid": "FB4643A1-64B1-4B09-9B98-4E23BA6D0925", + "name": "AC Fixed", + "categories": ["Toolkit/oneAPI Direct Programming/DPC++ FPGA/Tutorials/Features"], + "description": "An Intel® FPGA tutorial demonstrating how to use the Algorithmic C Fixed Point Data Type (AC Fixed) ", + "toolchain": ["dpcpp"], + "os": ["linux", "windows"], + "targetDevice": ["FPGA"], + "builder": ["ide", "cmake"], + "languages": [{"cpp":{}}], + "ciTests": { + "linux": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make fpga_emu", + "./ac_fixed.fpga_emu" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make report" + ] + } + ], + "windows": [ + { + "id": "fpga_emu", + "steps": [ + "mkdir build", + "cd build", + "cmake -G \"NMake Makefiles\" ..", + "nmake fpga_emu", + "ac_fixed.fpga_emu.exe" + ] + }, + { + "id": "report", + "steps": [ + "mkdir build", + "cd build", + "cmake -G \"NMake Makefiles\" ..", + "nmake report" + ] + } + ] + } +} diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/src/CMakeLists.txt b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/src/CMakeLists.txt new file mode 100755 index 0000000000..3c971d0d63 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/src/CMakeLists.txt @@ -0,0 +1,81 @@ +# To see a Makefile equivalent of this build system: +# https://github.com/oneapi-src/oneAPI-samples/blob/master/DirectProgramming/DPC++/ProjectTemplates/makefile-fpga + +set(SOURCE_FILE ac_fixed.cpp) +set(TARGET_NAME ac_fixed) +set(EMULATOR_TARGET ${TARGET_NAME}.fpga_emu) +set(FPGA_TARGET ${TARGET_NAME}.fpga) + +# FPGA board selection +if(NOT DEFINED FPGA_BOARD) + set(FPGA_BOARD "intel_a10gx_pac:pac_a10") + message(STATUS "FPGA_BOARD was not specified.\ + \nConfiguring the design to run on the default FPGA board ${FPGA_BOARD} (Intel(R) PAC with Intel Arria(R) 10 GX FPGA). \ + \nPlease refer to the README for information on board selection.") +else() + message(STATUS "Configuring the design to run on FPGA board ${FPGA_BOARD}") +endif() + +# These are Windows-specific flags: +# 1. /EHsc This is a Windows-specific flag that enables exception handling in host code +# 2. /Qactypes Include ac_types headers and link against ac_types emulation libraries +if(WIN32) + set(WIN_FLAG "/EHsc") + set(AC_TYPES_FLAG "/Qactypes") +else() + set(AC_TYPES_FLAG "-qactypes") +endif() + +# A DPC++ ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. +# For this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS "${WIN_FLAG} -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -Wall") +set(EMULATOR_LINK_FLAGS "-fintelfpga ${AC_TYPES_FLAG}") +set(HARDWARE_COMPILE_FLAGS "${WIN_FLAG} -fintelfpga ${AC_TYPES_FLAG} -Wall") +set(HARDWARE_LINK_FLAGS "-fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}") +# We do not need to supply the AC_TYPES_FLAG for the 'report' target's linking stage. +set(REPORT_LINK_FLAGS "-fintelfpga -Xshardware -Xsboard=${FPGA_BOARD} ${USER_HARDWARE_FLAGS}") +# use cmake -D USER_HARDWARE_FLAGS= to set extra flags for FPGA backend compilation + +############################################################################### +### FPGA Emulator +############################################################################### +# To compile in a single command: +# dpcpp -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR fpga_compile.cpp -o fpga_compile.fpga_emu +# CMake executes: +# [compile] dpcpp -fintelfpga ${AC_TYPES_FLAG} -DFPGA_EMULATOR -o fpga_compile.cpp.o -c fpga_compile.cpp +# [link] dpcpp -fintelfpga ${AC_TYPES_FLAG} fpga_compile.cpp.o -o fpga_compile.fpga_emu +add_executable(${EMULATOR_TARGET} ${SOURCE_FILE}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES COMPILE_FLAGS "${EMULATOR_COMPILE_FLAGS}") +set_target_properties(${EMULATOR_TARGET} PROPERTIES LINK_FLAGS "${EMULATOR_LINK_FLAGS}") +add_custom_target(fpga_emu DEPENDS ${EMULATOR_TARGET}) + +############################################################################### +### Generate Report +############################################################################### +# To compile manually: +# dpcpp -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xsboard= -fsycl-link=early ac_fixed.cpp -o ac_fixed_report.a +set(FPGA_EARLY_IMAGE ${TARGET_NAME}_report.a) +# The compile output is not an executable, but an intermediate compilation result unique to DPC++. +add_executable(${FPGA_EARLY_IMAGE} ${SOURCE_FILE}) +add_custom_target(report DEPENDS ${FPGA_EARLY_IMAGE}) +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") +set_target_properties(${FPGA_EARLY_IMAGE} PROPERTIES LINK_FLAGS "${REPORT_LINK_FLAGS} -fsycl-link=early") + +# fsycl-link=early stops the compiler after RTL generation, before invoking Quartus® + +############################################################################### +### FPGA Hardware +############################################################################### +# To compile in a single command: +# dpcpp -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xsboard= ac_fixed.cpp -o ac_fixed.fpga +# CMake executes: +# [compile] dpcpp -fintelfpga ${AC_TYPES_FLAG} -o ac_fixed.cpp.o -c ac_fixed.cpp +# [link] dpcpp -fintelfpga ${AC_TYPES_FLAG} -Xshardware -Xsboard= ac_fixed.cpp.o -o ac_fixed.fpga +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILE}) +add_custom_target(fpga DEPENDS ${FPGA_TARGET}) +set_target_properties(${FPGA_TARGET} PROPERTIES COMPILE_FLAGS "${HARDWARE_COMPILE_FLAGS}") +set_target_properties(${FPGA_TARGET} PROPERTIES LINK_FLAGS "${HARDWARE_LINK_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_TARGET}") +# The -reuse-exe flag enables rapid recompilation of host-only code changes. +# See DPC++FPGA/GettingStarted/fast_recompile for details. \ No newline at end of file diff --git a/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/src/ac_fixed.cpp b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/src/ac_fixed.cpp new file mode 100644 index 0000000000..d4a0584a25 --- /dev/null +++ b/DirectProgramming/DPC++FPGA/Tutorials/Features/ac_types/ac_fixed/src/ac_fixed.cpp @@ -0,0 +1,193 @@ +// clang-format off +#include +#include +#include +#include +// clang-format on + +// dpc_common.hpp can be found in the dev-utilities include folder. +// e.g., $ONEAPI_ROOT/dev-utilities//include/dpc_common.hpp +#include "dpc_common.hpp" + +using namespace sycl; + +using fixed_10_3_t = ac_fixed<10, 3, true>; +using fixed_9_2_t = ac_fixed<9, 2, true>; + +// Forward declare the kernel name in the global scope. +// This is a FPGA best practice that reduces name mangling in the optimization +// reports. +class ConstructFromFloat; +class ConstructFromACFixed; +class CalculateWithFloat; +class CalculateWithACFixed; + +// Not recommended Usage example: +// Convert dynamic float value inside the kernel +void TestConstructFromFloat(queue &q, float a, + ac_fixed<20, 10, true, AC_RND, AC_SAT> &b) { + buffer inp(&a, 1); + buffer, 1> ret_val(&b, 1); + + q.submit([&](handler &h) { + accessor in_acc{inp, h, read_only}; + accessor out_acc{ret_val, h, write_only, no_init}; + + h.single_task([=] { + ac_fixed<20, 10, true, AC_RND, AC_SAT> t(in_acc[0]); + ac_fixed<20, 10, true, AC_RND, AC_SAT> some_offset(0.5f); + out_acc[0] = t + some_offset; + }); + }); +} + +// Recommended Usage example: +// Convert dynamic float value outside the kernel +void TestConstructFromACFixed(queue &q, + ac_fixed<20, 10, true, AC_RND, AC_SAT> &a) { + buffer, 1> buff_a(&a, 1); + + q.submit([&](handler &h) { + accessor out_acc{buff_a, h, read_write}; + + h.single_task([=] { + ac_fixed<20, 10, true, AC_RND, AC_SAT> t(out_acc[0]); + ac_fixed<20, 10, true, AC_RND, AC_SAT> some_offset(0.5f); + out_acc[0] = t + some_offset; + }); + }); +} + +void TestCalculateWithFloat(queue &q, const float x, float &ret) { + buffer inp_buffer(&x, 1); + buffer ret_buffer(&ret, 1); + + q.submit([&](handler &h) { + accessor x{inp_buffer, h, read_only}; + accessor res{ret_buffer, h, write_only, no_init}; + + h.single_task([=] { + float sin_ret = sinf(x[0]); + float cos_ret = cosf(x[0]); + res[0] = sqrtf(sin_ret * sin_ret + cos_ret * cos_ret); + }); + }); +} + +// clang-format off +// Please refer to ac_fixed_math.hpp header file for fixed point math +// functions' type deduction rule. In this case, following those rules: +// I, W, S are input type template parameter (ac_fixed) +// rI, rW, rS are output type template parameter (ac_fixed) +//* Function Name Type Propagation Rule +//* sqrt_fixed rI = I, rW = W, rS = S +//* sin_fixed For signed (S == true), rI == 2, rW = W - I + 2; +//* For unsigned (S == false), I == 1, rW = W - I + 1 +//* cos_fixed For signed (S == true), rI == 2, rW = W - I + 2; +//* For unsigned (S == false), I == 1, rW = W - I + 1 +// clang-format on +void TestCalculateWithACFixed(queue &q, const fixed_10_3_t &x, + fixed_9_2_t &ret) { + buffer inp_buffer(&x, 1); + buffer ret_buffer(&ret, 1); + + q.submit([&](handler &h) { + accessor x{inp_buffer, h, read_only}; + accessor res{ret_buffer, h, write_only, no_init}; + + h.single_task([=] { + fixed_9_2_t sin_ret = sin_fixed(x[0]); + fixed_9_2_t cos_ret = cos_fixed(x[0]); + res[0] = sqrt_fixed(sin_ret * sin_ret + cos_ret * cos_ret); + }); + }); +} + +constexpr int SIZE = 5; + +int main() { +#if defined(FPGA_EMULATOR) + ext::intel::fpga_emulator_selector selector; +#else + ext::intel::fpga_selector selector; +#endif + + try { + // Create the SYCL device queue + queue q(selector, dpc_common::exception_handler); + + ac_fixed<20, 10, true, AC_RND, AC_SAT> a; + ac_fixed<20, 10, true, AC_RND, AC_SAT> b = 3.1415f; + + TestConstructFromFloat(q, 3.1415f, a); + std::cout << "Constructed from float:\t\t" << a << "\n"; + + TestConstructFromACFixed(q, b); + std::cout << "Constructed from ac_fixed:\t" << b << "\n\n"; + + constexpr float inputs[SIZE] = {-0.807991899423f, -2.09982907558f, + -0.742066235466f, -2.33217071676f, + 1.14324158042f}; + + // quantum: the minimum positive value this type can represent + // Quantum is 1 / 2 ^ (W - I), where W and I are the total width and the + // integer width of the ac_fixed number + constexpr fixed_9_2_t quantum = 0.0078125f; + + // for fixed point, the error should be less than 1 quantum of data type + // (1 / 2^(W - I)) + constexpr fixed_9_2_t epsilon_fixed_9_2 = quantum; + constexpr float epsilon_float = 1.0f / (1.0f * float(1 << 20)); + + std::cout << "MAX DIFF for ac_fixed<10, 3, true>: " + << epsilon_fixed_9_2.to_double() << "\n"; + std::cout << "MAX DIFF for float: " << epsilon_float + << "\n\n"; + + bool pass = true; + + for (int i = 0; i < SIZE; i++) { + fixed_10_3_t fixed_type_input = inputs[i]; + float float_type_input = inputs[i]; + + // declare output and diff variable + fixed_9_2_t fixed_type_result; + TestCalculateWithACFixed(q, fixed_type_input, fixed_type_result); + float float_type_result; + TestCalculateWithFloat(q, float_type_input, float_type_result); + + std::cout << "result(fixed point): " << fixed_type_result.to_double() + << "\n"; + std::cout << "result(float): " << float_type_result << "\n\n"; + + // expected result is 1.0 = sqrt(sin^2(x) + cos^2(x)) + fixed_9_2_t diff1 = fabs(fixed_type_result.to_double() - 1.0); + float diff2 = fabs(float_type_result - 1.0); + + if (diff1 > epsilon_fixed_9_2 || diff2 > epsilon_float) { + pass = false; + } + } + + if (pass) { + std::cout << "PASSED\n"; + } else { + std::cout << "ERROR\n"; + } + } catch (sycl::exception const &e) { + // Catches exceptions in the host code + std::cerr << "Caught a SYCL host exception:\n" << e.what() << "\n"; + + // Most likely the runtime couldn't find FPGA hardware! + if (e.code().value() == CL_DEVICE_NOT_FOUND) { + std::cerr << "If you are targeting an FPGA, please ensure that your " + "system has a correctly configured FPGA board.\n"; + std::cerr << "Run sys_check in the oneAPI root directory to verify.\n"; + std::cerr << "If you are targeting the FPGA emulator, compile with " + "-DFPGA_EMULATOR.\n"; + } + std::terminate(); + } + + return 0; +}