From 3b5b9a01ae41993b138856eb55b5d08bf35bb639 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Tue, 10 Nov 2020 19:55:05 +0100 Subject: [PATCH 01/77] sixtracklib/common: fixes type inconsistency Due to a search & replace error, `sixtrack::(anonymous)::st_size_t` is defined as an alias to `::st_size_t` which is not necessarily the same size as `unsigned long long`. This seems to cause the problems described in SixTrack/sixtracklib#133 --- sixtracklib/common/internal/track_job_base.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sixtracklib/common/internal/track_job_base.cpp b/sixtracklib/common/internal/track_job_base.cpp index ed538aab9..4ac6c1f39 100644 --- a/sixtracklib/common/internal/track_job_base.cpp +++ b/sixtracklib/common/internal/track_job_base.cpp @@ -50,7 +50,7 @@ namespace SIXTRL_CXX_NAMESPACE { namespace st = SIXTRL_CXX_NAMESPACE; using tjob_t = st::TrackJobBase; - using st_size_t = st_size_t; + using st_size_t = tjob_t::size_type; using st_status_t = tjob_t::status_t; using st_track_status_t = tjob_t::track_status_t; using paddr_t = tjob_t::particles_addr_t; From deecca1a514da8ac05a03932c8e4cc9a91efc0c8 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 12:23:11 +0100 Subject: [PATCH 02/77] cmake: bumps minimum required cmake version to 3.11 NOTE: cmake 3.11 introduces the FetchContent extension which is much better suited than the currently used ExternalProject facility (or manually downloading stuff via git) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 34550eaa9..8b35e107c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ # granted to it by virtue of its status as an Intergovernmental Organization or # submit itself to any jurisdiction. 
-cmake_minimum_required( VERSION 3.8 FATAL_ERROR ) +cmake_minimum_required( VERSION 3.11 FATAL_ERROR ) project( sixtracklib LANGUAGES C CXX ) message( STATUS "---- Project sixtracklib" ) From 8438311a07e71bf7077ce66846b4b19d9725497f Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 12:28:49 +0100 Subject: [PATCH 03/77] cmake: updates OpenCL setup script - uses FetchContent to download OpenCL C headers, OpenCL C++ headers or the OpenCL icd loader if needed - Adapts to the new OpenCL 3.x header structure --- cmake/SetupOpenCL.cmake | 385 ++++++++++++++++++++++++---------------- 1 file changed, 232 insertions(+), 153 deletions(-) diff --git a/cmake/SetupOpenCL.cmake b/cmake/SetupOpenCL.cmake index 2adc4c762..dddd068f8 100644 --- a/cmake/SetupOpenCL.cmake +++ b/cmake/SetupOpenCL.cmake @@ -5,187 +5,266 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) # -------------------------------------------------------------------------- # Add OPENCL to the list of supported modules and track its state: + set( SIXTRACKLIB_MODULE_VALUE_OPENCL 0 ) - list( APPEND SIXTRACKLIB_SUPPORTED_MODULES "OPENCL" ) + # -------------------------------------------------------------------------- + # Provide include directories and library directories for OpenCL, if enabled if( SIXTRACKL_ENABLE_OPENCL ) - list( APPEND SIXTRACKLIB_SUPPORTED_MODULES_VALUES "1" ) - else() - list( APPEND SIXTRACKLIB_SUPPORTED_MODULES_VALUES "0" ) - endif() + set( SIXTRL_OPENCL_INCLUDE_DIRS ) + set( SIXTRL_OPENCL_LIBRARIES ) - # -------------------------------------------------------------------------- - # Provide include directories and library directories for OpenCL, if enabled + if( NOT OpenCL_FOUND ) + find_package( OpenCL QUIET ) + endif() - if( NOT SIXTRACKL_OPENCL_INCLUDE_DIR ) - set( SIXTRACKL_OPENCL_INCLUDE_DIR ) - endif() + if( OpenCL_FOUND ) + set( SIXTRL_TEMP_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS} ) + set( SIXTRL_OPENCL_LIBRARIES ${OpenCL_LIBRARIES} ) + elseif( 
SIXTRACKL_REQUIRE_OFFLINE_BUILD ) + set( SIXTRL_TEMP_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/external/CL" ) + else() + message( FATAL_ERROR + "---- Unable to find OpenCL setup, unable to download since offline build required" ) + endif() - if( NOT SIXTRACKL_OPENCL_LIBRARY ) - set( SIXTRACKL_OPENCL_LIBRARY ) - endif() + foreach( dir ${SIXTRL_TEMP_INCLUDE_DIRS} ) + if( NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE ) + if( EXISTS "${dir}/CL/opencl.h" ) + set( SIXTRL_OPENCL_C99_HEADER_FILE "CL/opencl.h" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${dir} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 3 ) + elseif( EXISTS "${dir}/CL/cl.h" ) + set( SIXTRL_OPENCL_C99_HEADER_FILE "CL/cl.h" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${dir} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 1 ) + endif() + endif() - if( NOT SIXTRACKL_OPENCL_VERSION_STR ) - set( SIXTRACKL_OPENCL_VERSION_STR "" ) - endif() + if( NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE ) + if( EXISTS "${dir}/CL/opencl.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/opencl.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${dir} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 3 ) + elseif( EXISTS "${dir}/CL/cl2.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/cl2.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${dir} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 2 ) + elseif( EXISTS "${dir}/CL/cl.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/cl.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${dir} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 1 ) + endif() + endif() + endforeach() - set( khr_cxx_ocl_UPDATED 0 ) - set( khr_cxx_ocl_SYNC 0 ) - set( khr_cxx_ocl_EXT_DIR "${CMAKE_SOURCE_DIR}/external/CL" ) + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( SIXTRACKL_ENABLE_OPENCL ) - if( NOT OpenCL_FOUND ) - find_package( OpenCL REQUIRED ) + if( NOT OpenCL_FOUND OR + NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE_VERSION OR + NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) + + set( SIXTRL_OPENCL_EXT_INCLUDE_DIR 
${CMAKE_BINARY_DIR}/include/CL ) + include( FetchContent ) endif() - if( OpenCL_FOUND ) - set( SIXTRACKL_OPENCL_LIBRARY - ${SIXTRACKL_OPENCL_LIBRARY} ${OpenCL_LIBRARY} ) + set( SIXTRL_OPENCL_USE_DOWNOADED_HEADERS OFF ) - set( SIXTRACKL_OPENCL_VERSION_STR - ${SIXTRACKL_OPENCL_VERSION_STR} ${OpenCL_VERSION_STRING} ) + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - set( SIXTRACKL_OPENCL_INCLUDE_DIR - ${SIXTRACKL_OPENCL_INCLUDE_DIR} ${OpenCL_INCLUDE_DIR} ) + if( NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD AND ( + NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE OR + NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ) ) - if( NOT SIXTRACKL_USE_LEGACY_CL_HPP ) - set( CXX_OPENCL_HEADER_NAME "cl2.hpp" ) - else() - set( CXX_OPENCL_HEADER_NAME "cl.hpp" ) + if( NOT EXISTS ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + file( MAKE_DIRECTORY ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) endif() - set( CXX_OPENCL_HEADER "${OpenCL_INCLUDE_DIR}/CL/${CXX_OPENCL_HEADER_NAME}" ) + FetchContent_Declare( opencl_c99_headers + GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-Headers.git" + GIT_TAG "v2020.06.16" + GIT_SHALLOW 1 + CONFIGURE_COMMAND ${CMAKE_COMMAND} -E echo "Configure: no operation" + BUILD_COMMAND ${CMAKE_COMMAND} -E echo "Build: no operation" + INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "Install: handled outside of this step" ) + + FetchContent_GetProperties( opencl_c99_headers ) + message( STATUS "------ Using external OpenCL C99 headers" ) + if( NOT opencl_c99_headers_POPULATED ) + message( STATUS "------ Downloading external OpenCL C99 headers ..." 
) + FetchContent_Populate( opencl_c99_headers ) + message( STATUS "------ Downloading external OpenCL C99 headers [DONE]" ) + endif() - if( NOT EXISTS ${CXX_OPENCL_HEADER} ) - message( STATUS "------ Unable to find OpenCl 1.x C++ header" ) + if( opencl_c99_headers_POPULATED ) + file( GLOB SIXTRL_OPENCL_C99_IN_FILES + "${opencl_c99_headers_SOURCE_DIR}/CL/*.h" ) + if( SIXTRL_OPENCL_C99_IN_FILES ) + set( SIXTRL_OPENCL_USE_DOWNOADED_HEADERS ON ) + endif() - include( SetupGit ) - include( SetupPython ) + foreach( in_file ${SIXTRL_OPENCL_C99_IN_FILES} ) + get_filename_component( in_file_name ${in_file} NAME ) + get_filename_component( in_dir ${in_file} DIRECTORY ) + file( COPY ${in_file} + DESTINATION ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + + if( NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE ) + if( ${in_file_name} STREQUAL "opencl.h" ) + set( SIXTRL_OPENCL_C99_HEADER_FILE "opencl.h" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 3 ) + elseif( ${in_file_name} STREQUAL "cl.h" ) + set( SIXTRL_OPENCL_C99_HEADER_FILE "cl.h" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 1 ) + endif() + endif() + endforeach() + endif() + endif() - if( Git_FOUND ) - message( STATUS "------ Attempt to download headers ... 
" ) - set( khr_cxx_ocl_GIT_REPOSITORY https://github.com/KhronosGroup/OpenCL-CLHPP.git ) - set( khr_cxx_ocl_GIT_BRANCH master ) + if( NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD AND ( + NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE OR + NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) ) + if( NOT EXISTS ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + file( MAKE_DIRECTORY ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + endif() - Git_sync_with_repo( TARGET khr_cxx_ocl - GIT_REPOSITORY ${khr_cxx_ocl_GIT_REPOSITORY} - GIT_BRANCH ${khr_cxx_ocl_GIT_BRANCH} ) + FetchContent_Declare( opencl_cxx_headers + GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-CLHPP.git" + GIT_TAG "master" + GIT_SHALLOW 1 + CONFIGURE_COMMAND ${CMAKE_COMMAND} -E echo "Configure: no operation" + BUILD_COMMAND ${CMAKE_COMMAND} -E echo "Build: no operation" + INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "Install: handled outside of this step" ) + + FetchContent_GetProperties( opencl_cxx_headers ) + message( STATUS "------ Using external OpenCL C++ headers" ) + if( NOT opencl_cxx_headers_POPULATED ) + message( STATUS "------ Downloading external OpenCL C++ headers ..." 
) + FetchContent_Populate( opencl_cxx_headers ) + message( STATUS "------ Downloading external OpenCL C++ headers [DONE]" ) + endif() - if( NOT khr_cxx_ocl_DIR ) - message( FATAL_ERROR "------ Unable to fetch C++ OpenCL headers from git repository" ) - endif() + if( opencl_cxx_headers_POPULATED ) + file( GLOB SIXTRL_OPENCL_CXX_IN_FILES + "${opencl_cxx_headers_SOURCE_DIR}/include/CL/*.hpp" ) + if( SIXTRL_OPENCL_CXX_IN_FILES ) + set( SIXTRL_OPENCL_USE_DOWNOADED_HEADERS ON ) + endif() - if( ${khr_cxx_ocl_SYNC} EQUAL 1 ) - if( ${khr_cxx_ocl_UPDATED} EQUAL 0 ) - message( STATUS "------ C++ OpenCL headers already sync, no update for embedded library" ) - elseif( ${khr_cxx_ocl_UPDATED} EQUAL 1 ) - message( STATUS "------ C++ OpenCL headers successfully cloned/pulled from ${khr_cxx_ocl_GIT_REPOSITORY}/${khr_cxx_ocl_GIT_BRANCH}" ) - message( STATUS "------ Attempting to update the embedded library at ${khr_cxx_ocl_EXT_DIR} ..." ) - - set( khr_cxx_ocl_TRANSFERRED_HEADER_FILES 0 ) - if( EXISTS "${khr_cxx_ocl_DIR}/LICENSE.txt" ) - configure_file( "${khr_cxx_ocl_DIR}/LICENSE.txt" - "${khr_cxx_ocl_EXT_DIR}/LICENSE.txt" COPYONLY ) - endif() - - if( EXISTS "${khr_cxx_ocl_DIR}/README.md" ) - configure_file( "${khr_cxx_ocl_DIR}/README.md" - "${khr_cxx_ocl_EXT_DIR}/OpenCL-CLHPP_README.md" COPYONLY ) - endif() - - if( EXISTS "${khr_cxx_ocl_DIR}/CODE_OF_CONDUCT.md" ) - configure_file( "${khr_cxx_ocl_DIR}/CODE_OF_CONDUCT.md" - "${khr_cxx_ocl_EXT_DIR}/CODE_OF_CONDUCT.md" COPYONLY ) - endif() - - if( EXISTS "${khr_cxx_ocl_DIR}/include/CL/cl2.hpp" ) - configure_file( "${khr_cxx_ocl_DIR}/include/CL/cl2.hpp" - "${khr_cxx_ocl_EXT_DIR}/cl2.hpp" COPYONLY ) - set( khr_cxx_ocl_TRANSFERRED_HEADER_FILES 1 ) - else() - message( WARNING "------ No CL/cl2.hpp header found inside ${khr_cxx_ocl_DIR}/include -> skipping!" 
) - endif() - - if( PYTHONINTERP_FOUND - AND EXISTS "${khr_cxx_ocl_DIR}/gen_cl_hpp.py" - AND EXISTS "${khr_cxx_ocl_DIR}/input_cl.hpp" ) - - execute_process( COMMAND ${PYTHON_EXECUTABLE} gen_cl_hpp.py - WORKING_DIRECTORY ${khr_cxx_ocl_DIR} - OUTPUT_VARIABLE EXE_PROCESS_OUTPUT - RESULT_VARIABLE EXE_PROCESS_RESULT - ERROR_VARIABLE EXE_PROCESS_ERROR ) - - if( NOT ( ${EXE_PROCESS_RESULT} EQUAL 0 ) AND EXE_PROCESS_OUTPUT ) - message( STATUS "------ Error python gen_cl_hpp.py: ${EXE_PROCESS_ERROR}" ) - endif() - - if( EXISTS "${khr_cxx_ocl_DIR}/cl.hpp" ) - configure_file( "${khr_cxx_ocl_DIR}/cl.hpp" - "${khr_cxx_ocl_EXT_DIR}/cl.hpp" COPYONLY ) - set( khr_cxx_ocl_TRANSFERRED_HEADER_FILES 1 ) - else() - message( WARNING "------ No cl.hpp file present to add to ${khr_cxx_ocl_EXT_DIR} -> skipping!" ) - endif() - - else() - message( WARNING "------ Unable to run generator script gen_cl_hpp.py to create cl.hpp -> skipping!" ) - endif() - - if( ${khr_cxx_ocl_TRANSFERRED_HEADER_FILES} EQUAL 1 ) - message( STATUS "------ transfered header files to ${khr_cxx_ocl_EXT_DIR}" ) - endif() - else() - message( FATAL_ERROR "------ internal error Git_sync_with_repo" ) + foreach( in_file ${SIXTRL_OPENCL_CXX_IN_FILES} ) + get_filename_component( in_file_name ${in_file} NAME ) + get_filename_component( in_dir ${in_file} DIRECTORY ) + file( COPY ${in_file} + DESTINATION ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + + if( NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE ) + if( ${in_file_name} STREQUAL "opencl.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "opencl.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 3 ) + elseif( ${in_file_name} STREQUAL "cl2.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "cl2.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 2 ) + elseif( ${in_file_name} STREQUAL "cl.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "cl.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${in_dir} ) + set( 
SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 1 ) endif() - else() - message( WARNING "----- Unable to sync external OpenCL C++ headers -> rely on existing headers instead" ) endif() - endif() + endforeach() + endif() + endif() - if( ${khr_cxx_ocl_SYNC} EQUAL 1 AND - EXISTS "${khr_cxx_ocl_EXT_DIR}/${CXX_OPENCL_HEADER_NAME}" ) - set( CXX_OPENCL_HEADER "${khr_cxx_ocl_EXT_DIR}/${CXX_OPENCL_HEADER_NAME}" ) - set( SIXTRACKL_OPENCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/external" ) - endif() + if( NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE OR + NOT DEFINED SIXTRL_OPENCL_CXX_INCLUDE_DIR ) + message( FATAL_ERROR "---- No C++ OpenCL headers available" ) + endif() + + if( NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE OR + NOT DEFINED SIXTRL_OPENCL_C99_INCLUDE_DIR ) + message( FATAL_ERROR "---- No C OpenCL headers available" ) + endif() + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + if( OpenCL_FOUND ) + set( SIXTRACKLIB_MODULE_VALUE_OPENCL 1 ) + elseif( NOT OpenCL_FOUND AND NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD ) + FetchContent_Declare( opencl_icd_loader + GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-ICD-Loader.git" + GIT_TAG "v2020.06.16" + GIT_SHALLOW 1 ) + + if( NOT opencl_icd_loader_POPULATED ) + message( STATUS "------ Downloading external OpenCL ICD Loader ..." 
) + FetchContent_Populate( opencl_icd_loader ) + message( STATUS "------ Downloading external OpenCL ICD Loader [DONE]" ) endif() - if( EXISTS ${CXX_OPENCL_HEADER} ) - set( SIXTRL_OPENCL_ENABLE_EXCEPTION_STR "" ) - set( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG 0 ) - - if( SIXTRACKL_USE_LEGACY_CL_HPP ) - set( SIXTRL_OPENCL_CL_HPP "CL/cl.hpp" ) - set( SIXTRL_USES_CL2_HPP 0 ) - set( SIXTRL_USES_CL_HPP 1 ) - - if( SIXTRL_OPENCL_ENABLE_HOST_EXCEPTIONS ) - set( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG 1 ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - " #if !defined( __CL_ENABLE_EXCEPTIONS )\r\n" ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - " #define __CL_ENABLE_EXCEPTIONS \r\n" ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - " #endif /* !defined( __CL_ENABLE_EXCEPTIONS ) */ \r\n" ) - endif() - else() - set( SIXTRL_OPENCL_CL_HPP "CL/cl2.hpp" ) - set( SIXTRL_USES_CL2_HPP 1 ) - set( SIXTRL_USES_CL_HPP 0 ) - - if( SIXTRACKL_OPENCL_CXX_ENABLE_EXCEPTIONS ) - set( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG 1 ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - " #if !defined( CL_HPP_ENABLE_EXCEPTIONS )\r\n" ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - " #define CL_HPP_ENABLE_EXCEPTIONS \r\n" ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - " #endif /* !defined( CL_HPP_ENABLE_EXCEPTIONS ) */ \r\n" ) - endif() - endif() + get_filename_component( SIXTRL_OPENCL_C99_HEADER_DIR + ${SIXTRL_OPENCL_C99_HEADER_FILE} DIRECTORY ) + + file( COPY ${SIXTRL_OPENCL_C99_HEADER_DIR} DESTINATION + ${opencl_icd_loader_SOURCE_DIR}/inc PATTERN "*.h" ) + + FetchContent_MakeAvailable( opencl_icd_loader ) + set( SIXTRL_OPENCL_LIBRARIES ${SIXTRL_OPENCL_LIBRARIES} OpenCL ) + set( SIXTRACKLIB_MODULE_VALUE_OPENCL 1 ) + + elseif( NOT OpenCL_FOUND ) + message( FATAL_ERROR + "---- Unable to download OpenCL icd loader due to offline build requirement" ) + endif() + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + set( SIXTRL_OPENCL_INCLUDE_DIRS 
${SIXTRL_OPENCL_C99_INCLUDE_DIR} ) + + if( NOT ( ${SIXTRL_OPENCL_CXX_INCLUDE_DIR} STREQUAL + ${SIXTRL_OPENCL_C99_INCLUDE_DIR} ) ) + set( SIXTRL_OPENCL_INCLUDE_DIRS ${SIXTRL_OPENCL_INCLUDE_DIRS} + ${SIXTRL_OPENCL_CXX_INCLUDE_DIR} ) + endif() + endif() + + # --------------------------------------------------------------------------- + + set( SIXTRL_OPENCL_ENABLE_EXCEPTION_STR "" ) + set( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG 0 ) + + if( ${SIXTRACKLIB_MODULE_VALUE_OPENCL} EQUAL 1 ) + if( SIXTRACKL_OPENCL_DEFAULT_COMPILER_FLAGS ) + set( SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS + ${SIXTRACKL_OPENCL_DEFAULT_COMPILER_FLAGS} ) + endif() + + if( SIXTRACKL_OPENCL_CXX_ENABLE_EXCEPTIONS ) + if( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION EQUAL 2 OR + SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION EQUAL 3 ) + set( SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO + "CL_HPP_ENABLE_EXCEPTIONS" ) + else() + set( SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO + "__CL_ENABLE_EXCEPTIONS" ) endif() + + set( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG 1 ) + string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR + "#if !defined( ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} )\r\n" ) + string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR + " #define ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} \r\n" ) + string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR + "#endif /* !defined( ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} ) */\r\n" ) endif() endif() -endif() -#end: cmake/SetupOpenCL.cmake + # --------------------------------------------------------------------------- + + list( APPEND SIXTRACKLIB_SUPPORTED_MODULES "OPENCL" ) + list( APPEND SIXTRACKLIB_SUPPORTED_MODULES_VALUES + "${SIXTRACKLIB_MODULE_VALUE_OPENCL}" ) +endif() From 63fd9c86bbca6e3abd7f26bfa01e9a9d5e9ed301 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 12:34:23 +0100 Subject: [PATCH 04/77] sixtracklib/opencl: updates wrapper headers to manage new OpenCL 3.x standard --- sixtracklib/opencl/.gitignore | 3 +- sixtracklib/opencl/CMakeLists.txt | 43 
+++++++++++------- sixtracklib/opencl/cl.h.template | 63 -------------------------- sixtracklib/opencl/opencl.h.template | 27 +++++++++++ sixtracklib/opencl/opencl.hpp.template | 53 ++++++++++++++++++++++ 5 files changed, 109 insertions(+), 80 deletions(-) delete mode 100644 sixtracklib/opencl/cl.h.template create mode 100644 sixtracklib/opencl/opencl.h.template create mode 100644 sixtracklib/opencl/opencl.hpp.template diff --git a/sixtracklib/opencl/.gitignore b/sixtracklib/opencl/.gitignore index db0eeddbd..d80e99182 100644 --- a/sixtracklib/opencl/.gitignore +++ b/sixtracklib/opencl/.gitignore @@ -1 +1,2 @@ -cl.h +opencl.h +opencl.hpp diff --git a/sixtracklib/opencl/CMakeLists.txt b/sixtracklib/opencl/CMakeLists.txt index b6581735a..a9d086f9e 100644 --- a/sixtracklib/opencl/CMakeLists.txt +++ b/sixtracklib/opencl/CMakeLists.txt @@ -3,7 +3,7 @@ message( STATUS "---- processing sixtracklib/opencl/CMakeLists.txt" ) -set( C99_HEADERS_INSTALL_PATH "include/sixtracklib/opencl" ) +set( C99_HEADERS_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}/include/sixtracklib/opencl" ) set( CXX_HEADERS_INSTALL_PATH ${C99_HEADERS_INSTALL_PATH} ) set( SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS "" ) @@ -12,22 +12,29 @@ if( SIXTRACKL_OPENCL_DEFAULT_COMPILER_FLAGS ) "${SIXTRACKL_OPENCL_DEFAULT_COMPILER_FLAGS}" ) endif() -set( CL_H_IN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/cl.h.template" ) -set( CL_H_OUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/cl.h" ) -configure_file( ${CL_H_IN_FILE} ${CL_H_OUT_FILE} @ONLY ) +set( OPENCL_H_IN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/opencl.h.template" ) +set( OPENCL_H_OUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/opencl.h" ) +configure_file( ${OPENCL_H_IN_FILE} ${OPENCL_H_OUT_FILE} @ONLY ) + +set( OPENCL_HPP_IN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/opencl.hpp.template" ) +set( OPENCL_HPP_OUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/opencl.hpp" ) +configure_file( ${OPENCL_HPP_IN_FILE} ${OPENCL_HPP_OUT_FILE} @ONLY ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Handle 
install of cl.h -set( CL_H_INSTALL_PATH - "${CMAKE_INSTALL_PREFIX}/${CXX_HEADERS_INSTALL_PATH}/${CL_H_OUT_FILE}" ) +set( OPENCL_H_INSTALL_FILE "${CXX_HEADERS_INSTALL_PATH}/${OPENCL_H_OUT_FILE}" ) +set( OPENCL_HPP_INSTALL_FILE "${CXX_HEADERS_INSTALL_PATH}/${OPENCL_HPP_OUT_FILE}" ) set( CL_H_INSTALL_INFO "set( SIXTRACKL_MIN_OPENCL_VERSION ${SIXTRACKL_MIN_OPENCL_VERSION} ) set( SIXTRACKL_TARGET_OPENCL_VERSION ${SIXTRACKL_TARGET_OPENCL_VERSION} ) - set( SIXTRL_OPENCL_CL_HPP ${SIXTRL_OPENCL_CL_HPP} ) - set( SIXTRL_USES_CL2_HPP ${SIXTRL_USES_CL2_HPP} ) - set( SIXTRL_USES_CL_HPP ${SIXTRL_USES_CL_HPP} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE \"${SIXTRL_OPENCL_CXX_HEADER_FILE}\" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR \"${SIXTRL_OPENCL_CXX_INCLUDE_DIR}\" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ${SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE \"${SIXTRL_OPENCL_C99_HEADER_FILE}\" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR \"${SIXTRL_OPENCL_C99_INCLUDE_DIR}\" ) + set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ${SIXTRL_OPENCL_C99_HEADER_FILE_VERSION} ) set( SIXTRL_OPENCL_ENABLE_EXCEPTION_STR ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR} ) set( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG @@ -35,13 +42,17 @@ set( CL_H_INSTALL_INFO install( CODE " ${CL_H_INSTALL_INFO} - message( STATUS \"Generating: ${CL_H_INSTALL_PATH}\" ) - configure_file( \"${CL_H_IN_FILE}\" \"${CL_H_INSTALL_PATH}\" @ONLY)" ) + message( STATUS \"Generating: ${OPENCL_H_INSTALL_FILE}\" ) + configure_file( \"${OPENCL_H_IN_FILE}\" + \"${OPENCL_H_INSTALL_FILE}\" @ONLY ) + message( STATUS \"Generating: ${OPENCL_HPP_INSTALL_FILE}\" ) + configure_file( \"${OPENCL_HPP_IN_FILE}\" + \"${OPENCL_HPP_INSTALL_FILE}\" @ONLY )" ) # ------------------------------------------------------------------------------ -set( SIXTRACKLIB_OPENCL_HEADERS - ${CL_H_OUT_FILE} argument.h context.h track_job_cl.h make_track_job.h ) +set( SIXTRACKLIB_OPENCL_HEADERS ${OPENCL_H_OUT_FILE} ${OPENCL_HPP_OUT_FILE} + argument.h context.h 
track_job_cl.h make_track_job.h ) set( SIXTRACKLIB_OPENCL_INTERNAL_HEADERS internal/default_compile_options.h internal/base_context.h ) @@ -67,10 +78,10 @@ add_library( sixtrack_opencl OBJECT target_include_directories( sixtrack_opencl PUBLIC $ - PUBLIC $ ) + PUBLIC $ ) set_target_properties( sixtrack_opencl PROPERTIES LINKER_LANGUAGE C - POSITION_INDEPENDENT_CODE True CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON ) + POSITION_INDEPENDENT_CODE ON CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON ) target_compile_definitions( sixtrack_opencl PUBLIC ${SIXTRACKLIB_NAMESPACE_FLAGS} -DCL_USE_DEPRECATED_OPENCL_1_2_APIS ) @@ -86,7 +97,7 @@ set( SIXTRACKL_LIBRARY_MODULES ${SIXTRACKL_LIBRARY_MODULES} $ CACHE INTERNAL "" FORCE ) set( SIXTRACKL_LINK_LIBRARIES ${SIXTRACKL_LINK_LIBRARIES} - ${SIXTRACKL_OPENCL_LIBRARY} CACHE INTERNAL "" FORCE ) + ${SIXTRL_OPENCL_LIBRARIES} CACHE INTERNAL "" FORCE ) # ------------------------------------------------------------------------------ # install : diff --git a/sixtracklib/opencl/cl.h.template b/sixtracklib/opencl/cl.h.template deleted file mode 100644 index b0efc0960..000000000 --- a/sixtracklib/opencl/cl.h.template +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef SIXTRACKLIB_OPENCL_CL_H__ -#define SIXTRACKLIB_OPENCL_CL_H__ - -#if !defined( SIXTRL_NO_INCLUDES ) - #include "sixtracklib/common/definitions.h" -#endif /* !defined( SIXTRL_NO_INCLUDES ) */ - -#if defined( __cplusplus ) && !defined( _GPUCODE ) && \ - !defined( __CUDACC__ ) && !defined( __CUDA_ARCH__ ) - - #if !defined( SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS ) - #define SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS \ - "@SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS@" - #endif /* !defined( SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS ) */ - - #if !defined( CL_HPP_MINIMUM_OPENCL_VERSION ) - #define CL_HPP_MINIMUM_OPENCL_VERSION @SIXTRACKL_MIN_OPENCL_VERSION@ - #endif /* !defined( CL_HPP_MINIMUM_OPENCL_VERSION ) */ - - #if !defined( CL_HPP_TARGET_OPENCL_VERSION ) - #define CL_HPP_TARGET_OPENCL_VERSION 
@SIXTRACKL_TARGET_OPENCL_VERSION@ - #endif /* !defined( CL_HPP_TARGET_OPENCL_VERSION ) */ - - #if !defined( SIXTRL_USES_CL2_HPP ) && !defined( SIXTRL_USES_CL_HPP ) - #define SIXTRL_USES_CL2_HPP @SIXTRL_USES_CL2_HPP@ - #define SIXTRL_USES_CL_HPP @SIXTRL_USES_CL_HPP@ - #endif /* !defined(SIXTRL_USES_CL2_HPP) && !defined(SIXTRL_USES_CL_HPP) */ - - #if defined( CL_HPP_MINIMUM_OPENCL_VERSION ) && \ - ( !defined(SIXTRL_USES_CL2_HPP) || SIXTRL_USES_CL2_HPP == 1 ) && \ - CL_HPP_MINIMUM_OPENCL_VERSION < 200 && \ - !defined( CL_HPP_CL_1_2_DEFAULT_BUILD ) - #define CL_HPP_CL_1_2_DEFAULT_BUILD - #endif /* CL_HPP_CL_1_2_DEFAULT_BUILD */ - -@SIXTRL_OPENCL_ENABLE_EXCEPTION_STR@ - #if !defined( SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS ) - #define SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS @SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG@ - #endif /* !defined( SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS ) */ - - /* Attempt to disable -Wignored-attributes warnings on affected compilers - * only for the C++ OpenCL header -> cf. 
- * http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1221 for reference */ - - #if defined( __GNUC__ ) && __GNUC__ >= 6 - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wignored-attributes" - #endif - - #include <@SIXTRL_OPENCL_CL_HPP@> - - #if defined( __GNUC__ ) && __GNUC__ >= 6 - #pragma GCC diagnostic pop - #endif - -#endif /* C++, Host */ - -#if !defined( __cplusplus ) && !defined( _GPUCODE ) && !defined( __CUDACC__ ) - #include -#endif /* C99, Host */ - -#endif /* SIXTRACKLIB_OPENCL_CL_H__ */ -/* end: sixtracklib/opencl/cl.h */ diff --git a/sixtracklib/opencl/opencl.h.template b/sixtracklib/opencl/opencl.h.template new file mode 100644 index 000000000..ac42c3fce --- /dev/null +++ b/sixtracklib/opencl/opencl.h.template @@ -0,0 +1,27 @@ +#ifndef SIXTRACKLIB_OPENCL_OPENCL_H__ +#define SIXTRACKLIB_OPENCL_OPENCL_H__ + +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/definitions.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS ) + #define SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS "@SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS@" +#endif /* !defined( SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS ) */ + +#if !defined( __cplusplus ) && !defined( _GPUCODE ) + + #if !defined( SIXTRL_OPENCL_C99_HEADER_FILE ) + #define SIXTRL_OPENCL_C99_HEADER_FILE "@SIXTRL_OPENCL_C99_HEADER_FILE@" + #endif /* !defined( SIXTRL_OPENCL_C99_HEADER_FILE ) */ + + #if !defined( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ) + #define SIXTRL_OPENCL_C99_HEADER_FILE_VERSION @SIXTRL_OPENCL_C99_HEADER_FILE_VERSION@ + #endif /* !defined( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ) */ + + #if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) + #include <@SIXTRL_OPENCL_C99_HEADER_FILE@> + #endif /* !defined( SIXTRL_NO_SYSTEM_INCLUDES ) */ + +#endif /* C99, Host */ +#endif /* SIXTRACKLIB_OPENCL_OPENCL_H__ */ diff --git a/sixtracklib/opencl/opencl.hpp.template b/sixtracklib/opencl/opencl.hpp.template new file mode 100644 index 000000000..a6e5bacaf --- /dev/null +++ 
b/sixtracklib/opencl/opencl.hpp.template @@ -0,0 +1,53 @@ +#ifndef SIXTRACKLIB_OPENCL_OPENCL_HPP__ +#define SIXTRACKLIB_OPENCL_OPENCL_HPP__ + +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/definitions.h" + #include "sixtracklib/opencl/opencl.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if defined( __cplusplus ) && !defined( _GPUCODE ) + +#if !defined( CL_HPP_MINIMUM_OPENCL_VERSION ) + #define CL_HPP_MINIMUM_OPENCL_VERSION @SIXTRACKL_MIN_OPENCL_VERSION@ +#endif /* !defined( CL_HPP_MINIMUM_OPENCL_VERSION ) */ + +#if !defined( CL_HPP_TARGET_OPENCL_VERSION ) + #define CL_HPP_TARGET_OPENCL_VERSION @SIXTRACKL_MIN_OPENCL_VERSION@ +#endif /* !defined( CL_HPP_TARGET_OPENCL_VERSION ) */ + +#if !defined( SIXTRL_OPENCL_CXX_HEADER_FILE ) + #define SIXTRL_OPENCL_CXX_HEADER_FILE "@SIXTRL_OPENCL_CXX_HEADER_FILE@" +#endif /* !defined( SIXTRL_OPENCL_CXX_HEADER_FILE ) */ + +#if !defined( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) + #define SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION @SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION@ +#endif /* !defined( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) */ + +#if !defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) + #define SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG @SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG@ +#endif /* !defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) */ + +@SIXTRL_OPENCL_ENABLE_EXCEPTION_STR@ + +#if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) + + /* Attempt to disable -Wignored-attributes warnings on affected compilers + * only for the C++ OpenCL header -> cf. 
+ * http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1221 for reference */ + + #if defined( __GNUC__ ) && __GNUC__ >= 6 + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif + + #include <@SIXTRL_OPENCL_CXX_HEADER_FILE@> + + #if defined( __GNUC__ ) && __GNUC__ >= 6 + #pragma GCC diagnostic pop + #endif + +#endif /* !defined( SIXTRL_NO_SYSTEM_INCLUDES ) */ +#endif /* C++, Host */ + +#endif /* SIXTRACKLIB_OPENCL_OPENCL_HPP__ */ From 04c7bf9045394ed49b00fe7f8dd15b3c3ef67802 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 13:00:38 +0100 Subject: [PATCH 05/77] sixtracklib: uses new opencl wrapper headers in sixtracklib.h and sixtracklib.hpp --- sixtracklib/sixtracklib.h | 2 +- sixtracklib/sixtracklib.hpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sixtracklib/sixtracklib.h b/sixtracklib/sixtracklib.h index 45003b675..f17bb9fe9 100644 --- a/sixtracklib/sixtracklib.h +++ b/sixtracklib/sixtracklib.h @@ -74,7 +74,7 @@ #if defined( SIXTRACKLIB_ENABLE_MODULE_OPENCL ) && \ ( SIXTRACKLIB_ENABLE_MODULE_OPENCL == 1 ) - #include "sixtracklib/opencl/cl.h" + #include "sixtracklib/opencl/opencl.h" #include "sixtracklib/opencl/argument.h" #include "sixtracklib/opencl/context.h" #include "sixtracklib/opencl/track_job_cl.h" diff --git a/sixtracklib/sixtracklib.hpp b/sixtracklib/sixtracklib.hpp index da36bf39a..9afb1950d 100644 --- a/sixtracklib/sixtracklib.hpp +++ b/sixtracklib/sixtracklib.hpp @@ -30,6 +30,8 @@ #if defined( SIXTRACKLIB_ENABLE_MODULE_OPENCL ) && \ ( SIXTRACKLIB_ENABLE_MODULE_OPENCL == 1 ) + #include "sixtracklib/opencl/opencl.hpp" + #endif /* defined( SIXTRACKLIB_ENABLE_MODULE_OPENCL ) */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ From 23d59eb2be7c17a18e7789696e97ed29cba0ff6e Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 13:02:00 +0100 Subject: [PATCH 06/77] sixtracklib/opencl: uses new OpenCL wrapper headers in module --- 
sixtracklib/opencl/argument.h | 6 +++++- sixtracklib/opencl/context.h | 3 ++- sixtracklib/opencl/internal/argument.cpp | 6 +++++- sixtracklib/opencl/internal/base_context.cpp | 6 +++++- sixtracklib/opencl/internal/base_context.h | 10 +++++----- sixtracklib/opencl/internal/context.cpp | 3 ++- sixtracklib/opencl/internal/track_job_cl.cpp | 2 ++ 7 files changed, 26 insertions(+), 10 deletions(-) diff --git a/sixtracklib/opencl/argument.h b/sixtracklib/opencl/argument.h index a7f7a61dd..5dbbe0b48 100644 --- a/sixtracklib/opencl/argument.h +++ b/sixtracklib/opencl/argument.h @@ -44,10 +44,14 @@ struct NS(Buffer); #endif /* defined( __cplusplus ) */ #if !defined( SIXTRL_NO_INCLUDES ) - #include "sixtracklib/opencl/cl.h" + #include "sixtracklib/opencl/opencl.h" #endif /* !defined( SIXTRL_NO_INCLUDES ) */ #if defined( __cplusplus ) +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/opencl/opencl.hpp" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + namespace SIXTRL_CXX_NAMESPACE { class ClContextBase; diff --git a/sixtracklib/opencl/context.h b/sixtracklib/opencl/context.h index e16420f97..4d825c2f5 100644 --- a/sixtracklib/opencl/context.h +++ b/sixtracklib/opencl/context.h @@ -28,7 +28,7 @@ #include "sixtracklib/common/buffer.h" #include "sixtracklib/common/particles.h" - #include "sixtracklib/opencl/cl.h" + #include "sixtracklib/opencl/opencl.h" #include "sixtracklib/opencl/internal/base_context.h" #include "sixtracklib/opencl/argument.h" #endif /* !defined( SIXTRL_NO_INCLUDES ) */ @@ -38,6 +38,7 @@ #if !defined( SIXTRL_NO_INCLUDES ) #include "sixtracklib/common/buffer.hpp" #include "sixtracklib/common/particles.hpp" + #include "sixtracklib/opencl/opencl.hpp" #endif /* !defined( SIXTRL_NO_INCLUDES ) */ namespace SIXTRL_CXX_NAMESPACE diff --git a/sixtracklib/opencl/internal/argument.cpp b/sixtracklib/opencl/internal/argument.cpp index c6a8993a1..774b12608 100644 --- a/sixtracklib/opencl/internal/argument.cpp +++ b/sixtracklib/opencl/internal/argument.cpp @@ -19,9 
+19,12 @@ #include "sixtracklib/common/control/definitions.h" #include "sixtracklib/common/control/debug_register.h" #include "sixtracklib/common/buffer.h" -#include "sixtracklib/opencl/cl.h" +#include "sixtracklib/opencl/opencl.h" #include "sixtracklib/opencl/context.h" +#if defined( __cplusplus ) +#include "sixtracklib/opencl/opencl.hpp" + namespace SIXTRL_CXX_NAMESPACE { ClArgument::ClArgument( @@ -521,6 +524,7 @@ namespace SIXTRL_CXX_NAMESPACE return; } } +#endif /* C++ */ /* ========================================================================= */ diff --git a/sixtracklib/opencl/internal/base_context.cpp b/sixtracklib/opencl/internal/base_context.cpp index 2b1a1ff81..478cf8c49 100644 --- a/sixtracklib/opencl/internal/base_context.cpp +++ b/sixtracklib/opencl/internal/base_context.cpp @@ -25,9 +25,12 @@ #include "sixtracklib/common/generated/path.h" #include "sixtracklib/common/context/compute_arch.h" -#include "sixtracklib/opencl/cl.h" +#include "sixtracklib/opencl/opencl.h" #include "sixtracklib/opencl/argument.h" +#if defined( __cplusplus ) +#include "sixtracklib/opencl/opencl.hpp" + namespace SIXTRL_CXX_NAMESPACE { namespace @@ -3188,6 +3191,7 @@ namespace SIXTRL_CXX_NAMESPACE return success; } } +#endif /* C++ */ /* ------------------------------------------------------------------------- */ /* ----- Implementation of C Wrapper functions ---- */ diff --git a/sixtracklib/opencl/internal/base_context.h b/sixtracklib/opencl/internal/base_context.h index b39744bc9..baedac9b7 100644 --- a/sixtracklib/opencl/internal/base_context.h +++ b/sixtracklib/opencl/internal/base_context.h @@ -16,7 +16,7 @@ #include "sixtracklib/common/generated/path.h" #include "sixtracklib/common/control/definitions.h" #include "sixtracklib/common/context/compute_arch.h" - #include "sixtracklib/opencl/cl.h" + #include "sixtracklib/opencl/opencl.h" #endif /* !defined( SIXTRL_NO_INCLUDES ) */ #if defined( __cplusplus ) @@ -34,6 +34,10 @@ #include #endif /* !defined( 
SIXTRL_NO_SYSTEM_INCLUDES ) */ + #if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/opencl/opencl.hpp" + #endif /* !defined( SIXTRL_NO_INCLUDES ) */ + using NS(arch_size_t) = std::size_t; namespace SIXTRL_CXX_NAMESPACE @@ -1233,10 +1237,6 @@ typedef SIXTRL_CXX_NAMESPACE::ClContextBase::kernel_arg_type_t #else /* defined( __cplusplus ) */ - #if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) - #include - #endif /* !defined( SIXTRL_NO_SYSTEM_INCLUDES ) */ - typedef void NS(ClContextBase); typedef uint32_t NS(kernel_arg_type_t); diff --git a/sixtracklib/opencl/internal/context.cpp b/sixtracklib/opencl/internal/context.cpp index c2e563440..d51b5d68b 100644 --- a/sixtracklib/opencl/internal/context.cpp +++ b/sixtracklib/opencl/internal/context.cpp @@ -24,9 +24,10 @@ #include "sixtracklib/common/output/elem_by_elem_config.h" #include "sixtracklib/common/output/output_buffer.h" #include "sixtracklib/opencl/internal/base_context.h" -#include "sixtracklib/opencl/cl.h" +#include "sixtracklib/opencl/opencl.h" #if defined( __cplusplus ) +#include "sixtracklib/opencl/opencl.hpp" namespace SIXTRL_CXX_NAMESPACE { diff --git a/sixtracklib/opencl/internal/track_job_cl.cpp b/sixtracklib/opencl/internal/track_job_cl.cpp index 9d139a2ce..9742b617a 100644 --- a/sixtracklib/opencl/internal/track_job_cl.cpp +++ b/sixtracklib/opencl/internal/track_job_cl.cpp @@ -9,6 +9,7 @@ #if defined( __cplusplus ) #include "sixtracklib/common/buffer.hpp" #include "sixtracklib/common/output/output_buffer.hpp" + #include "sixtracklib/opencl/opencl.hpp" #endif /* defined( __cplusplus ) */ #include "sixtracklib/common/definitions.h" @@ -18,6 +19,7 @@ #include "sixtracklib/opencl/context.h" #include "sixtracklib/opencl/argument.h" + #include "sixtracklib/opencl/opencl.h" #endif /* !defined( SIXTRL_NO_INCLUDES ) */ namespace SIXTRL_CXX_NAMESPACE From 5324f9f72811f1e5b4452c7323fd1b935516c28a Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 13:04:22 +0100 Subject: [PATCH 07/77] examples: 
use common compiler flags for C99 and C++ examples --- examples/c99/CMakeLists.txt | 9 +++------ examples/cxx/CMakeLists.txt | 11 ++--------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/examples/c99/CMakeLists.txt b/examples/c99/CMakeLists.txt index ba4c3406d..937b36bd2 100644 --- a/examples/c99/CMakeLists.txt +++ b/examples/c99/CMakeLists.txt @@ -107,16 +107,13 @@ set( ALL_EXAMPLE_TARGETS ${EXAMPLE_TARGETS} ${EXAMPLE_OPENCL_TARGETS} ${EXAMPLE_CUDA_TARGETS} ) if( ALL_EXAMPLE_TARGETS ) - set( EXAMPLE_COMPILE_OPTIONS ${SIXTRACKLIB_CPU_FLAGS} ) - list( APPEND EXAMPLE_COMPILE_OPTIONS -Wall ) - list( APPEND EXAMPLE_COMPILE_OPTIONS -Werror ) - list( APPEND EXAMPLE_COMPILE_OPTIONS -pedantic ) - set_property( TARGET ${ALL_EXAMPLE_TARGETS} PROPERTY LINKER_LANGUAGE C ) set_property( TARGET ${ALL_EXAMPLE_TARGETS} PROPERTY C_STANDARD 99 ) set_property( TARGET ${ALL_EXAMPLE_TARGETS} PROPERTY C_STANDARD_REQUIRED ON ) set_property( TARGET ${ALL_EXAMPLE_TARGETS} - APPEND PROPERTY COMPILE_OPTIONS ${EXAMPLE_COMPILE_OPTIONS} ) + APPEND PROPERTY COMPILE_OPTIONS + ${SIXTRACKLIB_CPU_FLAGS} ${SIXTRACKLIB_C99_FLAGS} + ${SIXTRACKL_C99_AUTOVEC_FLAGS} ) set_property( TARGET ${ALL_EXAMPLE_TARGETS} PROPERTY LINK_LIBRARIES ${EXAMPLE_LINK_LIBRARIES} ) diff --git a/examples/cxx/CMakeLists.txt b/examples/cxx/CMakeLists.txt index 19eb7cc75..fef6abfba 100644 --- a/examples/cxx/CMakeLists.txt +++ b/examples/cxx/CMakeLists.txt @@ -55,17 +55,12 @@ set( ALL_EXAMPLE_TARGETS ${EXAMPLE_TARGETS} ${EXAMPLE_OPENCL_TARGETS} ${EXAMPLE_CUDA_TARGETS} ) if( ALL_EXAMPLE_TARGETS ) - - set( EXAMPLE_COMPILE_OPTIONS ${SIXTRACKLIB_CPU_FLAGS} ) - list( APPEND EXAMPLE_COMPILE_OPTIONS -Wall ) - list( APPEND EXAMPLE_COMPILE_OPTIONS -Werror ) - list( APPEND EXAMPLE_COMPILE_OPTIONS -pedantic ) - set_property( TARGET ${ALL_EXAMPLE_TARGETS} PROPERTY LINKER_LANGUAGE CXX ) set_property( TARGET ${ALL_EXAMPLE_TARGETS} PROPERTY CXX_STANDARD 11 ) set_property( TARGET ${ALL_EXAMPLE_TARGETS} PROPERTY 
CXX_STANDARD_REQUIRED ON ) set_property( TARGET ${ALL_EXAMPLE_TARGETS} - APPEND PROPERTY COMPILE_OPTIONS ${EXAMPLE_COMPILE_OPTIONS} ) + APPEND PROPERTY COMPILE_OPTIONS ${SIXTRACKLIB_CPU_FLAGS} + ${SIXTRACKLIB_CXX_FLAGS} ${SIXTRACKL_CXX_AUTOVEC_FLAGS} ) set_property( TARGET ${ALL_EXAMPLE_TARGETS} PROPERTY LINK_LIBRARIES ${EXAMPLE_LINK_LIBRARIES} ) @@ -186,5 +181,3 @@ if( SIXTRACKL_INSTALL_EXAMPLES AND ALL_EXAMPLE_TARGETS ) configure_file( \"${CMAKELISTS_TXT_IN_PATH}\" \"${CMAKELISTS_TXT_INSTALL_PATH}\" ESCAPE_QUOTES @ONLY )" ) endif() - -# end: examples/cxx/CMakeLists.txt From c6fc50135bdda23bc4114f6ec98a08eab82e4fe7 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 13:07:03 +0100 Subject: [PATCH 08/77] tests/sixtracklib/opencl: uses common compiler flags for OpenCL tests --- tests/sixtracklib/opencl/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/sixtracklib/opencl/CMakeLists.txt b/tests/sixtracklib/opencl/CMakeLists.txt index e8d3e4647..ec1b0d7c7 100644 --- a/tests/sixtracklib/opencl/CMakeLists.txt +++ b/tests/sixtracklib/opencl/CMakeLists.txt @@ -284,7 +284,8 @@ if( GTEST_FOUND ) set_property( TARGET ${C99_UNIT_TEST_TARGETS} PROPERTY CXX_STANDARD 11 ) set_property( TARGET ${C99_UNIT_TEST_TARGETS} PROPERTY CXX_STANDARD_REQUIRED ON ) set_property( TARGET ${C99_UNIT_TEST_TARGETS} PROPERTY COMPILE_OPTIONS - ${SIXTRACKLIB_CPU_FLAGS} -Wall -Werror -pedantic ) + ${SIXTRACKLIB_CPU_FLAGS} ${SIXTRACKLIB_C99_FLAGS} + ${SIXTRACKL_C99_AUTOVEC_FLAGS} ) endif() if( CXX_UNIT_TEST_TARGETS ) @@ -303,6 +304,7 @@ if( GTEST_FOUND ) set_property( TARGET ${CXX_UNIT_TEST_TARGETS} PROPERTY CXX_STANDARD 11 ) set_property( TARGET ${CXX_UNIT_TEST_TARGETS} PROPERTY CXX_STANDARD_REQUIRED ON ) set_property( TARGET ${CXX_UNIT_TEST_TARGETS} PROPERTY COMPILE_OPTIONS - ${SIXTRACKLIB_CPU_FLAGS} -Wall -Werror -pedantic ) + ${SIXTRACKLIB_CPU_FLAGS} ${SIXTRACKLIB_CXX_FLAGS} + ${SIXTRACKL_CXX_AUTOVEC_FLAGS} ) endif() endif() From 
9827bc8b7ca276f8c913c8de31dbbf0963f02bbd Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 13:09:16 +0100 Subject: [PATCH 09/77] external/CL: updates fallback opencl headers --- external/CL/cl2.hpp | 10238 +------------------------------------- external/CL/opencl.h | 33 + external/CL/opencl.hpp | 10285 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 10336 insertions(+), 10220 deletions(-) create mode 100644 external/CL/opencl.h create mode 100644 external/CL/opencl.hpp diff --git a/external/CL/cl2.hpp b/external/CL/cl2.hpp index 7b23e1ab4..18cfe7017 100644 --- a/external/CL/cl2.hpp +++ b/external/CL/cl2.hpp @@ -1,10220 +1,18 @@ -/******************************************************************************* - * Copyright (c) 2008-2016 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS - * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS - * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT - * https://www.khronos.org/registry/ - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33), - * OpenCL 1.2 (rev 15), OpenCL 2.0 (rev 29), OpenCL 2.1 (rev 17), - * and OpenCL 2.2 (V2.2-11). - * \author Lee Howes and Bruce Merry - * - * Derived from the OpenCL 1.x C++ bindings written by - * Benedict R. Gaster, Laurent Morichetti and Lee Howes - * With additions and fixes from: - * Brian Cole, March 3rd 2010 and April 2012 - * Matt Gruenke, April 2012. - * Bruce Merry, February 2013. - * Tom Deakin and Simon McIntosh-Smith, July 2013 - * James Price, 2015- - * \version 2.2.0 - * \date 2019-09-18 - * - * Optional extension support - * - * cl_ext_device_fission - * #define CL_HPP_USE_CL_DEVICE_FISSION - * cl_khr_d3d10_sharing - * #define CL_HPP_USE_DX_INTEROP - * cl_khr_sub_groups - * #define CL_HPP_USE_CL_SUB_GROUPS_KHR - * cl_khr_image2d_from_buffer - * #define CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR - * - * Doxygen documentation for this header is available here: - * - * http://khronosgroup.github.io/OpenCL-CLHPP/ - * - * The latest version of this header can be found on the GitHub releases page: - * - * https://github.com/KhronosGroup/OpenCL-CLHPP/releases - * - * Bugs and patches can be submitted to the GitHub repository: - * - * https://github.com/KhronosGroup/OpenCL-CLHPP - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * The interface is contained with a single C++ header file \em cl2.hpp and all - * definitions are contained within the namespace \em cl. 
There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings; it is enough to simply include \em cl2.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * There are numerous compatibility, portability and memory management - * fixes in the new header as well as additional OpenCL 2.0 features. - * As a result the header is not directly backward compatible and for this - * reason we release it as cl2.hpp rather than a new version of cl.hpp. - * - * - * \section compatibility Compatibility - * Due to the evolution of the underlying OpenCL API the 2.0 C++ bindings - * include an updated approach to defining supported feature versions - * and the range of valid underlying OpenCL runtime versions supported. - * - * The combination of preprocessor macros CL_HPP_TARGET_OPENCL_VERSION and - * CL_HPP_MINIMUM_OPENCL_VERSION control this range. These are three digit - * decimal values representing OpenCL runime versions. The default for - * the target is 200, representing OpenCL 2.0 and the minimum is also - * defined as 200. These settings would use 2.0 API calls only. - * If backward compatibility with a 1.2 runtime is required, the minimum - * version may be set to 120. - * - * Note that this is a compile-time setting, and so affects linking against - * a particular SDK version rather than the versioning of the loaded runtime. - * - * The earlier versions of the header included basic vector and string - * classes based loosely on STL versions. These were difficult to - * maintain and very rarely used. For the 2.0 header we now assume - * the presence of the standard library unless requested otherwise. - * We use std::array, std::vector, std::shared_ptr and std::string - * throughout to safely manage memory and reduce the chance of a - * recurrance of earlier memory management bugs. 
- * - * These classes are used through typedefs in the cl namespace: - * cl::array, cl::vector, cl::pointer and cl::string. - * In addition cl::allocate_pointer forwards to std::allocate_shared - * by default. - * In all cases these standard library classes can be replaced with - * custom interface-compatible versions using the CL_HPP_NO_STD_ARRAY, - * CL_HPP_NO_STD_VECTOR, CL_HPP_NO_STD_UNIQUE_PTR and - * CL_HPP_NO_STD_STRING macros. - * - * The OpenCL 1.x versions of the C++ bindings included a size_t wrapper - * class to interface with kernel enqueue. This caused unpleasant interactions - * with the standard size_t declaration and led to namespacing bugs. - * In the 2.0 version we have replaced this with a std::array-based interface. - * However, the old behaviour can be regained for backward compatibility - * using the CL_HPP_ENABLE_SIZE_T_COMPATIBILITY macro. - * - * Finally, the program construction interface used a clumsy vector-of-pairs - * design in the earlier versions. We have replaced that with a cleaner - * vector-of-vectors and vector-of-strings design. However, for backward - * compatibility old behaviour can be regained with the - * CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY macro. - * - * In OpenCL 2.0 OpenCL C is not entirely backward compatibility with - * earlier versions. As a result a flag must be passed to the OpenCL C - * compiled to request OpenCL 2.0 compilation of kernels with 1.2 as - * the default in the absence of the flag. - * In some cases the C++ bindings automatically compile code for ease. - * For those cases the compilation defaults to OpenCL C 2.0. - * If this is not wanted, the CL_HPP_CL_1_2_DEFAULT_BUILD macro may - * be specified to assume 1.2 compilation. - * If more fine-grained decisions on a per-kernel bases are required - * then explicit build operations that take the flag should be used. 
- * - * - * \section parameterization Parameters - * This header may be parameterized by a set of preprocessor macros. - * - * - CL_HPP_TARGET_OPENCL_VERSION - * - * Defines the target OpenCL runtime version to build the header - * against. Defaults to 200, representing OpenCL 2.0. - * - * - CL_HPP_NO_STD_STRING - * - * Do not use the standard library string class. cl::string is not - * defined and may be defined by the user before cl2.hpp is - * included. - * - * - CL_HPP_NO_STD_VECTOR - * - * Do not use the standard library vector class. cl::vector is not - * defined and may be defined by the user before cl2.hpp is - * included. - * - * - CL_HPP_NO_STD_ARRAY - * - * Do not use the standard library array class. cl::array is not - * defined and may be defined by the user before cl2.hpp is - * included. - * - * - CL_HPP_NO_STD_UNIQUE_PTR - * - * Do not use the standard library unique_ptr class. cl::pointer and - * the cl::allocate_pointer functions are not defined and may be - * defined by the user before cl2.hpp is included. - * - * - CL_HPP_ENABLE_DEVICE_FISSION - * - * Enables device fission for OpenCL 1.2 platforms. - * - * - CL_HPP_ENABLE_EXCEPTIONS - * - * Enable exceptions for use in the C++ bindings header. This is the - * preferred error handling mechanism but is not required. - * - * - CL_HPP_ENABLE_SIZE_T_COMPATIBILITY - * - * Backward compatibility option to support cl.hpp-style size_t - * class. Replaces the updated std::array derived version and - * removal of size_t from the namespace. Note that in this case the - * new size_t class is placed in the cl::compatibility namespace and - * thus requires an additional using declaration for direct backward - * compatibility. - * - * - CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY - * - * Enable older vector of pairs interface for construction of - * programs. 
- * - * - CL_HPP_CL_1_2_DEFAULT_BUILD - * - * Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0 - * applies to use of cl::Program construction and other program - * build variants. - * - * - CL_HPP_USE_CL_SUB_GROUPS_KHR - * - * Enable the cl_khr_subgroups extension. - * - * - CL_HPP_USE_IL_KHR - * - * Enable the cl_khr_il_program extension. - * - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - #define CL_HPP_ENABLE_EXCEPTIONS - #define CL_HPP_TARGET_OPENCL_VERSION 200 - - #include - #include - #include - #include - #include - - const int numElements = 32; - - int main(void) - { - // Filter for a 2.0 platform and set it as the default - std::vector platforms; - cl::Platform::get(&platforms); - cl::Platform plat; - for (auto &p : platforms) { - std::string platver = p.getInfo(); - if (platver.find("OpenCL 2.") != std::string::npos) { - plat = p; - } - } - if (plat() == 0) { - std::cout << "No OpenCL 2.0 platform found."; - return -1; - } - - cl::Platform newP = cl::Platform::setDefault(plat); - if (newP != plat) { - std::cout << "Error setting default platform."; - return -1; - } - - // Use C++11 raw string literals for kernel source code - std::string kernel1{R"CLC( - global int globalA; - kernel void updateGlobal() - { - globalA = 75; - } - )CLC"}; - std::string kernel2{R"CLC( - typedef struct { global int *bar; } Foo; - kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, - global int *output, int val, write_only pipe int outPipe, queue_t childQueue) - { - output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar); - write_pipe(outPipe, &val); - queue_t default_queue = get_default_queue(); - ndrange_t ndrange = 
ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); - - // Have a child kernel write into third quarter of output - enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, - ^{ - output[get_global_size(0)*2 + get_global_id(0)] = - inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA; - }); - - // Have a child kernel write into last quarter of output - enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, - ^{ - output[get_global_size(0)*3 + get_global_id(0)] = - inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2; - }); - } - )CLC"}; - - // New simpler string interface style - std::vector programStrings {kernel1, kernel2}; - - cl::Program vectorAddProgram(programStrings); - try { - vectorAddProgram.build("-cl-std=CL2.0"); - } - catch (...) { - // Print build info for all devices - cl_int buildErr = CL_SUCCESS; - auto buildInfo = vectorAddProgram.getBuildInfo(&buildErr); - for (auto &pair : buildInfo) { - std::cerr << pair.second << std::endl << std::endl; - } - - return 1; - } - - typedef struct { int *bar; } Foo; - - // Get and run kernel that initializes the program-scope global - // A test for kernels that take no arguments - auto program2Kernel = - cl::KernelFunctor<>(vectorAddProgram, "updateGlobal"); - program2Kernel( - cl::EnqueueArgs( - cl::NDRange(1))); - - ////////////////// - // SVM allocations - - auto anSVMInt = cl::allocate_svm>(); - *anSVMInt = 5; - cl::SVMAllocator>> svmAllocReadOnly; - auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); - fooPointer->bar = anSVMInt.get(); - cl::SVMAllocator> svmAlloc; - std::vector>> inputA(numElements, 1, svmAlloc); - cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); - - // - ////////////// - - // Traditional cl_mem allocations - std::vector output(numElements, 0xdeadbeef); - cl::Buffer outputBuffer(begin(output), end(output), false); - cl::Pipe 
aPipe(sizeof(cl_int), numElements / 2); - - // Default command queue, also passed in as a parameter - cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( - cl::Context::getDefault(), cl::Device::getDefault()); - - auto vectorAddKernel = - cl::KernelFunctor< - decltype(fooPointer)&, - int*, - cl::coarse_svm_vector&, - cl::Buffer, - int, - cl::Pipe&, - cl::DeviceCommandQueue - >(vectorAddProgram, "vectorAdd"); - - // Ensure that the additional SVM pointer is available to the kernel - // This one was not passed as a parameter - vectorAddKernel.setSVMPointers(anSVMInt); - - // Hand control of coarse allocations to runtime - cl::enqueueUnmapSVM(anSVMInt); - cl::enqueueUnmapSVM(fooPointer); - cl::unmapSVM(inputB); - cl::unmapSVM(output2); - - cl_int error; - vectorAddKernel( - cl::EnqueueArgs( - cl::NDRange(numElements/2), - cl::NDRange(numElements/2)), - fooPointer, - inputA.data(), - inputB, - outputBuffer, - 3, - aPipe, - defaultDeviceQueue, - error - ); - - cl::copy(outputBuffer, begin(output), end(output)); - // Grab the SVM output vector using a map - cl::mapSVM(output2); - - cl::Device d = cl::Device::getDefault(); - - std::cout << "Output:\n"; - for (int i = 1; i < numElements; ++i) { - std::cout << "\t" << output[i] << "\n"; - } - std::cout << "\n\n"; - - return 0; - } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -/* Handle deprecated preprocessor definitions. In each case, we only check for - * the old name if the new name is not defined, so that user code can define - * both and hence work with either version of the bindings. - */ -#if !defined(CL_HPP_USE_DX_INTEROP) && defined(USE_DX_INTEROP) -# pragma message("cl2.hpp: USE_DX_INTEROP is deprecated. Define CL_HPP_USE_DX_INTEROP instead") -# define CL_HPP_USE_DX_INTEROP -#endif -#if !defined(CL_HPP_USE_CL_DEVICE_FISSION) && defined(USE_CL_DEVICE_FISSION) -# pragma message("cl2.hpp: USE_CL_DEVICE_FISSION is deprecated. 
Define CL_HPP_USE_CL_DEVICE_FISSION instead") -# define CL_HPP_USE_CL_DEVICE_FISSION -#endif -#if !defined(CL_HPP_ENABLE_EXCEPTIONS) && defined(__CL_ENABLE_EXCEPTIONS) -# pragma message("cl2.hpp: __CL_ENABLE_EXCEPTIONS is deprecated. Define CL_HPP_ENABLE_EXCEPTIONS instead") -# define CL_HPP_ENABLE_EXCEPTIONS -#endif -#if !defined(CL_HPP_NO_STD_VECTOR) && defined(__NO_STD_VECTOR) -# pragma message("cl2.hpp: __NO_STD_VECTOR is deprecated. Define CL_HPP_NO_STD_VECTOR instead") -# define CL_HPP_NO_STD_VECTOR -#endif -#if !defined(CL_HPP_NO_STD_STRING) && defined(__NO_STD_STRING) -# pragma message("cl2.hpp: __NO_STD_STRING is deprecated. Define CL_HPP_NO_STD_STRING instead") -# define CL_HPP_NO_STD_STRING -#endif -#if defined(VECTOR_CLASS) -# pragma message("cl2.hpp: VECTOR_CLASS is deprecated. Alias cl::vector instead") -#endif -#if defined(STRING_CLASS) -# pragma message("cl2.hpp: STRING_CLASS is deprecated. Alias cl::string instead.") -#endif -#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) && defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -# pragma message("cl2.hpp: __CL_USER_OVERRIDE_ERROR_STRINGS is deprecated. Define CL_HPP_USER_OVERRIDE_ERROR_STRINGS instead") -# define CL_HPP_USER_OVERRIDE_ERROR_STRINGS -#endif - -/* Warn about features that are no longer supported - */ -#if defined(__USE_DEV_VECTOR) -# pragma message("cl2.hpp: __USE_DEV_VECTOR is no longer supported. Expect compilation errors") -#endif -#if defined(__USE_DEV_STRING) -# pragma message("cl2.hpp: __USE_DEV_STRING is no longer supported. Expect compilation errors") -#endif - -/* Detect which version to target */ -#if !defined(CL_HPP_TARGET_OPENCL_VERSION) -# pragma message("cl2.hpp: CL_HPP_TARGET_OPENCL_VERSION is not defined. 
It will default to 220 (OpenCL 2.2)") -# define CL_HPP_TARGET_OPENCL_VERSION 220 -#endif -#if CL_HPP_TARGET_OPENCL_VERSION != 100 && \ - CL_HPP_TARGET_OPENCL_VERSION != 110 && \ - CL_HPP_TARGET_OPENCL_VERSION != 120 && \ - CL_HPP_TARGET_OPENCL_VERSION != 200 && \ - CL_HPP_TARGET_OPENCL_VERSION != 210 && \ - CL_HPP_TARGET_OPENCL_VERSION != 220 -# pragma message("cl2.hpp: CL_HPP_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210 or 220). It will be set to 220") -# undef CL_HPP_TARGET_OPENCL_VERSION -# define CL_HPP_TARGET_OPENCL_VERSION 220 -#endif - -/* Forward target OpenCL version to C headers if necessary */ -#if defined(CL_TARGET_OPENCL_VERSION) -/* Warn if prior definition of CL_TARGET_OPENCL_VERSION is lower than - * requested C++ bindings version */ -#if CL_TARGET_OPENCL_VERSION < CL_HPP_TARGET_OPENCL_VERSION -# pragma message("CL_TARGET_OPENCL_VERSION is already defined as is lower than CL_HPP_TARGET_OPENCL_VERSION") -#endif -#else -# define CL_TARGET_OPENCL_VERSION CL_HPP_TARGET_OPENCL_VERSION -#endif - -#if !defined(CL_HPP_MINIMUM_OPENCL_VERSION) -# define CL_HPP_MINIMUM_OPENCL_VERSION 200 -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION != 100 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 110 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 120 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 200 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 210 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 220 -# pragma message("cl2.hpp: CL_HPP_MINIMUM_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210 or 220). 
It will be set to 100") -# undef CL_HPP_MINIMUM_OPENCL_VERSION -# define CL_HPP_MINIMUM_OPENCL_VERSION 100 -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION > CL_HPP_TARGET_OPENCL_VERSION -# error "CL_HPP_MINIMUM_OPENCL_VERSION must not be greater than CL_HPP_TARGET_OPENCL_VERSION" -#endif - -#if CL_HPP_MINIMUM_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) -# define CL_USE_DEPRECATED_OPENCL_1_0_APIS -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -# define CL_USE_DEPRECATED_OPENCL_1_1_APIS -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) -# define CL_USE_DEPRECATED_OPENCL_1_2_APIS -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) -# define CL_USE_DEPRECATED_OPENCL_2_0_APIS -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) -# define CL_USE_DEPRECATED_OPENCL_2_1_APIS -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) -# define CL_USE_DEPRECATED_OPENCL_2_2_APIS -#endif - -#ifdef _WIN32 - -#include - -#if defined(CL_HPP_USE_DX_INTEROP) -#include -#include -#endif -#endif // _WIN32 - -#if defined(_MSC_VER) -#include -#endif // _MSC_VER - - // Check for a valid C++ version - -// Need to do both tests here because for some reason __cplusplus is not -// updated in visual studio -#if (!defined(_MSC_VER) && __cplusplus < 201103L) || (defined(_MSC_VER) && _MSC_VER < 1700) -#error Visual studio 2013 or another C++11-supporting compiler required -#endif - -// -#if defined(CL_HPP_USE_CL_DEVICE_FISSION) || defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#else -#include -#endif // !__APPLE__ - -#if (__cplusplus >= 201103L) -#define CL_HPP_NOEXCEPT_ noexcept -#else -#define CL_HPP_NOEXCEPT_ -#endif - -#if defined(_MSC_VER) -# define CL_HPP_DEFINE_STATIC_MEMBER_ 
__declspec(selectany) -#elif defined(__MINGW32__) -# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((selectany)) -#else -# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((weak)) -#endif // !_MSC_VER - -// Define deprecated prefixes and suffixes to ensure compilation -// in case they are not pre-defined -#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) - -#if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include -#include -#include -#include -#include -#include - - -// Define a size_type to represent a correctly resolved size_t -#if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) -namespace cl { - using size_type = ::size_t; -} // namespace cl -#else // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) -namespace cl { - using size_type = size_t; -} // namespace cl -#endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) - - -#if defined(CL_HPP_ENABLE_EXCEPTIONS) -#include -#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) - -#if !defined(CL_HPP_NO_STD_VECTOR) -#include -namespace cl { - template < class T, class Alloc = std::allocator > - using vector = std::vector; -} // namespace cl -#endif // #if !defined(CL_HPP_NO_STD_VECTOR) - -#if !defined(CL_HPP_NO_STD_STRING) -#include -namespace cl { - using string = std::string; -} // namespace cl -#endif // #if !defined(CL_HPP_NO_STD_STRING) - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -#if 
!defined(CL_HPP_NO_STD_UNIQUE_PTR) -#include -namespace cl { - // Replace unique_ptr and allocate_pointer for internal use - // to allow user to replace them - template - using pointer = std::unique_ptr; -} // namespace cl -#endif -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if !defined(CL_HPP_NO_STD_ARRAY) -#include -namespace cl { - template < class T, size_type N > - using array = std::array; -} // namespace cl -#endif // #if !defined(CL_HPP_NO_STD_ARRAY) - -// Define size_type appropriately to allow backward-compatibility -// use of the old size_t interface class -#if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) -namespace cl { - namespace compatibility { - /*! \brief class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, whose - * size is known statically. - */ - template - class size_t - { - private: - size_type data_[N]; - - public: - //! \brief Initialize size_t to all 0s - size_t() - { - for (int i = 0; i < N; ++i) { - data_[i] = 0; - } - } - - size_t(const array &rhs) - { - for (int i = 0; i < N; ++i) { - data_[i] = rhs[i]; - } - } - - size_type& operator[](int index) - { - return data_[index]; - } - - const size_type& operator[](int index) const - { - return data_[index]; - } - - //! \brief Conversion operator to T*. - operator size_type* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const size_type* () const { return data_; } - - operator array() const - { - array ret; - - for (int i = 0; i < N; ++i) { - ret[i] = data_[i]; - } - return ret; - } - }; - } // namespace compatibility - - template - using size_t = compatibility::size_t; -} // namespace cl -#endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) - -// Helper alias to avoid confusing the macros -namespace cl { - namespace detail { - using size_t_array = array; - } // namespace detail -} // namespace cl - - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. 
- * - */ -namespace cl { - class Memory; - -#define CL_HPP_INIT_CL_EXT_FCN_PTR_(name) \ - if (!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if (!pfn_##name) { \ - } \ - } - -#define CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, name) \ - if (!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddressForPlatform(platform, #name); \ - if (!pfn_##name) { \ - } \ - } - - class Program; - class Device; - class Context; - class CommandQueue; - class DeviceCommandQueue; - class Memory; - class Buffer; - class Pipe; - -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - /*! \brief Exception class - * - * This may be thrown by API functions when CL_HPP_ENABLE_EXCEPTIONS is defined. - */ - class Error : public std::exception - { - private: - cl_int err_; - const char * errStr_; - public: - /*! \brief Create a new CL error exception for a given error code - * and corresponding message. - * - * \param err error code value. - * - * \param errStr a descriptive string that must remain in scope until - * handling of the exception has concluded. If set, it - * will be returned by what(). - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. 
- */ - cl_int err(void) const { return err_; } - }; -#define CL_HPP_ERR_STR_(x) #x -#else -#define CL_HPP_ERR_STR_(x) NULL -#endif // CL_HPP_ENABLE_EXCEPTIONS - - -namespace detail -{ -#if defined(CL_HPP_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - (void) errStr; // suppress unused variable warning - return err; -} -#endif // CL_HPP_ENABLE_EXCEPTIONS -} - - - -//! \cond DOXYGEN_DETAIL -#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR CL_HPP_ERR_STR_(clGetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR CL_HPP_ERR_STR_(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR CL_HPP_ERR_STR_(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR CL_HPP_ERR_STR_(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR CL_HPP_ERR_STR_(clGetContextInfo) -#define __GET_EVENT_INFO_ERR CL_HPP_ERR_STR_(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR CL_HPP_ERR_STR_(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR CL_HPP_ERR_STR_(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR CL_HPP_ERR_STR_(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR CL_HPP_ERR_STR_(clGetKernelInfo) -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __GET_KERNEL_ARG_INFO_ERR CL_HPP_ERR_STR_(clGetKernelArgInfo) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -#define __GET_KERNEL_SUB_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelSubGroupInfo) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#define __GET_KERNEL_WORK_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR CL_HPP_ERR_STR_(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR CL_HPP_ERR_STR_(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR 
CL_HPP_ERR_STR_(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_ERR CL_HPP_ERR_STR_(clCreateContext) -#define __CREATE_CONTEXT_FROM_TYPE_ERR CL_HPP_ERR_STR_(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR CL_HPP_ERR_STR_(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR CL_HPP_ERR_STR_(clCreateBuffer) -#define __COPY_ERR CL_HPP_ERR_STR_(cl::copy) -#define __CREATE_SUBBUFFER_ERR CL_HPP_ERR_STR_(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) -#define __CREATE_GL_RENDER_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetGLObjectInfo) -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __CREATE_IMAGE_ERR CL_HPP_ERR_STR_(clCreateImage) -#define __CREATE_GL_TEXTURE_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture) -#define __IMAGE_DIMENSION_ERR CL_HPP_ERR_STR_(Incorrect image dimensions) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR CL_HPP_ERR_STR_(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR CL_HPP_ERR_STR_(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR CL_HPP_ERR_STR_(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR CL_HPP_ERR_STR_(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clWaitForEvents) - -#define __CREATE_KERNEL_ERR CL_HPP_ERR_STR_(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR CL_HPP_ERR_STR_(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR CL_HPP_ERR_STR_(clCreateProgramWithSource) -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -#define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithIL) -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -#define __CREATE_PROGRAM_WITH_BINARY_ERR CL_HPP_ERR_STR_(clCreateProgramWithBinary) -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithIL) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 -#if 
CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR CL_HPP_ERR_STR_(clCreateProgramWithBuiltInKernels) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __BUILD_PROGRAM_ERR CL_HPP_ERR_STR_(clBuildProgram) -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __COMPILE_PROGRAM_ERR CL_HPP_ERR_STR_(clCompileProgram) -#define __LINK_PROGRAM_ERR CL_HPP_ERR_STR_(clLinkProgram) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __CREATE_KERNELS_IN_PROGRAM_ERR CL_HPP_ERR_STR_(clCreateKernelsInProgram) - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -#define __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateCommandQueueWithProperties) -#define __CREATE_SAMPLER_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateSamplerWithProperties) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#define __SET_COMMAND_QUEUE_PROPERTY_ERR CL_HPP_ERR_STR_(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferRect) -#define __ENQUEUE_FILL_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueFillBuffer) -#define __ENQUEUE_READ_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyImage) -#define __ENQUEUE_FILL_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueFillImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueMapBuffer) -#define 
__ENQUEUE_MAP_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR CL_HPP_ERR_STR_(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR CL_HPP_ERR_STR_(clEnqueueNDRangeKernel) -#define __ENQUEUE_NATIVE_KERNEL CL_HPP_ERR_STR_(clEnqueueNativeKernel) -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR CL_HPP_ERR_STR_(clEnqueueMigrateMemObjects) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __ENQUEUE_MIGRATE_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMigrateMem) -#define __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clSetDefaultDeviceCommandQueue) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 - - -#define __ENQUEUE_ACQUIRE_GL_ERR CL_HPP_ERR_STR_(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR CL_HPP_ERR_STR_(clEnqueueReleaseGLObjects) - -#define __CREATE_PIPE_ERR CL_HPP_ERR_STR_(clCreatePipe) -#define __GET_PIPE_INFO_ERR CL_HPP_ERR_STR_(clGetPipeInfo) - - -#define __RETAIN_ERR CL_HPP_ERR_STR_(Retain Object) -#define __RELEASE_ERR CL_HPP_ERR_STR_(Release Object) -#define __FLUSH_ERR CL_HPP_ERR_STR_(clFlush) -#define __FINISH_ERR CL_HPP_ERR_STR_(clFinish) -#define __VECTOR_CAPACITY_ERR CL_HPP_ERR_STR_(Vector capacity error) - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __GET_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetHostTimer) -#define __GET_DEVICE_AND_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetDeviceAndHostTimer) -#endif -#if CL_HPP_TARGET_OPENCL_VERSION >= 220 -#define __SET_PROGRAM_RELEASE_CALLBACK_ERR CL_HPP_ERR_STR_(clSetProgramReleaseCallback) -#define __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR CL_HPP_ERR_STR_(clSetProgramSpecializationConstant) -#endif - - -/** - * CL 1.2 version that uses device fission. 
- */ -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevices) -#else -#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevicesEXT) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __ENQUEUE_MARKER_ERR CL_HPP_ERR_STR_(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR CL_HPP_ERR_STR_(clEnqueueBarrier) -#define __UNLOAD_COMPILER_ERR CL_HPP_ERR_STR_(clUnloadCompiler) -#define __CREATE_GL_TEXTURE_2D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture2D) -#define __CREATE_GL_TEXTURE_3D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture3D) -#define __CREATE_IMAGE2D_ERR CL_HPP_ERR_STR_(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR CL_HPP_ERR_STR_(clCreateImage3D) -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -/** - * Deprecated APIs for 2.0 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) -#define __CREATE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clCreateCommandQueue) -#define __ENQUEUE_TASK_ERR CL_HPP_ERR_STR_(clEnqueueTask) -#define __CREATE_SAMPLER_ERR CL_HPP_ERR_STR_(clCreateSampler) -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -/** - * CL 1.2 marker and barrier commands - */ -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __ENQUEUE_MARKER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueMarkerWithWaitList) -#define __ENQUEUE_BARRIER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueBarrierWithWaitList) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __CLONE_KERNEL_ERR CL_HPP_ERR_STR_(clCloneKernel) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 - -#endif // CL_HPP_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - - -namespace detail { - -// Generic getInfoHelper. 
The final parameter is used to guide overload -// resolution: the actual parameter passed is an int, which makes this -// a worse conversion sequence than a specialization that declares the -// parameter as an int. -template -inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) -{ - return f(name, sizeof(T), param, NULL); -} - -// Specialized for getInfo -// Assumes that the output vector was correctly resized on the way in -template -inline cl_int getInfoHelper(Func f, cl_uint name, vector>* param, int) -{ - if (name != CL_PROGRAM_BINARIES) { - return CL_INVALID_VALUE; - } - if (param) { - // Create array of pointers, calculate total size and pass pointer array in - size_type numBinaries = param->size(); - vector binariesPointers(numBinaries); - - for (size_type i = 0; i < numBinaries; ++i) - { - binariesPointers[i] = (*param)[i].data(); - } - - cl_int err = f(name, numBinaries * sizeof(unsigned char*), binariesPointers.data(), NULL); - - if (err != CL_SUCCESS) { - return err; - } - } - - - return CL_SUCCESS; -} - -// Specialized getInfoHelper for vector params -template -inline cl_int getInfoHelper(Func f, cl_uint name, vector* param, long) -{ - size_type required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - const size_type elements = required / sizeof(T); - - // Temporary to avoid changing param on an error - vector localData(elements); - err = f(name, required, localData.data(), NULL); - if (err != CL_SUCCESS) { - return err; - } - if (param) { - *param = std::move(localData); - } - - return CL_SUCCESS; -} - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. 
- */ -template -inline cl_int getInfoHelper( - Func f, cl_uint name, vector* param, int, typename T::cl_type = 0) -{ - size_type required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - const size_type elements = required / sizeof(typename T::cl_type); - - vector value(elements); - err = f(name, required, value.data(), NULL); - if (err != CL_SUCCESS) { - return err; - } - - if (param) { - // Assign to convert CL type to T for each element - param->resize(elements); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < elements; i++) { - (*param)[i] = T(value[i], true); - } - } - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for string params -template -inline cl_int getInfoHelper(Func f, cl_uint name, string* param, long) -{ - size_type required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - // std::string has a constant data member - // a char vector does not - if (required > 0) { - vector value(required); - err = f(name, required, value.data(), NULL); - if (err != CL_SUCCESS) { - return err; - } - if (param) { - param->assign(begin(value), prev(end(value))); - } - } - else if (param) { - param->assign(""); - } - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for clsize_t params -template -inline cl_int getInfoHelper(Func f, cl_uint name, array* param, long) -{ - size_type required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - size_type elements = required / sizeof(size_type); - vector value(elements, 0); - - err = f(name, required, value.data(), NULL); - if (err != CL_SUCCESS) { - return err; - } - - // Bound the copy with N to prevent overruns - // if passed N > than the amount copied - if (elements > N) { - elements = N; - } - for (size_type i = 0; i < elements; ++i) { - (*param)[i] = value[i]; - } - - return CL_SUCCESS; -} 
- -template struct ReferenceHandler; - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) -{ - typename T::cl_type value; - cl_int err = f(name, sizeof(value), &value, NULL); - if (err != CL_SUCCESS) { - return err; - } - *param = value; - if (value != NULL) - { - err = param->retain(); - if (err != CL_SUCCESS) { - return err; - } - } - return CL_SUCCESS; -} - -#define CL_HPP_PARAM_NAME_INFO_1_0_(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, string) \ - F(cl_platform_info, CL_PLATFORM_VERSION, string) \ - F(cl_platform_info, CL_PLATFORM_NAME, string) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, string) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, string) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_type) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, cl::vector) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ - F(cl_device_info, 
CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, size_type) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_type) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, 
CL_DEVICE_NAME, string) \ - F(cl_device_info, CL_DEVICE_VENDOR, string) \ - F(cl_device_info, CL_DRIVER_VERSION, string) \ - F(cl_device_info, CL_DEVICE_PROFILE, string) \ - F(cl_device_info, CL_DEVICE_VERSION, string) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, string) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, cl::vector) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, cl::vector) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, size_type) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, size_type) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, size_type) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, size_type) \ - F(cl_image_info, CL_IMAGE_WIDTH, size_type) \ - F(cl_image_info, CL_IMAGE_HEIGHT, size_type) \ - F(cl_image_info, CL_IMAGE_DEPTH, size_type) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ - \ - F(cl_program_info, 
CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, cl::vector) \ - F(cl_program_info, CL_PROGRAM_SOURCE, string) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, cl::vector) \ - F(cl_program_info, CL_PROGRAM_BINARIES, cl::vector>) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, string) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, string) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, string) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, size_type) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::detail::size_t_array) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - - -#define CL_HPP_PARAM_NAME_INFO_1_1_(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - 
F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, string) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, size_type) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) - -#define CL_HPP_PARAM_NAME_INFO_1_2_(F) \ - F(cl_program_info, CL_PROGRAM_NUM_KERNELS, size_type) \ - F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, string) \ - \ - F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ - \ - F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, string) \ - \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, string) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ - \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ - F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, size_type) \ - F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ - \ - F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ - F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ - F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) - -#define CL_HPP_PARAM_NAME_INFO_2_0_(F) \ - F(cl_device_info, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, cl_uint) \ - 
F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_QUEUES, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_EVENTS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_PIPE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_PIPE_MAX_PACKET_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities) \ - F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_SIZE, cl_uint) \ - F(cl_mem_info, CL_MEM_USES_SVM_POINTER, cl_bool) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, size_type) \ - F(cl_pipe_info, CL_PIPE_PACKET_SIZE, cl_uint) \ - F(cl_pipe_info, CL_PIPE_MAX_PACKETS, cl_uint) - -#define CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(F) \ - F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, size_type) - -#define CL_HPP_PARAM_NAME_INFO_IL_KHR_(F) \ - F(cl_device_info, CL_DEVICE_IL_VERSION_KHR, string) \ - F(cl_program_info, CL_PROGRAM_IL_KHR, cl::vector) - -#define CL_HPP_PARAM_NAME_INFO_2_1_(F) \ - F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, size_type) \ - F(cl_program_info, CL_PROGRAM_IL, cl::vector) \ - F(cl_kernel_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ - F(cl_kernel_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) \ - F(cl_device_info, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint) \ - F(cl_device_info, CL_DEVICE_IL_VERSION, string) \ - F(cl_device_info, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, cl_bool) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE_DEFAULT, cl::DeviceCommandQueue) \ - F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, 
size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) - -#define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ - F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ - F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, cl_bool) - -#define CL_HPP_PARAM_NAME_DEVICE_FISSION_(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, cl::vector) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, cl::vector) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, cl::vector) - -template -struct param_traits {}; - -#define CL_HPP_DECLARE_PARAM_TRAITS_(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -CL_HPP_PARAM_NAME_INFO_1_0_(CL_HPP_DECLARE_PARAM_TRAITS_) -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 -CL_HPP_PARAM_NAME_INFO_1_1_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -CL_HPP_PARAM_NAME_INFO_1_2_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -CL_HPP_PARAM_NAME_INFO_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -CL_HPP_PARAM_NAME_INFO_2_1_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 -#if CL_HPP_TARGET_OPENCL_VERSION >= 220 -CL_HPP_PARAM_NAME_INFO_2_2_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 - -#if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 -CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // #if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) && 
CL_HPP_TARGET_OPENCL_VERSION < 210 - -#if defined(CL_HPP_USE_IL_KHR) -CL_HPP_PARAM_NAME_INFO_IL_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // #if defined(CL_HPP_USE_IL_KHR) - - -// Flags deprecated in OpenCL 2.0 -#define CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(F) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) - -#define CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(F) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) - -#define CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(F) \ - F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) - -// Include deprecated query flags based on versions -// Only include deprecated 1.0 flags if 2.0 not active as there is an enum clash -#if CL_HPP_TARGET_OPENCL_VERSION > 100 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 && CL_HPP_TARGET_OPENCL_VERSION < 200 -CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 110 -#if CL_HPP_TARGET_OPENCL_VERSION > 110 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 -CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 -#if CL_HPP_TARGET_OPENCL_VERSION > 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 -CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - -#if defined(CL_HPP_USE_CL_DEVICE_FISSION) -CL_HPP_PARAM_NAME_DEVICE_FISSION_(CL_HPP_DECLARE_PARAM_TRAITS_); -#endif // CL_HPP_USE_CL_DEVICE_FISSION - -#ifdef CL_PLATFORM_ICD_SUFFIX_KHR -CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, string) -#endif - -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) -#endif - -#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, vector) -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 
-CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) -#endif - -#ifdef CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, cl_ulong) -#endif -#ifdef CL_DEVICE_JOB_SLOTS_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_JOB_SLOTS_ARM, cl_uint) -#endif - -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) -#endif -#ifdef CL_DEVICE_WARP_SIZE_NV 
-CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) -#endif -#ifdef CL_DEVICE_GPU_OVERLAP_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) -#endif -#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) -#endif -#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) -#endif - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return getInfoHelper(f, name, param, 0); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, size_type size, void* value, size_type* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, size_type size, void* value, size_type* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return getInfoHelper(f0, name, param, 0); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return getInfoHelper(f0, name, param, 0); -} - - -template -struct ReferenceHandler -{ }; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/** - * OpenCL 1.2 devices do have retain/release. - */ -template <> -struct ReferenceHandler -{ - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. 
- * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int retain(cl_device_id device) - { return ::clRetainDevice(device); } - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int release(cl_device_id device) - { return ::clReleaseDevice(device); } -}; -#else // CL_HPP_TARGET_OPENCL_VERSION >= 120 -/** - * OpenCL 1.1 devices do not have retain/release. - */ -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_SUCCESS; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_SUCCESS; } -}; -#endif // ! (CL_HPP_TARGET_OPENCL_VERSION >= 120) - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_SUCCESS; } - // cl_platform_id does not have release(). 
- static cl_int release(cl_platform_id) - { return CL_SUCCESS; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 -// Extracts version number with major in the upper 16 bits, minor in the lower 16 -static cl_uint getVersion(const vector &versionInfo) -{ - int highVersion = 0; - int lowVersion = 0; - int index = 7; - while(versionInfo[index] != '.' 
) { - highVersion *= 10; - highVersion += versionInfo[index]-'0'; - ++index; - } - ++index; - while(versionInfo[index] != ' ' && versionInfo[index] != '\0') { - lowVersion *= 10; - lowVersion += versionInfo[index]-'0'; - ++index; - } - return (highVersion << 16) | lowVersion; -} - -static cl_uint getPlatformVersion(cl_platform_id platform) -{ - size_type size = 0; - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); - - vector versionInfo(size); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, versionInfo.data(), &size); - return getVersion(versionInfo); -} - -static cl_uint getDevicePlatformVersion(cl_device_id device) -{ - cl_platform_id platform; - clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); - return getPlatformVersion(platform); -} - -static cl_uint getContextPlatformVersion(cl_context context) -{ - // The platform cannot be queried directly, so we first have to grab a - // device and obtain its context - size_type size = 0; - clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); - if (size == 0) - return 0; - vector devices(size/sizeof(cl_device_id)); - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices.data(), NULL); - return getDevicePlatformVersion(devices[0]); -} -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - Wrapper(const cl_type &obj, bool retainObject) : object_(obj) - { - if (retainObject) { - detail::errHandler(retain(), __RETAIN_ERR); - } - } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - detail::errHandler(retain(), __RETAIN_ERR); - } - - Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT_ - { - object_ = rhs.object_; - rhs.object_ = NULL; - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (this != &rhs) { - 
detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs.object_; - detail::errHandler(retain(), __RETAIN_ERR); - } - return *this; - } - - Wrapper& operator = (Wrapper&& rhs) - { - if (this != &rhs) { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs.object_; - rhs.object_ = NULL; - } - return *this; - } - - Wrapper& operator = (const cl_type &rhs) - { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs; - return *this; - } - - const cl_type& operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - - cl_type get() const { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - cl_int retain() const - { - if (object_ != nullptr) { - return ReferenceHandler::retain(object_); - } - else { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if (object_ != nullptr) { - return ReferenceHandler::release(object_); - } - else { - return CL_SUCCESS; - } - } -}; - -template <> -class Wrapper -{ -public: - typedef cl_device_id cl_type; - -protected: - cl_type object_; - bool referenceCountable_; - - static bool isReferenceCountable(cl_device_id device) - { - bool retVal = false; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - if (device != NULL) { - int version = getDevicePlatformVersion(device); - if(version > ((1 << 16) + 1)) { - retVal = true; - } - } -#else // CL_HPP_MINIMUM_OPENCL_VERSION < 120 - retVal = true; -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - return retVal; - } - -public: - Wrapper() : object_(NULL), referenceCountable_(false) - { - } - - Wrapper(const cl_type &obj, bool retainObject) : - object_(obj), - referenceCountable_(false) - { - referenceCountable_ = isReferenceCountable(obj); - - if (retainObject) { - detail::errHandler(retain(), __RETAIN_ERR); - } - } - - ~Wrapper() - { - release(); - } - - Wrapper(const Wrapper& rhs) 
- { - object_ = rhs.object_; - referenceCountable_ = isReferenceCountable(object_); - detail::errHandler(retain(), __RETAIN_ERR); - } - - Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT_ - { - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = NULL; - rhs.referenceCountable_ = false; - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (this != &rhs) { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - detail::errHandler(retain(), __RETAIN_ERR); - } - return *this; - } - - Wrapper& operator = (Wrapper&& rhs) - { - if (this != &rhs) { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = NULL; - rhs.referenceCountable_ = false; - } - return *this; - } - - Wrapper& operator = (const cl_type &rhs) - { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs; - referenceCountable_ = isReferenceCountable(object_); - return *this; - } - - const cl_type& operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - - cl_type get() const { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - template - friend inline cl_int getInfoHelper(Func, cl_uint, vector*, int, typename U::cl_type); - - cl_int retain() const - { - if( object_ != nullptr && referenceCountable_ ) { - return ReferenceHandler::retain(object_); - } - else { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if (object_ != nullptr && referenceCountable_) { - return ReferenceHandler::release(object_); - } - else { - return CL_SUCCESS; - } - } -}; - -template -inline bool operator==(const Wrapper &lhs, const Wrapper &rhs) -{ - return lhs() == rhs(); -} - -template -inline bool operator!=(const Wrapper &lhs, const Wrapper &rhs) -{ - return !operator==(lhs, rhs); -} - -} // namespace detail -//! 
\endcond - - -using BuildLogType = vector::param_type>>; -#if defined(CL_HPP_ENABLE_EXCEPTIONS) -/** -* Exception class for build errors to carry build info -*/ -class BuildError : public Error -{ -private: - BuildLogType buildLogs; -public: - BuildError(cl_int err, const char * errStr, const BuildLogType &vec) : Error(err, errStr), buildLogs(vec) - { - } - - BuildLogType getBuildLog() const - { - return buildLogs; - } -}; -namespace detail { - static inline cl_int buildErrHandler( - cl_int err, - const char * errStr, - const BuildLogType &buildLogs) - { - if (err != CL_SUCCESS) { - throw BuildError(err, errStr, buildLogs); - } - return err; - } -} // namespace detail - -#else -namespace detail { - static inline cl_int buildErrHandler( - cl_int err, - const char * errStr, - const BuildLogType &buildLogs) - { - (void)buildLogs; // suppress unused variable warning - (void)errStr; - return err; - } -} // namespace detail -#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) - - -/*! \stuct ImageFormat - * \brief Adds constructors and member functions for cl_image_format. - * - * \see cl_image_format - */ -struct ImageFormat : public cl_image_format -{ - //! \brief Default constructor - performs no initialization. - ImageFormat(){} - - //! \brief Initializing constructor. - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - //! \brief Assignment operator. - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \brief Class interface for cl_device_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. 
- * - * \see cl_device_id - */ -class Device : public detail::Wrapper -{ -private: - static std::once_flag default_initialized_; - static Device default_; - static cl_int default_error_; - - /*! \brief Create the default context. - * - * This sets @c default_ and @c default_error_. It does not throw - * @c cl::Error. - */ - static void makeDefault(); - - /*! \brief Create the default platform from a provided platform. - * - * This sets @c default_. It does not throw - * @c cl::Error. - */ - static void makeDefaultProvided(const Device &p) { - default_ = p; - } - -public: -#ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Reset the default. - * - * This sets @c default_ to an empty value to support cleanup in - * the unit test framework. - * This function is not thread safe. - */ - static void unitTestClearDefault() { - default_ = Device(); - } -#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE - - //! \brief Default constructor - initializes to NULL. - Device() : detail::Wrapper() { } - - /*! \brief Constructor from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - explicit Device(const cl_device_id &device, bool retainObject = false) : - detail::Wrapper(device, retainObject) { } - - /*! \brief Returns the first device on the default context. - * - * \see Context::getDefault() - */ - static Device getDefault( - cl_int *errResult = NULL) - { - std::call_once(default_initialized_, makeDefault); - detail::errHandler(default_error_); - if (errResult != NULL) { - *errResult = default_error_; - } - return default_; - } - - /** - * Modify the default device to be used by - * subsequent operations. - * Will only set the default if no default was previously created. - * @return updated default device. - * Should be compared to the passed value to ensure that it was updated. 
- */ - static Device setDefault(const Device &default_device) - { - std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_device)); - detail::errHandler(default_error_); - return default_; - } - - /*! \brief Assignment operator from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device& operator = (const cl_device_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Device(const Device& dev) : detail::Wrapper(dev) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Device& operator = (const Device &dev) - { - detail::Wrapper::operator=(dev); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Device(Device&& dev) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(dev)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Device& operator = (Device &&dev) - { - detail::Wrapper::operator=(std::move(dev)); - return *this; - } - - //! \brief Wrapper for clGetDeviceInfo(). - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - //! \brief Wrapper for clGetDeviceInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - /** - * Return the current value of the host clock as seen by the device. 
- * The resolution of the device timer may be queried with the - * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. - * @return The host timer value. - */ - cl_ulong getHostTimer(cl_int *error = nullptr) - { - cl_ulong retVal = 0; - cl_int err = - clGetHostTimer(this->get(), &retVal); - detail::errHandler( - err, - __GET_HOST_TIMER_ERR); - if (error) { - *error = err; - } - return retVal; - } - - /** - * Return a synchronized pair of host and device timestamps as seen by device. - * Use to correlate the clocks and get the host timer only using getHostTimer - * as a lower cost mechanism in between calls. - * The resolution of the host timer may be queried with the - * CL_PLATFORM_HOST_TIMER_RESOLUTION query. - * The resolution of the device timer may be queried with the - * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. - * @return A pair of (device timer, host timer) timer values. - */ - std::pair getDeviceAndHostTimer(cl_int *error = nullptr) - { - std::pair retVal; - cl_int err = - clGetDeviceAndHostTimer(this->get(), &(retVal.first), &(retVal.second)); - detail::errHandler( - err, - __GET_DEVICE_AND_HOST_TIMER_ERR); - if (error) { - *error = err; - } - return retVal; - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - /** - * CL 1.2 version - */ -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - //! \brief Wrapper for clCreateSubDevices(). 
- cl_int createSubDevices( - const cl_device_partition_property * properties, - vector* devices) - { - cl_uint n = 0; - cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); - } - - vector ids(n); - err = clCreateSubDevices(object_, properties, n, ids.data(), NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); - } - - // Cannot trivially assign because we need to capture intermediates - // with safe construction - if (devices) { - devices->resize(ids.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < ids.size(); i++) { - // We do not need to retain because this device is being created - // by the runtime - (*devices)[i] = Device(ids[i], false); - } - } - - return CL_SUCCESS; - } -#elif defined(CL_HPP_USE_CL_DEVICE_FISSION) - -/** - * CL 1.1 version that uses device fission extension. 
- */ - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - vector* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); - } - - vector ids(n); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids.data(), NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); - } - // Cannot trivially assign because we need to capture intermediates - // with safe construction - if (devices) { - devices->resize(ids.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < ids.size(); i++) { - // We do not need to retain because this device is being created - // by the runtime - (*devices)[i] = Device(ids[i], false); - } - } - return CL_SUCCESS; - } -#endif // defined(CL_HPP_USE_CL_DEVICE_FISSION) -}; - -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Device::default_initialized_; -CL_HPP_DEFINE_STATIC_MEMBER_ Device Device::default_; -CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Device::default_error_ = CL_SUCCESS; - -/*! \brief Class interface for cl_platform_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_platform_id - */ -class Platform : public detail::Wrapper -{ -private: - static std::once_flag default_initialized_; - static Platform default_; - static cl_int default_error_; - - /*! 
\brief Create the default context. - * - * This sets @c default_ and @c default_error_. It does not throw - * @c cl::Error. - */ - static void makeDefault() { - /* Throwing an exception from a call_once invocation does not do - * what we wish, so we catch it and save the error. - */ -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - try -#endif - { - // If default wasn't passed ,generate one - // Otherwise set it - cl_uint n = 0; - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - default_error_ = err; - return; - } - if (n == 0) { - default_error_ = CL_INVALID_PLATFORM; - return; - } - - vector ids(n); - err = ::clGetPlatformIDs(n, ids.data(), NULL); - if (err != CL_SUCCESS) { - default_error_ = err; - return; - } - - default_ = Platform(ids[0]); - } -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - catch (cl::Error &e) { - default_error_ = e.err(); - } -#endif - } - - /*! \brief Create the default platform from a provided platform. - * - * This sets @c default_. It does not throw - * @c cl::Error. - */ - static void makeDefaultProvided(const Platform &p) { - default_ = p; - } - -public: -#ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Reset the default. - * - * This sets @c default_ to an empty value to support cleanup in - * the unit test framework. - * This function is not thread safe. - */ - static void unitTestClearDefault() { - default_ = Platform(); - } -#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE - - //! \brief Default constructor - initializes to NULL. - Platform() : detail::Wrapper() { } - - /*! \brief Constructor from cl_platform_id. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * This simply copies the platform ID value, which is an inexpensive operation. - */ - explicit Platform(const cl_platform_id &platform, bool retainObject = false) : - detail::Wrapper(platform, retainObject) { } - - /*! \brief Assignment operator from cl_platform_id. 
- * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform& operator = (const cl_platform_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - static Platform getDefault( - cl_int *errResult = NULL) - { - std::call_once(default_initialized_, makeDefault); - detail::errHandler(default_error_); - if (errResult != NULL) { - *errResult = default_error_; - } - return default_; - } - - /** - * Modify the default platform to be used by - * subsequent operations. - * Will only set the default if no default was previously created. - * @return updated default platform. - * Should be compared to the passed value to ensure that it was updated. - */ - static Platform setDefault(const Platform &default_platform) - { - std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_platform)); - detail::errHandler(default_error_); - return default_; - } - - //! \brief Wrapper for clGetPlatformInfo(). - cl_int getInfo(cl_platform_info name, string* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - //! \brief Wrapper for clGetPlatformInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of devices for this platform. - * - * Wraps clGetDeviceIDs(). 
- */ - cl_int getDevices( - cl_device_type type, - vector* devices) const - { - cl_uint n = 0; - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - vector ids(n); - err = ::clGetDeviceIDs(object_, type, n, ids.data(), NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - // Cannot trivially assign because we need to capture intermediates - // with safe construction - // We must retain things we obtain from the API to avoid releasing - // API-owned objects. - if (devices) { - devices->resize(ids.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < ids.size(); i++) { - (*devices)[i] = Device(ids[i], true); - } - } - return CL_SUCCESS; - } - -#if defined(CL_HPP_USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. 
- */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - vector* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(object_, clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - vector ids(n); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids.data(), - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - // Cannot trivially assign because we need to capture intermediates - // with safe construction - // We must retain things we obtain from the API to avoid releasing - // API-owned objects. - if (devices) { - devices->resize(ids.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < ids.size(); i++) { - (*devices)[i] = Device(ids[i], true); - } - } - return CL_SUCCESS; - } -#endif - - /*! \brief Gets a list of available platforms. - * - * Wraps clGetPlatformIDs(). 
- */ - static cl_int get( - vector* platforms) - { - cl_uint n = 0; - - if( platforms == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - vector ids(n); - err = ::clGetPlatformIDs(n, ids.data(), NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - if (platforms) { - platforms->resize(ids.size()); - - // Platforms don't reference count - for (size_type i = 0; i < ids.size(); i++) { - (*platforms)[i] = Platform(ids[i]); - } - } - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static cl_int get( - Platform * platform) - { - cl_int err; - Platform default_platform = Platform::getDefault(&err); - if (platform) { - *platform = default_platform; - } - return err; - } - - /*! \brief Gets the first available platform, returning it by value. - * - * \return Returns a valid platform if one is available. - * If no platform is available will return a null platform. - * Throws an exception if no platforms are available - * or an error condition occurs. - * Wraps clGetPlatformIDs(), returning the first result. - */ - static Platform get( - cl_int * errResult = NULL) - { - cl_int err; - Platform default_platform = Platform::getDefault(&err); - if (errResult) { - *errResult = err; - } - return default_platform; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - //! \brief Wrapper for clUnloadCompiler(). 
- cl_int - unloadCompiler() - { - return ::clUnloadPlatformCompiler(object_); - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -}; // class Platform - -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Platform::default_initialized_; -CL_HPP_DEFINE_STATIC_MEMBER_ Platform Platform::default_; -CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Platform::default_error_ = CL_SUCCESS; - - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -/** - * Unload the OpenCL compiler. - * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -/*! \brief Class interface for cl_context. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_context as the original. For details, see - * clRetainContext() and clReleaseContext(). - * - * \see cl_context - */ -class Context - : public detail::Wrapper -{ -private: - static std::once_flag default_initialized_; - static Context default_; - static cl_int default_error_; - - /*! \brief Create the default context from the default device type in the default platform. - * - * This sets @c default_ and @c default_error_. It does not throw - * @c cl::Error. - */ - static void makeDefault() { - /* Throwing an exception from a call_once invocation does not do - * what we wish, so we catch it and save the error. 
- */ -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - try -#endif - { -#if !defined(__APPLE__) && !defined(__MACOS) - const Platform &p = Platform::getDefault(); - cl_platform_id defaultPlatform = p(); - cl_context_properties properties[3] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)defaultPlatform, 0 - }; -#else // #if !defined(__APPLE__) && !defined(__MACOS) - cl_context_properties *properties = nullptr; -#endif // #if !defined(__APPLE__) && !defined(__MACOS) - - default_ = Context( - CL_DEVICE_TYPE_DEFAULT, - properties, - NULL, - NULL, - &default_error_); - } -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - catch (cl::Error &e) { - default_error_ = e.err(); - } -#endif - } - - - /*! \brief Create the default context from a provided Context. - * - * This sets @c default_. It does not throw - * @c cl::Error. - */ - static void makeDefaultProvided(const Context &c) { - default_ = c; - } - -public: -#ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Reset the default. - * - * This sets @c default_ to an empty value to support cleanup in - * the unit test framework. - * This function is not thread safe. - */ - static void unitTestClearDefault() { - default_ = Context(); - } -#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE - - /*! \brief Constructs a context including a list of specified devices. - * - * Wraps clCreateContext(). 
- */ - Context( - const vector& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - size_type, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - size_type numDevices = devices.size(); - vector deviceIDs(numDevices); - - for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateContext( - properties, (cl_uint) numDevices, - deviceIDs.data(), - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - const Device& device, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - size_type, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - cl_device_id deviceID = device(); - - object_ = ::clCreateContext( - properties, 1, - &deviceID, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a context including all or a subset of devices of a specified type. - * - * Wraps clCreateContextFromType(). 
- */ - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - size_type, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - -#if !defined(__APPLE__) && !defined(__MACOS) - cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; - - if (properties == NULL) { - // Get a valid platform ID as we cannot send in a blank one - vector platforms; - error = Platform::get(&platforms); - if (error != CL_SUCCESS) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - // Check the platforms we found for a device of our specified type - cl_context_properties platform_id = 0; - for (unsigned int i = 0; i < platforms.size(); i++) { - - vector devices; - -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - try { -#endif - - error = platforms[i].getDevices(type, &devices); - -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - } catch (cl::Error& e) { - error = e.err(); - } - // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type - // We do error checking next anyway, and can throw there if needed -#endif - - // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - if (devices.size() > 0) { - platform_id = (cl_context_properties)platforms[i](); - break; - } - } - - if (platform_id == 0) { - detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = CL_DEVICE_NOT_FOUND; - } - return; - } - - prop[1] = platform_id; - properties = &prop[0]; - } -#endif - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! 
\brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Context(const Context& ctx) : detail::Wrapper(ctx) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Context& operator = (const Context &ctx) - { - detail::Wrapper::operator=(ctx); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Context(Context&& ctx) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(ctx)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Context& operator = (Context &&ctx) - { - detail::Wrapper::operator=(std::move(ctx)); - return *this; - } - - - /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. - * - * \note All calls to this function return the same cl_context as the first. - */ - static Context getDefault(cl_int * err = NULL) - { - std::call_once(default_initialized_, makeDefault); - detail::errHandler(default_error_); - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - /** - * Modify the default context to be used by - * subsequent operations. - * Will only set the default if no default was previously created. - * @return updated default context. - * Should be compared to the passed value to ensure that it was updated. - */ - static Context setDefault(const Context &default_context) - { - std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_context)); - detail::errHandler(default_error_); - return default_; - } - - //! \brief Default constructor - initializes to NULL. - Context() : detail::Wrapper() { } - - /*! \brief Constructor from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_context - * into the new Context object. 
- */ - explicit Context(const cl_context& context, bool retainObject = false) : - detail::Wrapper(context, retainObject) { } - - /*! \brief Assignment operator from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseContext() on the value previously held by this instance. - */ - Context& operator = (const cl_context& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetContextInfo(). - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - //! \brief Wrapper for clGetContextInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of supported image formats. - * - * Wraps clGetSupportedImageFormats(). 
- */ - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - vector* formats) const - { - cl_uint numEntries; - - if (!formats) { - return CL_SUCCESS; - } - - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - if (numEntries > 0) { - vector value(numEntries); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*)value.data(), - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(begin(value), end(value)); - } - else { - // If no values are being returned, ensure an empty vector comes back - formats->clear(); - } - - return CL_SUCCESS; - } -}; - -inline void Device::makeDefault() -{ - /* Throwing an exception from a call_once invocation does not do - * what we wish, so we catch it and save the error. - */ -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - try -#endif - { - cl_int error = 0; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - default_error_ = error; - } - else { - default_ = context.getInfo()[0]; - default_error_ = CL_SUCCESS; - } - } -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - catch (cl::Error &e) { - default_error_ = e.err(); - } -#endif -} - -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Context::default_initialized_; -CL_HPP_DEFINE_STATIC_MEMBER_ Context Context::default_; -CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Context::default_error_ = CL_SUCCESS; - -/*! \brief Class interface for cl_event. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_event as the original. For details, see - * clRetainEvent() and clReleaseEvent(). - * - * \see cl_event - */ -class Event : public detail::Wrapper -{ -public: - //! 
\brief Default constructor - initializes to NULL. - Event() : detail::Wrapper() { } - - /*! \brief Constructor from cl_event - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * This effectively transfers ownership of a refcount on the cl_event - * into the new Event object. - */ - explicit Event(const cl_event& event, bool retainObject = false) : - detail::Wrapper(event, retainObject) { } - - /*! \brief Assignment operator from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseEvent() on the value previously held by this instance. - */ - Event& operator = (const cl_event& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetEventInfo(). - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - //! \brief Wrapper for clGetEventInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - //! \brief Wrapper for clGetEventProfilingInfo(). - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. 
- template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Blocks the calling thread until this event completes. - * - * Wraps clWaitForEvents(). - */ - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 - /*! \brief Registers a user callback function for a specific command execution status. - * - * Wraps clSetEventCallback(). - */ - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 - - /*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ - static cl_int - waitForEvents(const vector& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), - __WAIT_FOR_EVENTS_ERR); - } -}; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 -/*! \brief Class interface for user events (a subset of cl_event's). - * - * See Event for details about copy semantics, etc. - */ -class UserEvent : public Event -{ -public: - /*! \brief Constructs a user event on a given context. - * - * Wraps clCreateUserEvent(). - */ - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. 
- UserEvent() : Event() { } - - /*! \brief Sets the execution status of a user event object. - * - * Wraps clSetUserEventStatus(). - */ - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 - -/*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ -inline static cl_int -WaitForEvents(const vector& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \brief Class interface for cl_mem. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_mem as the original. For details, see - * clRetainMemObject() and clReleaseMemObject(). - * - * \see cl_mem - */ -class Memory : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Memory() : detail::Wrapper() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * Optionally transfer ownership of a refcount on the cl_mem - * into the new Memory object. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * - * See Memory for further details. - */ - explicit Memory(const cl_mem& memory, bool retainObject) : - detail::Wrapper(memory, retainObject) { } - - /*! \brief Assignment operator from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseMemObject() on the value previously held by this instance. - */ - Memory& operator = (const cl_mem& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. 
- */ - Memory(const Memory& mem) : detail::Wrapper(mem) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Memory& operator = (const Memory &mem) - { - detail::Wrapper::operator=(mem); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Memory(Memory&& mem) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(mem)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Memory& operator = (Memory &&mem) - { - detail::Wrapper::operator=(std::move(mem)); - return *this; - } - - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - //! \brief Wrapper for clGetMemObjectInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 - /*! \brief Registers a callback function to be called when the memory object - * is no longer needed. - * - * Wraps clSetMemObjectDestructorCallback(). - * - * Repeated calls to this function, for a given cl_mem value, will append - * to the list of functions called (in reverse order) when memory object's - * resources are freed and the memory object is deleted. - * - * \note - * The registered callbacks are associated with the underlying cl_mem - * value - not the Memory class instance. 
- */ - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 - -}; - -// Pre-declare copy functions -class Buffer; -template< typename IteratorType > -cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -namespace detail -{ - class SVMTraitNull - { - public: - static cl_svm_mem_flags getSVMMemFlags() - { - return 0; - } - }; -} // namespace detail - -template -class SVMTraitReadWrite -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return CL_MEM_READ_WRITE | - Trait::getSVMMemFlags(); - } -}; - -template -class SVMTraitReadOnly -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return CL_MEM_READ_ONLY | - Trait::getSVMMemFlags(); - } -}; - -template -class SVMTraitWriteOnly -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return CL_MEM_WRITE_ONLY | - Trait::getSVMMemFlags(); - } -}; - -template> -class SVMTraitCoarse -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return Trait::getSVMMemFlags(); - } -}; - -template> -class SVMTraitFine -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return CL_MEM_SVM_FINE_GRAIN_BUFFER | - Trait::getSVMMemFlags(); - } -}; - -template> -class SVMTraitAtomic -{ -public: - static cl_svm_mem_flags 
getSVMMemFlags() - { - return - CL_MEM_SVM_FINE_GRAIN_BUFFER | - CL_MEM_SVM_ATOMICS | - Trait::getSVMMemFlags(); - } -}; - -// Pre-declare SVM map function -template -inline cl_int enqueueMapSVM( - T* ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events = NULL, - Event* event = NULL); - -/** - * STL-like allocator class for managing SVM objects provided for convenience. - * - * Note that while this behaves like an allocator for the purposes of constructing vectors and similar objects, - * care must be taken when using with smart pointers. - * The allocator should not be used to construct a unique_ptr if we are using coarse-grained SVM mode because - * the coarse-grained management behaviour would behave incorrectly with respect to reference counting. - * - * Instead the allocator embeds a Deleter which may be used with unique_ptr and is used - * with the allocate_shared and allocate_ptr supplied operations. - */ -template -class SVMAllocator { -private: - Context context_; - -public: - typedef T value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - template - struct rebind - { - typedef SVMAllocator other; - }; - - template - friend class SVMAllocator; - - SVMAllocator() : - context_(Context::getDefault()) - { - } - - explicit SVMAllocator(cl::Context context) : - context_(context) - { - } - - - SVMAllocator(const SVMAllocator &other) : - context_(other.context_) - { - } - - template - SVMAllocator(const SVMAllocator &other) : - context_(other.context_) - { - } - - ~SVMAllocator() - { - } - - pointer address(reference r) CL_HPP_NOEXCEPT_ - { - return std::addressof(r); - } - - const_pointer address(const_reference r) CL_HPP_NOEXCEPT_ - { - return std::addressof(r); - } - - /** - * Allocate an SVM pointer. 
- * - * If the allocator is coarse-grained, this will take ownership to allow - * containers to correctly construct data in place. - */ - pointer allocate( - size_type size, - typename cl::SVMAllocator::const_pointer = 0) - { - // Allocate memory with default alignment matching the size of the type - void* voidPointer = - clSVMAlloc( - context_(), - SVMTrait::getSVMMemFlags(), - size*sizeof(T), - 0); - pointer retValue = reinterpret_cast( - voidPointer); -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - if (!retValue) { - std::bad_alloc excep; - throw excep; - } -#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) - - // If allocation was coarse-grained then map it - if (!(SVMTrait::getSVMMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { - cl_int err = enqueueMapSVM(retValue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, size*sizeof(T)); - if (err != CL_SUCCESS) { - std::bad_alloc excep; - throw excep; - } - } - - // If exceptions disabled, return null pointer from allocator - return retValue; - } - - void deallocate(pointer p, size_type) - { - clSVMFree(context_(), p); - } - - /** - * Return the maximum possible allocation size. - * This is the minimum of the maximum sizes of all devices in the context. - */ - size_type max_size() const CL_HPP_NOEXCEPT_ - { - size_type maxSize = std::numeric_limits::max() / sizeof(T); - - for (const Device &d : context_.getInfo()) { - maxSize = std::min( - maxSize, - static_cast(d.getInfo())); - } - - return maxSize; - } - - template< class U, class... Args > - void construct(U* p, Args&&... args) - { - new(p)T(args...); - } - - template< class U > - void destroy(U* p) - { - p->~U(); - } - - /** - * Returns true if the contexts match. 
- */ - inline bool operator==(SVMAllocator const& rhs) - { - return (context_==rhs.context_); - } - - inline bool operator!=(SVMAllocator const& a) - { - return !operator==(a); - } -}; // class SVMAllocator return cl::pointer(tmp, detail::Deleter{alloc, copies}); - - -template -class SVMAllocator { -public: - typedef void value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - - template - struct rebind - { - typedef SVMAllocator other; - }; - - template - friend class SVMAllocator; -}; - -#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) -namespace detail -{ - template - class Deleter { - private: - Alloc alloc_; - size_type copies_; - - public: - typedef typename std::allocator_traits::pointer pointer; - - Deleter(const Alloc &alloc, size_type copies) : alloc_{ alloc }, copies_{ copies } - { - } - - void operator()(pointer ptr) const { - Alloc tmpAlloc{ alloc_ }; - std::allocator_traits::destroy(tmpAlloc, std::addressof(*ptr)); - std::allocator_traits::deallocate(tmpAlloc, ptr, copies_); - } - }; -} // namespace detail - -/** - * Allocation operation compatible with std::allocate_ptr. - * Creates a unique_ptr by default. - * This requirement is to ensure that the control block is not - * allocated in memory inaccessible to the host. - */ -template -cl::pointer> allocate_pointer(const Alloc &alloc_, Args&&... args) -{ - Alloc alloc(alloc_); - static const size_type copies = 1; - - // Ensure that creation of the management block and the - // object are dealt with separately such that we only provide a deleter - - T* tmp = std::allocator_traits::allocate(alloc, copies); - if (!tmp) { - std::bad_alloc excep; - throw excep; - } - try { - std::allocator_traits::construct( - alloc, - std::addressof(*tmp), - std::forward(args)...); - - return cl::pointer>(tmp, detail::Deleter{alloc, copies}); - } - catch (std::bad_alloc& b) - { - std::allocator_traits::deallocate(alloc, tmp, copies); - throw; - } -} - -template< class T, class SVMTrait, class... 
Args > -cl::pointer>> allocate_svm(Args... args) -{ - SVMAllocator alloc; - return cl::allocate_pointer(alloc, args...); -} - -template< class T, class SVMTrait, class... Args > -cl::pointer>> allocate_svm(const cl::Context &c, Args... args) -{ - SVMAllocator alloc(c); - return cl::allocate_pointer(alloc, args...); -} -#endif // #if !defined(CL_HPP_NO_STD_UNIQUE_PTR) - -/*! \brief Vector alias to simplify contruction of coarse-grained SVM containers. - * - */ -template < class T > -using coarse_svm_vector = vector>>; - -/*! \brief Vector alias to simplify contruction of fine-grained SVM containers. -* -*/ -template < class T > -using fine_svm_vector = vector>>; - -/*! \brief Vector alias to simplify contruction of fine-grained SVM containers that support platform atomics. -* -*/ -template < class T > -using atomic_svm_vector = vector>>; - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - - -/*! \brief Class interface for Buffer Memory Objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Buffer : public Memory -{ -public: - - /*! \brief Constructs a Buffer in a specified context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context& context, - cl_mem_flags flags, - size_type size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a Buffer in the default context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. 
- * - * \see Context::getDefault() - */ - Buffer( - cl_mem_flags flags, - size_type size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(err); - - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer( - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr = false, - cl_int* err = NULL) - { - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - size_type size = sizeof(DataType)*(endIterator - startIterator); - - Context context = Context::getDefault(err); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - error = cl::copy(startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified context. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - - /*! 
- * \brief Construct a Buffer from a host container via iterators using a specified queue. - * If useHostPtr is specified iterators must be random access. - */ - template< typename IteratorType > - Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - - //! \brief Default constructor - initializes to NULL. - Buffer() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with earlier versions. - * - * See Memory for further details. - */ - explicit Buffer(const cl_mem& buffer, bool retainObject = false) : - Memory(buffer, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Buffer& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Buffer(const Buffer& buf) : Memory(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Buffer& operator = (const Buffer &buf) - { - Memory::operator=(buf); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Buffer(Buffer&& buf) CL_HPP_NOEXCEPT_ : Memory(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Buffer& operator = (Buffer &&buf) - { - Memory::operator=(std::move(buf)); - return *this; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 - /*! \brief Creates a new buffer object from this. - * - * Wraps clCreateSubBuffer(). 
- */ - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 -}; - -#if defined (CL_HPP_USE_DX_INTEROP) -/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. - * - * This is provided to facilitate interoperability with Direct3D. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferD3D10 : public Buffer -{ -public: - - - /*! \brief Constructs a BufferD3D10, in a specified context, from a - * given ID3D10Buffer. - * - * Wraps clCreateFromD3D10BufferKHR(). - */ - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) : pfn_clCreateFromD3D10BufferKHR(nullptr) - { - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - vector props = context.getInfo(); - cl_platform platform = -1; - for( int i = 0; i < props.size(); ++i ) { - if( props[i] == CL_CONTEXT_PLATFORM ) { - platform = props[i+1]; - } - } - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateFromD3D10BufferKHR); -#elif CL_HPP_TARGET_OPENCL_VERSION >= 110 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateFromD3D10BufferKHR); -#endif - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! 
\brief Default constructor - initializes to NULL. - BufferD3D10() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit BufferD3D10(const cl_mem& buffer, bool retainObject = false) : - Buffer(buffer, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(const BufferD3D10& buf) : - Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10& operator = (const BufferD3D10 &buf) - { - Buffer::operator=(buf); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(BufferD3D10&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10& operator = (BufferD3D10 &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } -}; -#endif - -/*! \brief Class interface for GL Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferGL : public Buffer -{ -public: - /*! \brief Constructs a BufferGL in a specified context, from a given - * GL buffer. - * - * Wraps clCreateFromGLBuffer(). 
- */ - BufferGL( - const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferGL() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit BufferGL(const cl_mem& buffer, bool retainObject = false) : - Buffer(buffer, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferGL(const BufferGL& buf) : Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferGL& operator = (const BufferGL &buf) - { - Buffer::operator=(buf); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL(BufferGL&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL& operator = (BufferGL &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! 
\brief Class interface for GL Render Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferRenderGL : public Buffer -{ -public: - /*! \brief Constructs a BufferRenderGL in a specified context, from a given - * GL Renderbuffer. - * - * Wraps clCreateFromGLRenderbuffer(). - */ - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferRenderGL() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit BufferRenderGL(const cl_mem& buffer, bool retainObject = false) : - Buffer(buffer, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL(const BufferRenderGL& buf) : Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL& operator = (const BufferRenderGL &buf) - { - Buffer::operator=(buf); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} - - /*! 
\brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL& operator = (BufferRenderGL &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief C++ base class for Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image : public Memory -{ -protected: - //! \brief Default constructor - initializes to NULL. - Image() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image(const cl_mem& image, bool retainObject = false) : - Memory(image, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image(const Image& img) : Memory(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image& operator = (const Image &img) - { - Memory::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image(Image&& img) CL_HPP_NOEXCEPT_ : Memory(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - Image& operator = (Image &&img) - { - Memory::operator=(std::move(img)); - return *this; - } - - -public: - //! \brief Wrapper for clGetImageInfo(). - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - //! \brief Wrapper for clGetImageInfo() that returns by value. - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/*! \brief Class interface for 1D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image1D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image1D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type width, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D, - width, - 0, 0, 0, 0, 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image1D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image1D(const cl_mem& image1D, bool retainObject = false) : - Image(image1D, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. 
- * - * See Memory for further details. - */ - Image1D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1D(const Image1D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1D& operator = (const Image1D &img) - { - Image::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1D(Image1D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1D& operator = (Image1D &&img) - { - Image::operator=(std::move(img)); - return *this; - } - -}; - -/*! \class Image1DBuffer - * \brief Image interface for 1D buffer images. - */ -class Image1DBuffer : public Image -{ -public: - Image1DBuffer( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type width, - const Buffer &buffer, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - width, - 0, 0, 0, 0, 0, 0, 0, - buffer() - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - NULL, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DBuffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image1DBuffer(const cl_mem& image1D, bool retainObject = false) : - Image(image1D, retainObject) { } - - Image1DBuffer& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! 
\brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer(const Image1DBuffer& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer& operator = (const Image1DBuffer &img) - { - Image::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer(Image1DBuffer&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer& operator = (Image1DBuffer &&img) - { - Image::operator=(std::move(img)); - return *this; - } - -}; - -/*! \class Image1DArray - * \brief Image interface for arrays of 1D images. - */ -class Image1DArray : public Image -{ -public: - Image1DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type arraySize, - size_type width, - size_type rowPitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_ARRAY, - width, - 0, 0, // height, depth (unused) - arraySize, - rowPitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DArray() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image1DArray(const cl_mem& imageArray, bool retainObject = false) : - Image(imageArray, retainObject) { } - - - Image1DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! 
\brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DArray(const Image1DArray& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DArray& operator = (const Image1DArray &img) - { - Image::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DArray(Image1DArray&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DArray& operator = (Image1DArray &&img) - { - Image::operator=(std::move(img)); - return *this; - } - -}; -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - - -/*! \brief Class interface for 2D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image2D : public Image -{ -public: - /*! \brief Constructs a 2D Image in a specified context. - * - * Wraps clCreateImage(). 
- */ - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type width, - size_type height, - size_type row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif CL_HPP_TARGET_OPENCL_VERSION >= 120 - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - if (!useCreateImage) - { - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 || defined(CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR) - /*! \brief Constructs a 2D Image from a buffer. - * \note This will share storage with the underlying buffer. - * - * Wraps clCreateImage(). 
- */ - Image2D( - const Context& context, - ImageFormat format, - const Buffer &sourceBuffer, - size_type width, - size_type height, - size_type row_pitch = 0, - cl_int* err = nullptr) - { - cl_int error; - - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, - // Use buffer as input to image - sourceBuffer() - }; - object_ = ::clCreateImage( - context(), - 0, // flags inherited from buffer - &format, - &desc, - nullptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } -#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 || defined(CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR) - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /*! \brief Constructs a 2D Image from an image. - * \note This will share storage with the underlying image but may - * reinterpret the channel order and type. - * - * The image will be created matching with a descriptor matching the source. - * - * \param order is the channel order to reinterpret the image data as. - * The channel order may differ as described in the OpenCL - * 2.0 API specification. - * - * Wraps clCreateImage(). - */ - Image2D( - const Context& context, - cl_channel_order order, - const Image &sourceImage, - cl_int* err = nullptr) - { - cl_int error; - - // Descriptor fields have to match source image - size_type sourceWidth = - sourceImage.getImageInfo(); - size_type sourceHeight = - sourceImage.getImageInfo(); - size_type sourceRowPitch = - sourceImage.getImageInfo(); - cl_uint sourceNumMIPLevels = - sourceImage.getImageInfo(); - cl_uint sourceNumSamples = - sourceImage.getImageInfo(); - cl_image_format sourceFormat = - sourceImage.getImageInfo(); - - // Update only the channel order. - // Channel format inherited from source. 
- sourceFormat.image_channel_order = order; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - sourceWidth, - sourceHeight, - 0, 0, // depth (unused), array size (unused) - sourceRowPitch, - 0, // slice pitch (unused) - sourceNumMIPLevels, - sourceNumSamples, - // Use buffer as input to image - sourceImage() - }; - object_ = ::clCreateImage( - context(), - 0, // flags should be inherited from mem_object - &sourceFormat, - &desc, - nullptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } -#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - - //! \brief Default constructor - initializes to NULL. - Image2D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image2D(const cl_mem& image2D, bool retainObject = false) : - Image(image2D, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2D(const Image2D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2D& operator = (const Image2D &img) - { - Image::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2D(Image2D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - Image2D& operator = (Image2D &&img) - { - Image::operator=(std::move(img)); - return *this; - } - -}; - - -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -/*! \brief Class interface for GL 2D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D -{ -public: - /*! \brief Constructs an Image2DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture2D(). - */ - Image2DGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); - if (err != NULL) { - *err = error; - } - - } - - //! \brief Default constructor - initializes to NULL. - Image2DGL() : Image2D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image2DGL(const cl_mem& image, bool retainObject = false) : - Image2D(image, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - *c - * See Memory for further details. - */ - Image2DGL& operator = (const cl_mem& rhs) - { - Image2D::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DGL(const Image2DGL& img) : Image2D(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. 
- */ - Image2DGL& operator = (const Image2DGL &img) - { - Image2D::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DGL(Image2DGL&& img) CL_HPP_NOEXCEPT_ : Image2D(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DGL& operator = (Image2DGL &&img) - { - Image2D::operator=(std::move(img)); - return *this; - } - -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/*! \class Image2DArray - * \brief Image interface for arrays of 2D images. - */ -class Image2DArray : public Image -{ -public: - Image2DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type arraySize, - size_type width, - size_type height, - size_type rowPitch, - size_type slicePitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D_ARRAY, - width, - height, - 0, // depth (unused) - arraySize, - rowPitch, - slicePitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DArray() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image2DArray(const cl_mem& imageArray, bool retainObject = false) : Image(imageArray, retainObject) { } - - Image2DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. 
- */ - Image2DArray(const Image2DArray& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DArray& operator = (const Image2DArray &img) - { - Image::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DArray(Image2DArray&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DArray& operator = (Image2DArray &&img) - { - Image::operator=(std::move(img)); - return *this; - } -}; -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - -/*! \brief Class interface for 3D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3D : public Image -{ -public: - /*! \brief Constructs a 3D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type width, - size_type height, - size_type depth, - size_type row_pitch = 0, - size_type slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif CL_HPP_TARGET_OPENCL_VERSION >= 120 - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE3D, - width, - height, - depth, - 0, // array size (unused) - row_pitch, - slice_pitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - 
detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - if (!useCreateImage) - { - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 - } - - //! \brief Default constructor - initializes to NULL. - Image3D() : Image() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image3D(const cl_mem& image3D, bool retainObject = false) : - Image(image3D, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3D(const Image3D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3D& operator = (const Image3D &img) - { - Image::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3D(Image3D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3D& operator = (Image3D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -}; - -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -/*! \brief Class interface for GL 3D Image Memory objects. 
- * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3DGL : public Image3D -{ -public: - /*! \brief Constructs an Image3DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture3D(). - */ - Image3DGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image3DGL() : Image3D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image3DGL(const cl_mem& image, bool retainObject = false) : - Image3D(image, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL& operator = (const cl_mem& rhs) - { - Image3D::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3DGL(const Image3DGL& img) : Image3D(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3DGL& operator = (const Image3DGL &img) - { - Image3D::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3DGL(Image3DGL&& img) CL_HPP_NOEXCEPT_ : Image3D(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - Image3DGL& operator = (Image3DGL &&img) - { - Image3D::operator=(std::move(img)); - return *this; - } -}; -#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/*! \class ImageGL - * \brief general image interface for GL interop. - * We abstract the 2D and 3D GL images into a single instance here - * that wraps all GL sourced images on the grounds that setup information - * was performed by OpenCL anyway. - */ -class ImageGL : public Image -{ -public: - ImageGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); - if (err != NULL) { - *err = error; - } - } - - ImageGL() : Image() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit ImageGL(const cl_mem& image, bool retainObject = false) : - Image(image, retainObject) { } - - ImageGL& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - ImageGL(const ImageGL& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - ImageGL& operator = (const ImageGL &img) - { - Image::operator=(img); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - ImageGL(ImageGL&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - ImageGL& operator = (ImageGL &&img) - { - Image::operator=(std::move(img)); - return *this; - } -}; -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -/*! \brief Class interface for Pipe Memory Objects. -* -* See Memory for details about copy semantics, etc. -* -* \see Memory -*/ -class Pipe : public Memory -{ -public: - - /*! \brief Constructs a Pipe in a specified context. - * - * Wraps clCreatePipe(). - * @param context Context in which to create the pipe. - * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. - * @param packet_size Size in bytes of a single packet of the pipe. - * @param max_packets Number of packets that may be stored in the pipe. - * - */ - Pipe( - const Context& context, - cl_uint packet_size, - cl_uint max_packets, - cl_int* err = NULL) - { - cl_int error; - - cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; - object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); - - detail::errHandler(error, __CREATE_PIPE_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a Pipe in a the default context. - * - * Wraps clCreatePipe(). - * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. - * @param packet_size Size in bytes of a single packet of the pipe. - * @param max_packets Number of packets that may be stored in the pipe. - * - */ - Pipe( - cl_uint packet_size, - cl_uint max_packets, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(err); - - cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; - object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); - - detail::errHandler(error, __CREATE_PIPE_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Pipe() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. 
- * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with earlier versions. - * - * See Memory for further details. - */ - explicit Pipe(const cl_mem& pipe, bool retainObject = false) : - Memory(pipe, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Pipe& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Pipe(const Pipe& pipe) : Memory(pipe) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Pipe& operator = (const Pipe &pipe) - { - Memory::operator=(pipe); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Pipe(Pipe&& pipe) CL_HPP_NOEXCEPT_ : Memory(std::move(pipe)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Pipe& operator = (Pipe &&pipe) - { - Memory::operator=(std::move(pipe)); - return *this; - } - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_pipe_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPipeInfo, object_, name, param), - __GET_PIPE_INFO_ERR); - } - - //! \brief Wrapper for clGetMemObjectInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_pipe_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; // class Pipe -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 - - -/*! \brief Class interface for cl_sampler. 
- * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_sampler as the original. For details, see - * clRetainSampler() and clReleaseSampler(). - * - * \see cl_sampler - */ -class Sampler : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Sampler() { } - - /*! \brief Constructs a Sampler in a specified context. - * - * Wraps clCreateSampler(). - */ - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - cl_sampler_properties sampler_properties[] = { - CL_SAMPLER_NORMALIZED_COORDS, normalized_coords, - CL_SAMPLER_ADDRESSING_MODE, addressing_mode, - CL_SAMPLER_FILTER_MODE, filter_mode, - 0 }; - object_ = ::clCreateSamplerWithProperties( - context(), - sampler_properties, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } -#else - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } -#endif - } - - /*! \brief Constructor from cl_sampler - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * This effectively transfers ownership of a refcount on the cl_sampler - * into the new Sampler object. - */ - explicit Sampler(const cl_sampler& sampler, bool retainObject = false) : - detail::Wrapper(sampler, retainObject) { } - - /*! \brief Assignment operator from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseSampler() on the value previously held by this instance. 
- */ - Sampler& operator = (const cl_sampler& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Sampler(const Sampler& sam) : detail::Wrapper(sam) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Sampler& operator = (const Sampler &sam) - { - detail::Wrapper::operator=(sam); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Sampler(Sampler&& sam) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(sam)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Sampler& operator = (Sampler &&sam) - { - detail::Wrapper::operator=(std::move(sam)); - return *this; - } - - //! \brief Wrapper for clGetSamplerInfo(). - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - //! \brief Wrapper for clGetSamplerInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -class Program; -class CommandQueue; -class DeviceCommandQueue; -class Kernel; - -//! \brief Class interface for specifying NDRange values. -class NDRange -{ -private: - size_type sizes_[3]; - cl_uint dimensions_; - -public: - //! \brief Default constructor - resulting range has zero dimensions. - NDRange() - : dimensions_(0) - { - sizes_[0] = 0; - sizes_[1] = 0; - sizes_[2] = 0; - } - - //! \brief Constructs one-dimensional range. 
- NDRange(size_type size0) - : dimensions_(1) - { - sizes_[0] = size0; - sizes_[1] = 1; - sizes_[2] = 1; - } - - //! \brief Constructs two-dimensional range. - NDRange(size_type size0, size_type size1) - : dimensions_(2) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = 1; - } - - //! \brief Constructs three-dimensional range. - NDRange(size_type size0, size_type size1, size_type size2) - : dimensions_(3) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = size2; - } - - /*! \brief Conversion operator to const size_type *. - * - * \returns a pointer to the size of the first dimension. - */ - operator const size_type*() const { - return sizes_; - } - - //! \brief Queries the number of dimensions in the range. - size_type dimensions() const - { - return dimensions_; - } - - //! \brief Returns the size of the object in bytes based on the - // runtime number of dimensions - size_type size() const - { - return dimensions_*sizeof(size_type); - } - - size_type* get() - { - return sizes_; - } - - const size_type* get() const - { - return sizes_; - } -}; - -//! \brief A zero-dimensional range. -static const NDRange NullRange; - -//! 
\brief Local address wrapper for use with Kernel::setArg -struct LocalSpaceArg -{ - size_type size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler; - -// Enable for objects that are not subclasses of memory -// Pointers, constants etc -template -struct KernelArgumentHandler::value>::type> -{ - static size_type size(const T&) { return sizeof(T); } - static const T* ptr(const T& value) { return &value; } -}; - -// Enable for subclasses of memory where we want to get a reference to the cl_mem out -// and pass that in for safety -template -struct KernelArgumentHandler::value>::type> -{ - static size_type size(const T&) { return sizeof(cl_mem); } - static const cl_mem* ptr(const T& value) { return &(value()); } -}; - -// Specialization for DeviceCommandQueue defined later - -template <> -struct KernelArgumentHandler -{ - static size_type size(const LocalSpaceArg& value) { return value.size_; } - static const void* ptr(const LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -/*! Local - * \brief Helper function for generating LocalSpaceArg objects. - */ -inline LocalSpaceArg -Local(size_type size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -/*! \brief Class interface for cl_kernel. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_kernel as the original. For details, see - * clRetainKernel() and clReleaseKernel(). - * - * \see cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - //! \brief Default constructor - initializes to NULL. - Kernel() { } - - /*! \brief Constructor from cl_kernel - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * This effectively transfers ownership of a refcount on the cl_kernel - * into the new Kernel object. 
- */ - explicit Kernel(const cl_kernel& kernel, bool retainObject = false) : - detail::Wrapper(kernel, retainObject) { } - - /*! \brief Assignment operator from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseKernel() on the value previously held by this instance. - */ - Kernel& operator = (const cl_kernel& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Kernel& operator = (const Kernel &kernel) - { - detail::Wrapper::operator=(kernel); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Kernel(Kernel&& kernel) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(kernel)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - Kernel& operator = (Kernel &&kernel) - { - detail::Wrapper::operator=(std::move(kernel)); - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - template - cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), - __GET_KERNEL_ARG_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getArgInfo(cl_uint argIndex, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_arg_info, name>::param_type param; - cl_int result = getArgInfo(argIndex, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if (CL_HPP_TARGET_OPENCL_VERSION >= 200 && defined(CL_HPP_USE_CL_SUB_GROUPS_KHR)) || CL_HPP_TARGET_OPENCL_VERSION >= 210 - cl_int getSubGroupInfo(const 
cl::Device &dev, cl_kernel_sub_group_info name, const cl::NDRange &range, size_type* param) const - { -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - return detail::errHandler( - clGetKernelSubGroupInfo(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), - __GET_KERNEL_SUB_GROUP_INFO_ERR); - -#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - typedef clGetKernelSubGroupInfoKHR_fn PFN_clGetKernelSubGroupInfoKHR; - static PFN_clGetKernelSubGroupInfoKHR pfn_clGetKernelSubGroupInfoKHR = NULL; - CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetKernelSubGroupInfoKHR); - - return detail::errHandler( - pfn_clGetKernelSubGroupInfoKHR(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), - __GET_KERNEL_SUB_GROUP_INFO_ERR); - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - } - - template - size_type getSubGroupInfo(const cl::Device &dev, const cl::NDRange &range, cl_int* err = NULL) const - { - size_type param; - cl_int result = getSubGroupInfo(dev, name, range, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /*! \brief setArg overload taking a shared_ptr type - */ - template - cl_int setArg(cl_uint index, const cl::pointer &argPtr) - { - return detail::errHandler( - ::clSetKernelArgSVMPointer(object_, index, argPtr.get()), - __SET_KERNEL_ARGS_ERR); - } - - /*! \brief setArg overload taking a vector type. - */ - template - cl_int setArg(cl_uint index, const cl::vector &argPtr) - { - return detail::errHandler( - ::clSetKernelArgSVMPointer(object_, index, argPtr.data()), - __SET_KERNEL_ARGS_ERR); - } - - /*! 
\brief setArg overload taking a pointer type - */ - template - typename std::enable_if::value, cl_int>::type - setArg(cl_uint index, const T argPtr) - { - return detail::errHandler( - ::clSetKernelArgSVMPointer(object_, index, argPtr), - __SET_KERNEL_ARGS_ERR); - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - - /*! \brief setArg overload taking a POD type - */ - template - typename std::enable_if::value, cl_int>::type - setArg(cl_uint index, const T &value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, size_type size, const void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /*! - * Specify a vector of SVM pointers that the kernel may access in - * addition to its arguments. - */ - cl_int setSVMPointers(const vector &pointerList) - { - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - sizeof(void*)*pointerList.size(), - pointerList.data())); - } - - /*! - * Specify a std::array of SVM pointers that the kernel may access in - * addition to its arguments. - */ - template - cl_int setSVMPointers(const std::array &pointerList) - { - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - sizeof(void*)*pointerList.size(), - pointerList.data())); - } - - /*! \brief Enable fine-grained system SVM. - * - * \note It is only possible to enable fine-grained system SVM if all devices - * in the context associated with kernel support it. - * - * \param svmEnabled True if fine-grained system SVM is requested. False otherwise. - * \return CL_SUCCESS if the function was executed succesfully. CL_INVALID_OPERATION - * if no devices in the context support fine-grained system SVM. 
- * - * \see clSetKernelExecInfo - */ - cl_int enableFineGrainedSystemSVM(bool svmEnabled) - { - cl_bool svmEnabled_ = svmEnabled ? CL_TRUE : CL_FALSE; - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, - sizeof(cl_bool), - &svmEnabled_ - ) - ); - } - - template - void setSVMPointersHelper(std::array &pointerList, const pointer &t0, const pointer &t1, Ts & ... ts) - { - pointerList[index] = static_cast(t0.get()); - setSVMPointersHelper(pointerList, t1, ts...); - } - - template - typename std::enable_if::value, void>::type - setSVMPointersHelper(std::array &pointerList, T0 t0, T1 t1, Ts... ts) - { - pointerList[index] = static_cast(t0); - setSVMPointersHelper(pointerList, t1, ts...); - } - - template - void setSVMPointersHelper(std::array &pointerList, const pointer &t0) - { - pointerList[index] = static_cast(t0.get()); - } - - - template - typename std::enable_if::value, void>::type - setSVMPointersHelper(std::array &pointerList, T0 t0) - { - pointerList[index] = static_cast(t0); - } - - template - cl_int setSVMPointers(const T0 &t0, Ts & ... ts) - { - std::array pointerList; - - setSVMPointersHelper<0, 1 + sizeof...(Ts)>(pointerList, t0, ts...); - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - sizeof(void*)*(1 + sizeof...(Ts)), - pointerList.data())); - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - /** - * Make a deep copy of the kernel object including its arguments. - * @return A new kernel object with internal state entirely separate from that - * of the original but with any arguments set on the original intact. - */ - Kernel clone() - { - cl_int error; - Kernel retValue(clCloneKernel(this->get(), &error)); - - detail::errHandler(error, __CLONE_KERNEL_ERR); - return retValue; - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 -}; - -/*! 
\class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: -#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - typedef vector> Binaries; - typedef vector Sources; -#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - typedef vector > Binaries; - typedef vector > Sources; -#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - - Program( - const string& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const size_type length = source.size(); - - Context context = Context::getDefault(err); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, -#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - "-cl-std=CL2.0", -#else - "", -#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - NULL, - NULL); - - detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const string& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const size_type length = source.size(); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - error = ::clBuildProgram( - object_, - 0, - NULL, -#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - "-cl-std=CL2.0", -#else - "", -#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - NULL, - NULL); - - detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - - if (err != NULL) { - *err = error; - } - } - - /** - * Create a 
program from a vector of source strings and the default context. - * Does not compile or link the program. - */ - Program( - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - Context context = Context::getDefault(err); - - const size_type n = (size_type)sources.size(); - - vector lengths(n); - vector strings(n); - - for (size_type i = 0; i < n; ++i) { -#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - strings[i] = sources[(int)i].data(); - lengths[i] = sources[(int)i].length(); -#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; -#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings.data(), lengths.data(), &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - /** - * Create a program from a vector of source strings and a provided context. - * Does not compile or link the program. 
- */ - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const size_type n = (size_type)sources.size(); - - vector lengths(n); - vector strings(n); - - for (size_type i = 0; i < n; ++i) { -#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - strings[i] = sources[(int)i].data(); - lengths[i] = sources[(int)i].length(); -#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; -#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings.data(), lengths.data(), &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 || (CL_HPP_TARGET_OPENCL_VERSION==200 && defined(CL_HPP_USE_IL_KHR)) - /** - * Program constructor to allow construction of program from SPIR-V or another IL. - * Valid for either OpenCL >= 2.1 or when CL_HPP_USE_IL_KHR is defined. 
- */ - Program( - const vector& IL, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(err); - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - object_ = ::clCreateProgramWithIL( - context(), static_cast(IL.data()), IL.size(), &error); - -#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; - static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); - - return detail::errHandler( - pfn_clCreateProgramWithILKHR( - context(), static_cast(IL.data()), IL.size(), &error); - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, -#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - "-cl-std=CL2.0", -#else - "", -#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - NULL, - NULL); - - detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - - if (err != NULL) { - *err = error; - } - } - - /** - * Program constructor to allow construction of program from SPIR-V or another IL - * for a specific context. - * Valid for either OpenCL >= 2.1 or when CL_HPP_USE_IL_KHR is defined. 
- */ - Program( - const Context& context, - const vector& IL, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - object_ = ::clCreateProgramWithIL( - context(), static_cast(IL.data()), IL.size(), &error); - -#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; - static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); - - return detail::errHandler( - pfn_clCreateProgramWithILKHR( - context(), static_cast(IL.data()), IL.size(), &error); - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); - - if (error == CL_SUCCESS && build) { - error = ::clBuildProgram( - object_, - 0, - NULL, -#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - "-cl-std=CL2.0", -#else - "", -#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - NULL, - NULL); - - detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - - if (err != NULL) { - *err = error; - } - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - /** - * Construct a program object from a list of devices and a per-device list of binaries. - * \param context A valid OpenCL context in which to construct the program. - * \param devices A vector of OpenCL device objects for which the program will be created. - * \param binaries A vector of pairs of a pointer to a binary object and its length. - * \param binaryStatus An optional vector that on completion will be resized to - * match the size of binaries and filled with values to specify if each binary - * was successfully loaded. - * Set to CL_SUCCESS if the binary was successfully loaded. - * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. - * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. 
- * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: - * CL_INVALID_CONTEXT if context is not a valid context. - * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; - * or if any entry in binaries is NULL or has length 0. - * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. - * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. - */ - Program( - const Context& context, - const vector& devices, - const Binaries& binaries, - vector* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - - const size_type numDevices = devices.size(); - - // Catch size mismatch early and return - if(binaries.size() != numDevices) { - error = CL_INVALID_VALUE; - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - - vector lengths(numDevices); - vector images(numDevices); -#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - for (size_type i = 0; i < numDevices; ++i) { - images[i] = binaries[i].data(); - lengths[i] = binaries[(int)i].size(); - } -#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - for (size_type i = 0; i < numDevices; ++i) { - images[i] = (const unsigned char*)binaries[i].first; - lengths[i] = binaries[(int)i].second; - } -#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - - vector deviceIDs(numDevices); - for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - if(binaryStatus) { - binaryStatus->resize(numDevices); - } - - object_ = 
::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - deviceIDs.data(), - lengths.data(), images.data(), (binaryStatus != NULL && numDevices > 0) - ? &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /** - * Create program using builtin kernels. - * \param kernelNames Semi-colon separated list of builtin kernel names - */ - Program( - const Context& context, - const vector& devices, - const string& kernelNames, - cl_int* err = NULL) - { - cl_int error; - - - size_type numDevices = devices.size(); - vector deviceIDs(numDevices); - for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateProgramWithBuiltInKernels( - context(), - (cl_uint) devices.size(), - deviceIDs.data(), - kernelNames.c_str(), - &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - - Program() { } - - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - */ - explicit Program(const cl_program& program, bool retainObject = false) : - detail::Wrapper(program, retainObject) { } - - Program& operator = (const cl_program& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Program(const Program& program) : detail::Wrapper(program) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. 
- */ - Program& operator = (const Program &program) - { - detail::Wrapper::operator=(program); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Program(Program&& program) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(program)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Program& operator = (Program &&program) - { - detail::Wrapper::operator=(std::move(program)); - return *this; - } - - cl_int build( - const vector& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - size_type numDevices = devices.size(); - vector deviceIDs(numDevices); - - for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - cl_int buildError = ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - deviceIDs.data(), - options, - notifyFptr, - data); - - return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - - cl_int build( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - cl_int buildError = ::clBuildProgram( - object_, - 0, - NULL, - options, - notifyFptr, - data); - - - return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_int compile( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - cl_int error = ::clCompileProgram( - object_, - 0, - NULL, - options, - 0, - NULL, - NULL, - notifyFptr, - data); - return detail::buildErrHandler(error, __COMPILE_PROGRAM_ERR, getBuildInfo()); - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - 
detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /** - * Build info function that returns a vector of device/info pairs for the specified - * info type and for all devices in the program. - * On an error reading the info for any device, an empty vector of info will be returned. 
- */ - template - vector::param_type>> - getBuildInfo(cl_int *err = NULL) const - { - cl_int result = CL_SUCCESS; - - auto devs = getInfo(&result); - vector::param_type>> - devInfo; - - // If there was an initial error from getInfo return the error - if (result != CL_SUCCESS) { - if (err != NULL) { - *err = result; - } - return devInfo; - } - - for (const cl::Device &d : devs) { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - result = getBuildInfo(d, name, ¶m); - devInfo.push_back( - std::pair::param_type> - (d, param)); - if (result != CL_SUCCESS) { - // On error, leave the loop and return the error code - break; - } - } - if (err != NULL) { - *err = result; - } - if (result != CL_SUCCESS) { - devInfo.clear(); - } - return devInfo; - } - - cl_int createKernels(vector* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - vector value(numKernels); - - err = ::clCreateKernelsInProgram( - object_, numKernels, value.data(), NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - if (kernels) { - kernels->resize(value.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < value.size(); i++) { - // We do not need to retain because this kernel is being created - // by the runtime - (*kernels)[i] = Kernel(value[i], false); - } - } - return CL_SUCCESS; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 220 - /*! \brief Registers a callback function to be called when destructors for - * program scope global variables are complete and before the - * program is released. - * - * Wraps clSetProgramReleaseCallback(). 
- * - * Each call to this function registers the specified user callback function - * on a callback stack associated with program. The registered user callback - * functions are called in the reverse order in which they were registered. - */ - cl_int setReleaseCallback( - void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetProgramReleaseCallback( - object_, - pfn_notify, - user_data), - __SET_PROGRAM_RELEASE_CALLBACK_ERR); - } - - /*! \brief Sets a SPIR-V specialization constant. - * - * Wraps clSetProgramSpecializationConstant(). - */ - template - typename std::enable_if::value, cl_int>::type - setSpecializationConstant(cl_uint index, const T &value) - { - return detail::errHandler( - ::clSetProgramSpecializationConstant( - object_, - index, - sizeof(value), - &value), - __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); - } - - /*! \brief Sets a SPIR-V specialization constant. - * - * Wraps clSetProgramSpecializationConstant(). 
- */ - cl_int setSpecializationConstant(cl_uint index, size_type size, const void* value) - { - return detail::errHandler( - ::clSetProgramSpecializationConstant( - object_, - index, - size, - value), - __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 -}; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -inline Program linkProgram( - Program input1, - Program input2, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int error_local = CL_SUCCESS; - - cl_program programs[2] = { input1(), input2() }; - - Context ctx = input1.getInfo(&error_local); - if(error_local!=CL_SUCCESS) { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - 2, - programs, - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = error_local; - } - - return Program(prog); -} - -inline Program linkProgram( - vector inputPrograms, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int error_local = CL_SUCCESS; - - vector programs(inputPrograms.size()); - - for (unsigned int i = 0; i < inputPrograms.size(); i++) { - programs[i] = inputPrograms[i](); - } - - Context ctx; - if(inputPrograms.size() > 0) { - ctx = inputPrograms[0].getInfo(&error_local); - if(error_local!=CL_SUCCESS) { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - } - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - (cl_uint)inputPrograms.size(), - programs.data(), - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = error_local; - } - - return Program(prog, false); -} -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - -// Template specialization for 
CL_PROGRAM_BINARIES -template <> -inline cl_int cl::Program::getInfo(cl_program_info name, vector>* param) const -{ - if (name != CL_PROGRAM_BINARIES) { - return CL_INVALID_VALUE; - } - if (param) { - // Resize the parameter array appropriately for each allocation - // and pass down to the helper - - vector sizes = getInfo(); - size_type numBinaries = sizes.size(); - - // Resize the parameter array and constituent arrays - param->resize(numBinaries); - for (size_type i = 0; i < numBinaries; ++i) { - (*param)[i].resize(sizes[i]); - } - - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - return CL_SUCCESS; -} - -template<> -inline vector> cl::Program::getInfo(cl_int* err) const -{ - vector> binariesVectors; - - cl_int result = getInfo(CL_PROGRAM_BINARIES, &binariesVectors); - if (err != NULL) { - *err = result; - } - return binariesVectors; -} - -#if CL_HPP_TARGET_OPENCL_VERSION >= 220 -// Template specialization for clSetProgramSpecializationConstant -template <> -inline cl_int cl::Program::setSpecializationConstant(cl_uint index, const bool &value) -{ - cl_uchar ucValue = value ? CL_UCHAR_MAX : 0; - return detail::errHandler( - ::clSetProgramSpecializationConstant( - object_, - index, - sizeof(ucValue), - &ucValue), - __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); -} -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -enum class QueueProperties : cl_command_queue_properties -{ - None = 0, - Profiling = CL_QUEUE_PROFILING_ENABLE, - OutOfOrder = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, -}; - -inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) -{ - return static_cast(static_cast(lhs) | static_cast(rhs)); -} - -/*! 
\class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -private: - static std::once_flag default_initialized_; - static CommandQueue default_; - static cl_int default_error_; - - /*! \brief Create the default command queue returned by @ref getDefault. - * - * It sets default_error_ to indicate success or failure. It does not throw - * @c cl::Error. - */ - static void makeDefault() - { - /* We don't want to throw an error from this function, so we have to - * catch and set the error flag. - */ -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - try -#endif - { - int error; - Context context = Context::getDefault(&error); - - if (error != CL_SUCCESS) { - default_error_ = error; - } - else { - Device device = Device::getDefault(); - default_ = CommandQueue(context, device, 0, &default_error_); - } - } -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - catch (cl::Error &e) { - default_error_ = e.err(); - } -#endif - } - - /*! \brief Create the default command queue. - * - * This sets @c default_. It does not throw - * @c cl::Error. - */ - static void makeDefaultProvided(const CommandQueue &c) { - default_ = c; - } - -public: -#ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Reset the default. - * - * This sets @c default_ to an empty value to support cleanup in - * the unit test framework. - * This function is not thread safe. - */ - static void unitTestClearDefault() { - default_ = CommandQueue(); - } -#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE - - - /*! - * \brief Constructs a CommandQueue based on passed properties. - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
- */ - CommandQueue( - cl_command_queue_properties properties, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - bool useWithProperties; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } -#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; -#else - useWithProperties = false; -#endif - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, 0 }; - if ((properties & CL_QUEUE_ON_DEVICE) == 0) { - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - } - else { - error = CL_INVALID_QUEUE_PROPERTIES; - } - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - } - - /*! - * \brief Constructs a CommandQueue based on passed properties. - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
- */ - CommandQueue( - QueueProperties properties, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - bool useWithProperties; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } -#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; -#else - useWithProperties = false; -#endif - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; - - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), device(), static_cast(properties), &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - - } - } - - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
- */ - explicit CommandQueue( - const Context& context, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - bool useWithProperties; - vector devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) { - *err = error; - } - return; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } -#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; -#else - useWithProperties = false; -#endif - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, 0 }; - if ((properties & CL_QUEUE_ON_DEVICE) == 0) { - object_ = ::clCreateCommandQueueWithProperties( - context(), devices[0](), queue_properties, &error); - } - else { - error = CL_INVALID_QUEUE_PROPERTIES; - } - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), devices[0](), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
- */ - explicit CommandQueue( - const Context& context, - QueueProperties properties, - cl_int* err = NULL) - { - cl_int error; - bool useWithProperties; - vector devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) { - *err = error; - } - return; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } -#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; -#else - useWithProperties = false; -#endif - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), devices[0](), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), devices[0](), static_cast(properties), &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - - /*! - * \brief Constructs a CommandQueue for a passed device and context - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
- */ - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - bool useWithProperties; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } -#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; -#else - useWithProperties = false; -#endif - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - - /*! - * \brief Constructs a CommandQueue for a passed device and context - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
- */ - CommandQueue( - const Context& context, - const Device& device, - QueueProperties properties, - cl_int* err = NULL) - { - cl_int error; - bool useWithProperties; - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } -#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; -#else - useWithProperties = false; -#endif - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), device(), static_cast(properties), &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - - static CommandQueue getDefault(cl_int * err = NULL) - { - std::call_once(default_initialized_, makeDefault); -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); -#else // CL_HPP_TARGET_OPENCL_VERSION >= 200 - detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_ERR); -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - /** - * Modify the default command queue to be used by - * subsequent operations. - * Will only set the default if no default was previously created. - * @return updated default command queue. 
- * Should be compared to the passed value to ensure that it was updated. - */ - static CommandQueue setDefault(const CommandQueue &default_queue) - { - std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_queue)); - detail::errHandler(default_error_); - return default_; - } - - CommandQueue() { } - - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - */ - explicit CommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : - detail::Wrapper(commandQueue, retainObject) { } - - CommandQueue& operator = (const cl_command_queue& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(const CommandQueue& queue) : detail::Wrapper(queue) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue& operator = (const CommandQueue &queue) - { - detail::Wrapper::operator=(queue); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(CommandQueue&& queue) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(queue)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - CommandQueue& operator = (CommandQueue &&queue) - { - detail::Wrapper::operator=(std::move(queue)); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - size_type offset, - size_type size, - void* ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - size_type offset, - size_type size, - const void* ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_WRITE_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - size_type src_offset, - size_type dst_offset, - size_type size, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - void *ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - buffer_offset.data(), - host_offset.data(), - region.data(), - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_READ_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - const void *ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - buffer_offset.data(), - host_offset.data(), - region.data(), - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - size_type src_row_pitch, - size_type src_slice_pitch, - size_type dst_row_pitch, - size_type dst_slice_pitch, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - src_origin.data(), - dst_origin.data(), - region.data(), - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQEUE_COPY_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /** - * Enqueue a command to fill a buffer object with a pattern - * of a given size. The pattern is specified as a vector type. - * \tparam PatternType The datatype of the pattern field. - * The pattern type must be an accepted OpenCL data type. - * \tparam offset Is the offset in bytes into the buffer at - * which to start filling. This must be a multiple of - * the pattern size. - * \tparam size Is the size in bytes of the region to fill. - * This must be a multiple of the pattern size. - */ - template - cl_int enqueueFillBuffer( - const Buffer& buffer, - PatternType pattern, - size_type offset, - size_type size, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillBuffer( - object_, - buffer(), - static_cast(&pattern), - sizeof(PatternType), - offset, - size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - void* ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadImage( - object_, - image(), - blocking, - origin.data(), - region.data(), - row_pitch, - slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_READ_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - const void* ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteImage( - object_, - image(), - blocking, - origin.data(), - region.data(), - row_pitch, - slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImage( - object_, - src(), - dst(), - src_origin.data(), - dst_origin.data(), - region.data(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA floating-point color value if - * the image channel data type is not an unnormalized signed or - * unsigned data type. 
- */ - cl_int enqueueFillImage( - const Image& image, - cl_float4 fillColor, - const array& origin, - const array& region, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - origin.data(), - region.data(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA signed integer color value if - * the image channel data type is an unnormalized signed integer - * type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_int4 fillColor, - const array& origin, - const array& region, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - origin.data(), - region.data(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA unsigned integer color value if - * the image channel data type is an unnormalized unsigned integer - * type. 
- */ - cl_int enqueueFillImage( - const Image& image, - cl_uint4 fillColor, - const array& origin, - const array& region, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - origin.data(), - region.data(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const array& src_origin, - const array& region, - size_type dst_offset, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, - src(), - dst(), - src_origin.data(), - region.data(), - dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - size_type src_offset, - const array& dst_origin, - const array& region, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, - src(), - dst(), - src_offset, - dst_origin.data(), - region.data(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - size_type offset, - size_type size, - const vector* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_event tmp; - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - if (event != NULL && error == CL_SUCCESS) - *event = tmp; - - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const array& origin, - const array& region, - size_type * row_pitch, - size_type * slice_pitch, - const vector* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_event tmp; - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - origin.data(), - region.data(), - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - if (event != NULL && error == CL_SUCCESS) - *event = tmp; - return result; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /** - * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. - * This variant takes a raw SVM pointer. 
- */ - template - cl_int enqueueMapSVM( - T* ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMap( - object_, blocking, flags, static_cast(ptr), size, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MAP_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - - /** - * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. - * This variant takes a cl::pointer instance. - */ - template - cl_int enqueueMapSVM( - cl::pointer &ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMap( - object_, blocking, flags, static_cast(ptr.get()), size, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MAP_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. - * This variant takes a cl::vector instance. - */ - template - cl_int enqueueMapSVM( - cl::vector &container, - cl_bool blocking, - cl_map_flags flags, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMap( - object_, blocking, flags, static_cast(container.data()), container.size(), - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_MAP_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /** - * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. - * This variant takes a raw SVM pointer. - */ - template - cl_int enqueueUnmapSVM( - T* ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueSVMUnmap( - object_, static_cast(ptr), - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. - * This variant takes a cl::pointer instance. - */ - template - cl_int enqueueUnmapSVM( - cl::pointer &ptr, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueSVMUnmap( - object_, static_cast(ptr.get()), - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. - * This variant takes a cl::vector instance. - */ - template - cl_int enqueueUnmapSVM( - cl::vector &container, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueSVMUnmap( - object_, static_cast(container.data()), - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /** - * Enqueues a marker command which waits for either a list of events to complete, - * or all previously enqueued commands to complete. - * - * Enqueues a marker command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command returns an event which can be waited on, - * i.e. this event can be waited on to insure that all events either in the event_wait_list - * or all previously enqueued commands, queued before this command to command_queue, - * have completed. - */ - cl_int enqueueMarkerWithWaitList( - const vector *events = 0, - Event *event = 0) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarkerWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_MARKER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * A synchronization point that enqueues a barrier operation. - * - * Enqueues a barrier command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command blocks command execution, that is, any - * following commands enqueued after it do not execute until it completes. This command - * returns an event which can be waited on, i.e. this event can be waited on to insure that - * all events either in the event_wait_list or all previously enqueued commands, queued - * before this command to command_queue, have completed. - */ - cl_int enqueueBarrierWithWaitList( - const vector *events = 0, - Event *event = 0) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueBarrierWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_BARRIER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command to indicate with which device a set of memory objects - * should be associated. - */ - cl_int enqueueMigrateMemObjects( - const vector &memObjects, - cl_mem_migration_flags flags, - const vector* events = NULL, - Event* event = NULL - ) const - { - cl_event tmp; - - vector localMemObjects(memObjects.size()); - - for( int i = 0; i < (int)memObjects.size(); ++i ) { - localMemObjects[i] = memObjects[i](); - } - - cl_int err = detail::errHandler( - ::clEnqueueMigrateMemObjects( - object_, - (cl_uint)memObjects.size(), - localMemObjects.data(), - flags, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - /** - * Enqueues a command that will allow the host associate ranges within a set of - * SVM allocations with a device. - * @param sizes - The length from each pointer to migrate. - */ - template - cl_int enqueueMigrateSVM( - const cl::vector &svmRawPointers, - const cl::vector &sizes, - cl_mem_migration_flags flags = 0, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMigrateMem( - object_, - svmRawPointers.size(), static_cast(svmRawPointers.data()), - sizes.data(), // array of sizes not passed - flags, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MIGRATE_SVM_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will allow the host associate a set of SVM allocations with - * a device. - */ - template - cl_int enqueueMigrateSVM( - const cl::vector &svmRawPointers, - cl_mem_migration_flags flags = 0, - const vector* events = NULL, - Event* event = NULL) const - { - return enqueueMigrateSVM(svmRawPointers, cl::vector(svmRawPointers.size()), flags, events, event); - } - - - /** - * Enqueues a command that will allow the host associate ranges within a set of - * SVM allocations with a device. - * @param sizes - The length from each pointer to migrate. 
- */ - template - cl_int enqueueMigrateSVM( - const cl::vector> &svmPointers, - const cl::vector &sizes, - cl_mem_migration_flags flags = 0, - const vector* events = NULL, - Event* event = NULL) const - { - cl::vector svmRawPointers; - svmRawPointers.reserve(svmPointers.size()); - for (auto p : svmPointers) { - svmRawPointers.push_back(static_cast(p.get())); - } - - return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); - } - - - /** - * Enqueues a command that will allow the host associate a set of SVM allocations with - * a device. - */ - template - cl_int enqueueMigrateSVM( - const cl::vector> &svmPointers, - cl_mem_migration_flags flags = 0, - const vector* events = NULL, - Event* event = NULL) const - { - return enqueueMigrateSVM(svmPointers, cl::vector(svmPointers.size()), flags, events, event); - } - - /** - * Enqueues a command that will allow the host associate ranges within a set of - * SVM allocations with a device. - * @param sizes - The length from the beginning of each container to migrate. - */ - template - cl_int enqueueMigrateSVM( - const cl::vector> &svmContainers, - const cl::vector &sizes, - cl_mem_migration_flags flags = 0, - const vector* events = NULL, - Event* event = NULL) const - { - cl::vector svmRawPointers; - svmRawPointers.reserve(svmContainers.size()); - for (auto p : svmContainers) { - svmRawPointers.push_back(static_cast(p.data())); - } - - return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); - } - - /** - * Enqueues a command that will allow the host associate a set of SVM allocations with - * a device. 
- */ - template - cl_int enqueueMigrateSVM( - const cl::vector> &svmContainers, - cl_mem_migration_flags flags = 0, - const vector* events = NULL, - Event* event = NULL) const - { - return enqueueMigrateSVM(svmContainers, cl::vector(svmContainers.size()), flags, events, event); - } - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local = NullRange, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const size_type*) offset : NULL, - (const size_type*) global, - local.dimensions() != 0 ? (const size_type*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NDRANGE_KERNEL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) - CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( - const Kernel& kernel, - const vector* events = NULL, - Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_TASK_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) - - cl_int enqueueNativeKernel( - void (CL_CALLBACK *userFptr)(void *), - std::pair args, - const vector* mem_objects = NULL, - const vector* mem_locs = NULL, - const vector* events = NULL, - Event* event = NULL) const - { - size_type elements = 0; - if (mem_objects != NULL) { - elements = mem_objects->size(); - } - vector mems(elements); - for (unsigned int i = 0; i < elements; i++) { - mems[i] = ((*mem_objects)[i])(); - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems.data(), - (mem_locs != NULL && mem_locs->size() > 0) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NATIVE_KERNEL); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarker( - object_, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const vector& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - events.size() > 0 ? 
(const cl_event*) &events.front() : NULL), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } -#endif // defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - - cl_int enqueueAcquireGLObjects( - const vector* mem_objects = NULL, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseGLObjects( - const vector* mem_objects = NULL, - const vector* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined (CL_HPP_USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const vector* mem_objects = NULL, - const vector* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueAcquireD3D10ObjectsKHR); -#endif -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueAcquireD3D10ObjectsKHR); -#endif - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseD3D10Objects( - const vector* mem_objects = NULL, - const vector* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueReleaseD3D10ObjectsKHR); -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueReleaseD3D10ObjectsKHR); -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } -#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; // CommandQueue - -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandQueue::default_initialized_; -CL_HPP_DEFINE_STATIC_MEMBER_ CommandQueue CommandQueue::default_; -CL_HPP_DEFINE_STATIC_MEMBER_ cl_int CommandQueue::default_error_ = CL_SUCCESS; - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -enum class DeviceQueueProperties : cl_command_queue_properties -{ - None = 0, - Profiling = CL_QUEUE_PROFILING_ENABLE, -}; - -inline DeviceQueueProperties operator|(DeviceQueueProperties lhs, DeviceQueueProperties rhs) -{ - return static_cast(static_cast(lhs) | static_cast(rhs)); -} - -/*! \class DeviceCommandQueue - * \brief DeviceCommandQueue interface for device cl_command_queues. - */ -class DeviceCommandQueue : public detail::Wrapper -{ -public: - - /*! - * Trivial empty constructor to create a null queue. - */ - DeviceCommandQueue() { } - - /*! 
- * Default construct device command queue on default context and device - */ - DeviceCommandQueue(DeviceQueueProperties properties, cl_int* err = NULL) - { - cl_int error; - cl::Context context = cl::Context::getDefault(); - cl::Device device = cl::Device::getDefault(); - - cl_command_queue_properties mergedProperties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); - - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, mergedProperties, 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! - * Create a device command queue for a specified device in the passed context. - */ - DeviceCommandQueue( - const Context& context, - const Device& device, - DeviceQueueProperties properties = DeviceQueueProperties::None, - cl_int* err = NULL) - { - cl_int error; - - cl_command_queue_properties mergedProperties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, mergedProperties, 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! - * Create a device command queue for a specified device in the passed context. 
- */ - DeviceCommandQueue( - const Context& context, - const Device& device, - cl_uint queueSize, - DeviceQueueProperties properties = DeviceQueueProperties::None, - cl_int* err = NULL) - { - cl_int error; - - cl_command_queue_properties mergedProperties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, mergedProperties, - CL_QUEUE_SIZE, queueSize, - 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructor from cl_command_queue - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - */ - explicit DeviceCommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : - detail::Wrapper(commandQueue, retainObject) { } - - DeviceCommandQueue& operator = (const cl_command_queue& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - DeviceCommandQueue(const DeviceCommandQueue& queue) : detail::Wrapper(queue) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - DeviceCommandQueue& operator = (const DeviceCommandQueue &queue) - { - detail::Wrapper::operator=(queue); - return *this; - } - - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - DeviceCommandQueue(DeviceCommandQueue&& queue) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(queue)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - DeviceCommandQueue& operator = (DeviceCommandQueue &&queue) - { - detail::Wrapper::operator=(std::move(queue)); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! - * Create a new default device command queue for the default device, - * in the default context and of the default size. - * If there is already a default queue for the specified device this - * function will return the pre-existing queue. - */ - static DeviceCommandQueue makeDefault( - cl_int *err = nullptr) - { - cl_int error; - cl::Context context = cl::Context::getDefault(); - cl::Device device = cl::Device::getDefault(); - - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, - 0 }; - DeviceCommandQueue deviceQueue( - ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error)); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - - return deviceQueue; - } - - /*! - * Create a new default device command queue for the specified device - * and of the default size. - * If there is already a default queue for the specified device this - * function will return the pre-existing queue. 
- */ - static DeviceCommandQueue makeDefault( - const Context &context, const Device &device, cl_int *err = nullptr) - { - cl_int error; - - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, - 0 }; - DeviceCommandQueue deviceQueue( - ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error)); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - - return deviceQueue; - } - - /*! - * Create a new default device command queue for the specified device - * and of the requested size in bytes. - * If there is already a default queue for the specified device this - * function will return the pre-existing queue. - */ - static DeviceCommandQueue makeDefault( - const Context &context, const Device &device, cl_uint queueSize, cl_int *err = nullptr) - { - cl_int error; - - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, - CL_QUEUE_SIZE, queueSize, - 0 }; - DeviceCommandQueue deviceQueue( - ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error)); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != NULL) { - *err = error; - } - - return deviceQueue; - } - - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - /*! - * Modify the default device command queue to be used for subsequent kernels. - * This can update the default command queue for a device repeatedly to account - * for kernels that rely on the default. - * @return updated default device command queue. 
- */ - static DeviceCommandQueue updateDefault(const Context &context, const Device &device, const DeviceCommandQueue &default_queue, cl_int *err = nullptr) - { - cl_int error; - error = clSetDefaultDeviceCommandQueue(context.get(), device.get(), default_queue.get()); - - detail::errHandler(error, __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - return default_queue; - } - - /*! - * Return the current default command queue for the specified command queue - */ - static DeviceCommandQueue getDefault(const CommandQueue &queue, cl_int * err = NULL) - { - return queue.getInfo(err); - } - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 -}; // DeviceCommandQueue - -namespace detail -{ - // Specialization for device command queue - template <> - struct KernelArgumentHandler - { - static size_type size(const cl::DeviceCommandQueue&) { return sizeof(cl_command_queue); } - static const cl_command_queue* ptr(const cl::DeviceCommandQueue& value) { return &(value()); } - }; -} // namespace detail - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - - -template< typename IteratorType > -Buffer::Buffer( - const Context &context, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - size_type size = sizeof(DataType)*(endIterator - startIterator); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - CommandQueue queue(context, 0, &error); - detail::errHandler(error, 
__CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } -} - -template< typename IteratorType > -Buffer::Buffer( - const CommandQueue &queue, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if (readOnly) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if (useHostPtr) { - flags |= CL_MEM_USE_HOST_PTR; - } - - size_type size = sizeof(DataType)*(endIterator - startIterator); - - Context context = queue.getInfo(); - - if (useHostPtr) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } - else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if (!useHostPtr) { - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } -} - -inline cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - size_type offset, - size_type size, - void* ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - size_type offset, - size_type size, - const void* ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return 
queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - size_type offset, - size_type size, - const vector* events = NULL, - Event* event = NULL, - cl_int* err = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - void * result = ::clEnqueueMapBuffer( - queue(), buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; -} - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -/** - * Enqueues to the default queue a command that will allow the host to - * update a region of a coarse-grained SVM buffer. - * This variant takes a raw SVM pointer. - */ -template -inline cl_int enqueueMapSVM( - T* ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events, - Event* event) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - } - - return queue.enqueueMapSVM( - ptr, blocking, flags, size, events, event); -} - -/** - * Enqueues to the default queue a command that will allow the host to - * update a region of a coarse-grained SVM buffer. - * This variant takes a cl::pointer instance. 
- */ -template -inline cl_int enqueueMapSVM( - cl::pointer ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - } - - return queue.enqueueMapSVM( - ptr, blocking, flags, size, events, event); -} - -/** - * Enqueues to the default queue a command that will allow the host to - * update a region of a coarse-grained SVM buffer. - * This variant takes a cl::vector instance. - */ -template -inline cl_int enqueueMapSVM( - cl::vector container, - cl_bool blocking, - cl_map_flags flags, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - } - - return queue.enqueueMapSVM( - container, blocking, flags, events, event); -} - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -inline cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (error != CL_SUCCESS) { - return error; - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != NULL) ? (cl_uint)events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; -} - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -/** - * Enqueues to the default queue a command that will release a coarse-grained - * SVM buffer back to the OpenCL runtime. - * This variant takes a raw SVM pointer. 
- */ -template -inline cl_int enqueueUnmapSVM( - T* ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - -} - -/** - * Enqueues to the default queue a command that will release a coarse-grained - * SVM buffer back to the OpenCL runtime. - * This variant takes a cl::pointer instance. - */ -template -inline cl_int enqueueUnmapSVM( - cl::pointer &ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); -} - -/** - * Enqueues to the default queue a command that will release a coarse-grained - * SVM buffer back to the OpenCL runtime. - * This variant takes a cl::vector instance. 
- */ -template -inline cl_int enqueueUnmapSVM( - cl::vector &container, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - return detail::errHandler(queue.enqueueUnmapSVM(container, events, event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); -} - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -inline cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - size_type src_offset, - size_type dst_offset, - size_type size, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, startIterator, endIterator, buffer); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, buffer, startIterator, endIterator); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses specified queue. 
- */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - size_type length = endIterator-startIterator; - size_type byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } -#if defined(_MSC_VER) - std::copy( - startIterator, - endIterator, - stdext::checked_array_iterator( - pointer, length)); -#else - std::copy(startIterator, endIterator, pointer); -#endif - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses specified queue. 
- */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - size_type length = endIterator-startIterator; - size_type byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } - std::copy(pointer, pointer + length, startIterator); - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -/** - * Blocking SVM map operation - performs a blocking map underneath. - */ -template -inline cl_int mapSVM(cl::vector &container) -{ - return enqueueMapSVM(container, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE); -} - -/** -* Blocking SVM map operation - performs a blocking map underneath. 
-*/ -template -inline cl_int unmapSVM(cl::vector &container) -{ - return enqueueUnmapSVM(container); -} - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -#if CL_HPP_TARGET_OPENCL_VERSION >= 110 -inline cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - void *ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - const void *ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - size_type src_row_pitch, - size_type src_slice_pitch, - size_type dst_row_pitch, - size_type dst_slice_pitch, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if 
(error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferRect( - src, - dst, - src_origin, - dst_origin, - region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 - -inline cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - void* ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - const void* ptr, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImage( - src, - dst, - src_origin, - dst_origin, - region, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const array& src_origin, - const array& region, - size_type dst_offset, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = 
CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImageToBuffer( - src, - dst, - src_origin, - region, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - size_type src_offset, - const array& dst_origin, - const array& region, - const vector* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferToImage( - src, - dst, - src_offset, - dst_origin, - region, - events, - event); -} - - -inline cl_int flush(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.flush(); -} - -inline cl_int finish(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - - return queue.finish(); -} - -class EnqueueArgs -{ -private: - CommandQueue queue_; - const NDRange offset_; - const NDRange global_; - const NDRange local_; - vector events_; - - template - friend class KernelFunctor; - -public: - EnqueueArgs(NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(Event e, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - 
global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(const vector &events, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(const vector &events, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(const vector &events, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global) : - queue_(queue), - offset_(NullRange), - 
global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const vector &events, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } -}; - - -//---------------------------------------------------------------------------------------------- - - -/** - * Type safe kernel functor. - * - */ -template -class KernelFunctor -{ -private: - Kernel kernel_; - - template - void setArgs(T0&& t0, T1s&&... t1s) - { - kernel_.setArg(index, t0); - setArgs(std::forward(t1s)...); - } - - template - void setArgs(T0&& t0) - { - kernel_.setArg(index, t0); - } - - template - void setArgs() - { - } - - -public: - KernelFunctor(Kernel kernel) : kernel_(kernel) - {} - - KernelFunctor( - const Program& program, - const string name, - cl_int * err = NULL) : - kernel_(program, name.c_str(), err) - {} - - //! \brief Return type of the functor - typedef Event result_type; - - /** - * Enqueue kernel. - * @param args Launch parameters of the kernel. - * @param t0... List of kernel arguments based on the template type of the functor. - */ - Event operator() ( - const EnqueueArgs& args, - Ts... ts) - { - Event event; - setArgs<0>(std::forward(ts)...); - - args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } - - /** - * Enqueue kernel with support for error code. - * @param args Launch parameters of the kernel. - * @param t0... List of kernel arguments based on the template type of the functor. - * @param error Out parameter returning the error code from the execution. - */ - Event operator() ( - const EnqueueArgs& args, - Ts... 
ts, - cl_int &error) - { - Event event; - setArgs<0>(std::forward(ts)...); - - error = args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - cl_int setSVMPointers(const vector &pointerList) - { - return kernel_.setSVMPointers(pointerList); - } - - template - cl_int setSVMPointers(const T0 &t0, T1s &... ts) - { - return kernel_.setSVMPointers(t0, ts...); - } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - - Kernel getKernel() - { - return kernel_; - } -}; - -namespace compatibility { - /** - * Backward compatibility class to ensure that cl.hpp code works with cl2.hpp. - * Please use KernelFunctor directly. - */ - template - struct make_kernel - { - typedef KernelFunctor FunctorType; - - FunctorType functor_; - - make_kernel( - const Program& program, - const string name, - cl_int * err = NULL) : - functor_(FunctorType(program, name, err)) - {} - - make_kernel( - const Kernel kernel) : - functor_(FunctorType(kernel)) - {} - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - Ts...); - - Event operator()( - const EnqueueArgs& enqueueArgs, - Ts... 
args) - { - return functor_( - enqueueArgs, args...); - } - }; -} // namespace compatibility - - -//---------------------------------------------------------------------------------------------------------------------- - -#undef CL_HPP_ERR_STR_ -#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_PLATFORM_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_ARG_INFO_ERR -#undef __GET_KERNEL_SUB_GROUP_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR -#undef __CREATE_CONTEXT_ERR -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR -#undef __CREATE_BUFFER_ERR -#undef __COPY_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_GL_BUFFER_ERR -#undef __CREATE_GL_RENDER_BUFFER_ERR -#undef __GET_GL_OBJECT_INFO_ERR -#undef __CREATE_IMAGE_ERR -#undef __CREATE_GL_TEXTURE_ERR -#undef __IMAGE_DIMENSION_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR -#undef __WAIT_FOR_EVENTS_ERR -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_IL_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __CREATE_PROGRAM_WITH_IL_ERR -#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR -#undef __BUILD_PROGRAM_ERR -#undef __COMPILE_PROGRAM_ERR -#undef __LINK_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR -#undef __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR -#undef __CREATE_SAMPLER_WITH_PROPERTIES_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef 
__ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_FILL_BUFFER_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_FILL_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_NATIVE_KERNEL -#undef __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR -#undef __ENQUEUE_MIGRATE_SVM_ERR -#undef __ENQUEUE_ACQUIRE_GL_ERR -#undef __ENQUEUE_RELEASE_GL_ERR -#undef __CREATE_PIPE_ERR -#undef __GET_PIPE_INFO_ERR -#undef __RETAIN_ERR -#undef __RELEASE_ERR -#undef __FLUSH_ERR -#undef __FINISH_ERR -#undef __VECTOR_CAPACITY_ERR -#undef __CREATE_SUB_DEVICES_ERR -#undef __CREATE_SUB_DEVICES_ERR -#undef __ENQUEUE_MARKER_ERR -#undef __ENQUEUE_WAIT_FOR_EVENTS_ERR -#undef __ENQUEUE_BARRIER_ERR -#undef __UNLOAD_COMPILER_ERR -#undef __CREATE_GL_TEXTURE_2D_ERR -#undef __CREATE_GL_TEXTURE_3D_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __ENQUEUE_TASK_ERR -#undef __CREATE_SAMPLER_ERR -#undef __ENQUEUE_MARKER_WAIT_LIST_ERR -#undef __ENQUEUE_BARRIER_WAIT_LIST_ERR -#undef __CLONE_KERNEL_ERR -#undef __GET_HOST_TIMER_ERR -#undef __GET_DEVICE_AND_HOST_TIMER_ERR - -#endif //CL_HPP_USER_OVERRIDE_ERROR_STRINGS - -// Extensions -#undef CL_HPP_INIT_CL_EXT_FCN_PTR_ -#undef CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_ - -#if defined(CL_HPP_USE_CL_DEVICE_FISSION) -#undef CL_HPP_PARAM_NAME_DEVICE_FISSION_ -#endif // CL_HPP_USE_CL_DEVICE_FISSION - -#undef CL_HPP_NOEXCEPT_ -#undef CL_HPP_DEFINE_STATIC_MEMBER_ - -} // namespace cl - -#endif // CL_HPP_ +// +// Copyright (c) 2020 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#warning cl2.hpp has been renamed to opencl.hpp to make it clear that it supports all versions of OpenCL. Please include opencl.hpp directly. diff --git a/external/CL/opencl.h b/external/CL/opencl.h new file mode 100644 index 000000000..1c4e10c88 --- /dev/null +++ b/external/CL/opencl.h @@ -0,0 +1,33 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ + +#ifndef __OPENCL_H +#define __OPENCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_H */ diff --git a/external/CL/opencl.hpp b/external/CL/opencl.hpp new file mode 100644 index 000000000..123e9190b --- /dev/null +++ b/external/CL/opencl.hpp @@ -0,0 +1,10285 @@ +// +// Copyright (c) 2008-2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +/*! \file + * + * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33), + * OpenCL 1.2 (rev 15), OpenCL 2.0 (rev 29), OpenCL 2.1 (rev 17), + * and OpenCL 2.2 (V2.2-11). + * \author Lee Howes and Bruce Merry + * + * Derived from the OpenCL 1.x C++ bindings written by + * Benedict R. Gaster, Laurent Morichetti and Lee Howes + * With additions and fixes from: + * Brian Cole, March 3rd 2010 and April 2012 + * Matt Gruenke, April 2012. + * Bruce Merry, February 2013. 
+ * Tom Deakin and Simon McIntosh-Smith, July 2013 + * James Price, 2015- + * \version 2.2.0 + * \date 2019-09-18 + * + * Optional extension support + * + * cl_ext_device_fission + * #define CL_HPP_USE_CL_DEVICE_FISSION + * cl_khr_d3d10_sharing + * #define CL_HPP_USE_DX_INTEROP + * cl_khr_sub_groups + * #define CL_HPP_USE_CL_SUB_GROUPS_KHR + * cl_khr_image2d_from_buffer + * #define CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR + * + * Doxygen documentation for this header is available here: + * + * http://khronosgroup.github.io/OpenCL-CLHPP/ + * + * The latest version of this header can be found on the GitHub releases page: + * + * https://github.com/KhronosGroup/OpenCL-CLHPP/releases + * + * Bugs and patches can be submitted to the GitHub repository: + * + * https://github.com/KhronosGroup/OpenCL-CLHPP + */ + +/*! \mainpage + * \section intro Introduction + * For many large applications C++ is the language of choice and so it seems + * reasonable to define C++ bindings for OpenCL. + * + * The interface is contained within a single C++ header file \em opencl.hpp and all + * definitions are contained within the namespace \em cl. There is no additional + * requirement to include \em cl.h and to use either the C++ or original C + * bindings; it is enough to simply include \em opencl.hpp. + * + * The bindings themselves are lightweight and correspond closely to the + * underlying C API. Using the C++ bindings introduces no additional execution + * overhead. + * + * There are numerous compatibility, portability and memory management + * fixes in the new header as well as additional OpenCL 2.0 features. + * As a result the header is not directly backward compatible and for this + * reason we release it as opencl.hpp rather than a new version of cl.hpp.
+ * + * + * \section compatibility Compatibility + * Due to the evolution of the underlying OpenCL API the 2.0 C++ bindings + * include an updated approach to defining supported feature versions + * and the range of valid underlying OpenCL runtime versions supported. + * + * The combination of preprocessor macros CL_HPP_TARGET_OPENCL_VERSION and + * CL_HPP_MINIMUM_OPENCL_VERSION control this range. These are three digit + * decimal values representing OpenCL runtime versions. The default for + * the target is 200, representing OpenCL 2.0 and the minimum is also + * defined as 200. These settings would use 2.0 API calls only. + * If backward compatibility with a 1.2 runtime is required, the minimum + * version may be set to 120. + * + * Note that this is a compile-time setting, and so affects linking against + * a particular SDK version rather than the versioning of the loaded runtime. + * + * The earlier versions of the header included basic vector and string + * classes based loosely on STL versions. These were difficult to + * maintain and very rarely used. For the 2.0 header we now assume + * the presence of the standard library unless requested otherwise. + * We use std::array, std::vector, std::shared_ptr and std::string + * throughout to safely manage memory and reduce the chance of a + * recurrence of earlier memory management bugs. + * + * These classes are used through typedefs in the cl namespace: + * cl::array, cl::vector, cl::pointer and cl::string. + * In addition cl::allocate_pointer forwards to std::allocate_shared + * by default. + * In all cases these standard library classes can be replaced with + * custom interface-compatible versions using the CL_HPP_NO_STD_ARRAY, + * CL_HPP_NO_STD_VECTOR, CL_HPP_NO_STD_UNIQUE_PTR and + * CL_HPP_NO_STD_STRING macros. + * + * The OpenCL 1.x versions of the C++ bindings included a size_t wrapper + * class to interface with kernel enqueue.
This caused unpleasant interactions + * with the standard size_t declaration and led to namespacing bugs. + * In the 2.0 version we have replaced this with a std::array-based interface. + * However, the old behaviour can be regained for backward compatibility + * using the CL_HPP_ENABLE_SIZE_T_COMPATIBILITY macro. + * + * Finally, the program construction interface used a clumsy vector-of-pairs + * design in the earlier versions. We have replaced that with a cleaner + * vector-of-vectors and vector-of-strings design. However, for backward + * compatibility old behaviour can be regained with the + * CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY macro. + * + * In OpenCL 2.0 OpenCL C is not entirely backward compatible with + * earlier versions. As a result a flag must be passed to the OpenCL C + * compiler to request OpenCL 2.0 compilation of kernels with 1.2 as + * the default in the absence of the flag. + * In some cases the C++ bindings automatically compile code for ease. + * For those cases the compilation defaults to OpenCL C 2.0. + * If this is not wanted, the CL_HPP_CL_1_2_DEFAULT_BUILD macro may + * be specified to assume 1.2 compilation. + * If more fine-grained decisions on a per-kernel basis are required + * then explicit build operations that take the flag should be used. + * + * + * \section parameterization Parameters + * This header may be parameterized by a set of preprocessor macros. + * + * - CL_HPP_TARGET_OPENCL_VERSION + * + * Defines the target OpenCL runtime version to build the header + * against. Defaults to 200, representing OpenCL 2.0. + * + * - CL_HPP_NO_STD_STRING + * + * Do not use the standard library string class. cl::string is not + * defined and may be defined by the user before opencl.hpp is + * included. + * + * - CL_HPP_NO_STD_VECTOR + * + * Do not use the standard library vector class. cl::vector is not + * defined and may be defined by the user before opencl.hpp is + * included.
+ * + * - CL_HPP_NO_STD_ARRAY + * + * Do not use the standard library array class. cl::array is not + * defined and may be defined by the user before opencl.hpp is + * included. + * + * - CL_HPP_NO_STD_UNIQUE_PTR + * + * Do not use the standard library unique_ptr class. cl::pointer and + * the cl::allocate_pointer functions are not defined and may be + * defined by the user before opencl.hpp is included. + * + * - CL_HPP_ENABLE_DEVICE_FISSION + * + * Enables device fission for OpenCL 1.2 platforms. + * + * - CL_HPP_ENABLE_EXCEPTIONS + * + * Enable exceptions for use in the C++ bindings header. This is the + * preferred error handling mechanism but is not required. + * + * - CL_HPP_ENABLE_SIZE_T_COMPATIBILITY + * + * Backward compatibility option to support cl.hpp-style size_t + * class. Replaces the updated std::array derived version and + * removal of size_t from the namespace. Note that in this case the + * new size_t class is placed in the cl::compatibility namespace and + * thus requires an additional using declaration for direct backward + * compatibility. + * + * - CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY + * + * Enable older vector of pairs interface for construction of + * programs. + * + * - CL_HPP_CL_1_2_DEFAULT_BUILD + * + * Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0 + * applies to use of cl::Program construction and other program + * build variants. + * + * - CL_HPP_USE_CL_SUB_GROUPS_KHR + * + * Enable the cl_khr_subgroups extension. + * + * - CL_HPP_USE_IL_KHR + * + * Enable the cl_khr_il_program extension. + * + * + * \section example Example + * + * The following example shows a general use case for the C++ + * bindings, including support for the optional exception feature and + * also the supplied vector and string classes, see following sections for + * descriptions of these features.
+ * + * \code + #define CL_HPP_ENABLE_EXCEPTIONS + #define CL_HPP_TARGET_OPENCL_VERSION 200 + + #include + #include + #include + #include + #include + + const int numElements = 32; + + int main(void) + { + // Filter for a 2.0 platform and set it as the default + std::vector platforms; + cl::Platform::get(&platforms); + cl::Platform plat; + for (auto &p : platforms) { + std::string platver = p.getInfo(); + if (platver.find("OpenCL 2.") != std::string::npos) { + plat = p; + } + } + if (plat() == 0) { + std::cout << "No OpenCL 2.0 platform found."; + return -1; + } + + cl::Platform newP = cl::Platform::setDefault(plat); + if (newP != plat) { + std::cout << "Error setting default platform."; + return -1; + } + + // Use C++11 raw string literals for kernel source code + std::string kernel1{R"CLC( + global int globalA; + kernel void updateGlobal() + { + globalA = 75; + } + )CLC"}; + std::string kernel2{R"CLC( + typedef struct { global int *bar; } Foo; + kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, + global int *output, int val, write_only pipe int outPipe, queue_t childQueue) + { + output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar); + write_pipe(outPipe, &val); + queue_t default_queue = get_default_queue(); + ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); + + // Have a child kernel write into third quarter of output + enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*2 + get_global_id(0)] = + inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA; + }); + + // Have a child kernel write into last quarter of output + enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*3 + get_global_id(0)] = + inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + 
globalA + 2; + }); + } + )CLC"}; + + // New simpler string interface style + std::vector programStrings {kernel1, kernel2}; + + cl::Program vectorAddProgram(programStrings); + try { + vectorAddProgram.build("-cl-std=CL2.0"); + } + catch (...) { + // Print build info for all devices + cl_int buildErr = CL_SUCCESS; + auto buildInfo = vectorAddProgram.getBuildInfo(&buildErr); + for (auto &pair : buildInfo) { + std::cerr << pair.second << std::endl << std::endl; + } + + return 1; + } + + typedef struct { int *bar; } Foo; + + // Get and run kernel that initializes the program-scope global + // A test for kernels that take no arguments + auto program2Kernel = + cl::KernelFunctor<>(vectorAddProgram, "updateGlobal"); + program2Kernel( + cl::EnqueueArgs( + cl::NDRange(1))); + + ////////////////// + // SVM allocations + + auto anSVMInt = cl::allocate_svm>(); + *anSVMInt = 5; + cl::SVMAllocator>> svmAllocReadOnly; + auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); + fooPointer->bar = anSVMInt.get(); + cl::SVMAllocator> svmAlloc; + std::vector>> inputA(numElements, 1, svmAlloc); + cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); + + // + ////////////// + + // Traditional cl_mem allocations + std::vector output(numElements, 0xdeadbeef); + cl::Buffer outputBuffer(begin(output), end(output), false); + cl::Pipe aPipe(sizeof(cl_int), numElements / 2); + + // Default command queue, also passed in as a parameter + cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( + cl::Context::getDefault(), cl::Device::getDefault()); + + auto vectorAddKernel = + cl::KernelFunctor< + decltype(fooPointer)&, + int*, + cl::coarse_svm_vector&, + cl::Buffer, + int, + cl::Pipe&, + cl::DeviceCommandQueue + >(vectorAddProgram, "vectorAdd"); + + // Ensure that the additional SVM pointer is available to the kernel + // This one was not passed as a parameter + vectorAddKernel.setSVMPointers(anSVMInt); + + // Hand control of coarse allocations to runtime + 
cl::enqueueUnmapSVM(anSVMInt); + cl::enqueueUnmapSVM(fooPointer); + cl::unmapSVM(inputB); + cl::unmapSVM(output2); + + cl_int error; + vectorAddKernel( + cl::EnqueueArgs( + cl::NDRange(numElements/2), + cl::NDRange(numElements/2)), + fooPointer, + inputA.data(), + inputB, + outputBuffer, + 3, + aPipe, + defaultDeviceQueue, + error + ); + + cl::copy(outputBuffer, begin(output), end(output)); + // Grab the SVM output vector using a map + cl::mapSVM(output2); + + cl::Device d = cl::Device::getDefault(); + + std::cout << "Output:\n"; + for (int i = 1; i < numElements; ++i) { + std::cout << "\t" << output[i] << "\n"; + } + std::cout << "\n\n"; + + return 0; + } + * + * \endcode + * + */ +#ifndef CL_HPP_ +#define CL_HPP_ + +/* Handle deprecated preprocessor definitions. In each case, we only check for + * the old name if the new name is not defined, so that user code can define + * both and hence work with either version of the bindings. + */ +#if !defined(CL_HPP_USE_DX_INTEROP) && defined(USE_DX_INTEROP) +# pragma message("opencl.hpp: USE_DX_INTEROP is deprecated. Define CL_HPP_USE_DX_INTEROP instead") +# define CL_HPP_USE_DX_INTEROP +#endif +#if !defined(CL_HPP_USE_CL_DEVICE_FISSION) && defined(USE_CL_DEVICE_FISSION) +# pragma message("opencl.hpp: USE_CL_DEVICE_FISSION is deprecated. Define CL_HPP_USE_CL_DEVICE_FISSION instead") +# define CL_HPP_USE_CL_DEVICE_FISSION +#endif +#if !defined(CL_HPP_ENABLE_EXCEPTIONS) && defined(__CL_ENABLE_EXCEPTIONS) +# pragma message("opencl.hpp: __CL_ENABLE_EXCEPTIONS is deprecated. Define CL_HPP_ENABLE_EXCEPTIONS instead") +# define CL_HPP_ENABLE_EXCEPTIONS +#endif +#if !defined(CL_HPP_NO_STD_VECTOR) && defined(__NO_STD_VECTOR) +# pragma message("opencl.hpp: __NO_STD_VECTOR is deprecated. Define CL_HPP_NO_STD_VECTOR instead") +# define CL_HPP_NO_STD_VECTOR +#endif +#if !defined(CL_HPP_NO_STD_STRING) && defined(__NO_STD_STRING) +# pragma message("opencl.hpp: __NO_STD_STRING is deprecated. 
Define CL_HPP_NO_STD_STRING instead") +# define CL_HPP_NO_STD_STRING +#endif +#if defined(VECTOR_CLASS) +# pragma message("opencl.hpp: VECTOR_CLASS is deprecated. Alias cl::vector instead") +#endif +#if defined(STRING_CLASS) +# pragma message("opencl.hpp: STRING_CLASS is deprecated. Alias cl::string instead.") +#endif +#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) && defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +# pragma message("opencl.hpp: __CL_USER_OVERRIDE_ERROR_STRINGS is deprecated. Define CL_HPP_USER_OVERRIDE_ERROR_STRINGS instead") +# define CL_HPP_USER_OVERRIDE_ERROR_STRINGS +#endif + +/* Warn about features that are no longer supported + */ +#if defined(__USE_DEV_VECTOR) +# pragma message("opencl.hpp: __USE_DEV_VECTOR is no longer supported. Expect compilation errors") +#endif +#if defined(__USE_DEV_STRING) +# pragma message("opencl.hpp: __USE_DEV_STRING is no longer supported. Expect compilation errors") +#endif + +/* Detect which version to target */ +#if !defined(CL_HPP_TARGET_OPENCL_VERSION) +# pragma message("opencl.hpp: CL_HPP_TARGET_OPENCL_VERSION is not defined. It will default to 220 (OpenCL 2.2)") +# define CL_HPP_TARGET_OPENCL_VERSION 220 +#endif +#if CL_HPP_TARGET_OPENCL_VERSION != 100 && \ + CL_HPP_TARGET_OPENCL_VERSION != 110 && \ + CL_HPP_TARGET_OPENCL_VERSION != 120 && \ + CL_HPP_TARGET_OPENCL_VERSION != 200 && \ + CL_HPP_TARGET_OPENCL_VERSION != 210 && \ + CL_HPP_TARGET_OPENCL_VERSION != 220 && \ + CL_HPP_TARGET_OPENCL_VERSION != 300 +# pragma message("opencl.hpp: CL_HPP_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220 or 300). 
It will be set to 220") +# undef CL_HPP_TARGET_OPENCL_VERSION +# define CL_HPP_TARGET_OPENCL_VERSION 220 +#endif + +/* Forward target OpenCL version to C headers if necessary */ +#if defined(CL_TARGET_OPENCL_VERSION) +/* Warn if prior definition of CL_TARGET_OPENCL_VERSION is lower than + * requested C++ bindings version */ +#if CL_TARGET_OPENCL_VERSION < CL_HPP_TARGET_OPENCL_VERSION +# pragma message("CL_TARGET_OPENCL_VERSION is already defined as is lower than CL_HPP_TARGET_OPENCL_VERSION") +#endif +#else +# define CL_TARGET_OPENCL_VERSION CL_HPP_TARGET_OPENCL_VERSION +#endif + +#if !defined(CL_HPP_MINIMUM_OPENCL_VERSION) +# define CL_HPP_MINIMUM_OPENCL_VERSION 200 +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION != 100 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 110 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 120 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 200 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 210 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 220 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 300 +# pragma message("opencl.hpp: CL_HPP_MINIMUM_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220 or 300). 
It will be set to 100") +# undef CL_HPP_MINIMUM_OPENCL_VERSION +# define CL_HPP_MINIMUM_OPENCL_VERSION 100 +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION > CL_HPP_TARGET_OPENCL_VERSION +# error "CL_HPP_MINIMUM_OPENCL_VERSION must not be greater than CL_HPP_TARGET_OPENCL_VERSION" +#endif + +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +# define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) +# define CL_USE_DEPRECATED_OPENCL_2_1_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) +# define CL_USE_DEPRECATED_OPENCL_2_2_APIS +#endif + +#ifdef _WIN32 + +#include + +#if defined(CL_HPP_USE_DX_INTEROP) +#include +#include +#endif +#endif // _WIN32 + +#if defined(_MSC_VER) +#include +#endif // _MSC_VER + + // Check for a valid C++ version + +// Need to do both tests here because for some reason __cplusplus is not +// updated in visual studio +#if (!defined(_MSC_VER) && __cplusplus < 201103L) || (defined(_MSC_VER) && _MSC_VER < 1700) +#error Visual studio 2013 or another C++11-supporting compiler required +#endif + +// +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) || defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) +#include +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#else +#include +#endif // !__APPLE__ + +#if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L ) +#define CL_HPP_NOEXCEPT_ noexcept +#else +#define CL_HPP_NOEXCEPT_ +#endif + +#if __cplusplus >= 201703L +# define 
CL_HPP_DEFINE_STATIC_MEMBER_ inline +#elif defined(_MSC_VER) +# define CL_HPP_DEFINE_STATIC_MEMBER_ __declspec(selectany) +#elif defined(__MINGW32__) +# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((selectany)) +#else +# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((weak)) +#endif // !_MSC_VER + +// Define deprecated prefixes and suffixes to ensure compilation +// in case they are not pre-defined +#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) + +#if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) +#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) +#if !defined(CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED) +#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) + +#if !defined(CL_CALLBACK) +#define CL_CALLBACK +#endif //CL_CALLBACK + +#include +#include +#include +#include +#include +#include + + +// Define a size_type to represent a correctly resolved size_t +#if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +namespace cl { + using size_type = ::size_t; +} // namespace cl +#else // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +namespace cl { + using size_type = size_t; +} // namespace cl +#endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) + + +#if defined(CL_HPP_ENABLE_EXCEPTIONS) +#include +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + +#if !defined(CL_HPP_NO_STD_VECTOR) +#include +namespace cl { + template < class T, class Alloc = std::allocator > + using vector = std::vector; +} // namespace cl +#endif // #if !defined(CL_HPP_NO_STD_VECTOR) + +#if !defined(CL_HPP_NO_STD_STRING) +#include +namespace cl { + using string = std::string; +} // namespace cl +#endif // 
#if !defined(CL_HPP_NO_STD_STRING)
+
+#if CL_HPP_TARGET_OPENCL_VERSION >= 200
+
+#if !defined(CL_HPP_NO_STD_UNIQUE_PTR)
+#include <memory>
+namespace cl {
+    // Replace unique_ptr and allocate_pointer for internal use
+    // to allow user to replace them
+    template<class T, class D>
+    using pointer = std::unique_ptr<T, D>;
+} // namespace cl
+#endif
+#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200
+#if !defined(CL_HPP_NO_STD_ARRAY)
+#include <array>
+namespace cl {
+    // cl::array is std::array unless the user opts out with
+    // CL_HPP_NO_STD_ARRAY and supplies a replacement.
+    template < class T, size_type N >
+    using array = std::array<T, N>;
+} // namespace cl
+#endif // #if !defined(CL_HPP_NO_STD_ARRAY)
+
+// Define size_type appropriately to allow backward-compatibility
+// use of the old size_t interface class
+#if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY)
+namespace cl {
+    namespace compatibility {
+        /*! \brief class used to interface between C++ and
+         *  OpenCL C calls that require arrays of size_t values, whose
+         *  size is known statically.
+         *
+         *  \tparam N number of size_type elements held by the object.
+         */
+        template <int N>
+        class size_t
+        {
+        private:
+            size_type data_[N];
+
+        public:
+            //! \brief Initialize size_t to all 0s
+            size_t()
+            {
+                for (int i = 0; i < N; ++i) {
+                    data_[i] = 0;
+                }
+            }
+
+            //! \brief Initialize size_t element-wise from a cl::array.
+            size_t(const array<size_type, N> &rhs)
+            {
+                for (int i = 0; i < N; ++i) {
+                    data_[i] = rhs[i];
+                }
+            }
+
+            //! \brief Unchecked element access.
+            size_type& operator[](int index)
+            {
+                return data_[index];
+            }
+
+            //! \brief Unchecked read-only element access.
+            const size_type& operator[](int index) const
+            {
+                return data_[index];
+            }
+
+            //! \brief Conversion operator to size_type*.
+            operator size_type* ()             { return data_; }
+
+            //! \brief Conversion operator to const size_type*.
+            operator const size_type* () const { return data_; }
+
+            //! \brief Conversion operator to cl::array; copies every element.
+            operator array<size_type, N>() const
+            {
+                array<size_type, N> ret;
+
+                for (int i = 0; i < N; ++i) {
+                    ret[i] = data_[i];
+                }
+                return ret;
+            }
+        };
+    } // namespace compatibility
+
+    template<int N>
+    using size_t = compatibility::size_t<N>;
+} // namespace cl
+#endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY)
+
+// Helper alias to avoid confusing the macros
+namespace cl {
+    namespace detail {
+        using size_t_array = array<size_type, 3>;
+    } // namespace detail
+} // namespace cl
+
+
+/*!
\namespace cl
+ *
+ * \brief The OpenCL C++ bindings are defined within this namespace.
+ *
+ */
+namespace cl {
+    class Memory;
+
+// Resolves the extension entry point `name` into pfn_<name> the first time
+// it is needed. A failed lookup is not reported here: pfn_<name> stays null.
+#define CL_HPP_INIT_CL_EXT_FCN_PTR_(name) \
+    if (!pfn_##name) {    \
+    pfn_##name = (PFN_##name) \
+    clGetExtensionFunctionAddress(#name); \
+    if (!pfn_##name) {    \
+    } \
+    }
+
+// Same as above, but queries the entry point for a specific platform.
+#define CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, name) \
+    if (!pfn_##name) {    \
+    pfn_##name = (PFN_##name) \
+    clGetExtensionFunctionAddressForPlatform(platform, #name); \
+    if (!pfn_##name) {    \
+    } \
+    }
+
+    class Program;
+    class Device;
+    class Context;
+    class CommandQueue;
+    class DeviceCommandQueue;
+    class Memory;
+    class Buffer;
+    class Pipe;
+
+#if defined(CL_HPP_ENABLE_EXCEPTIONS)
+    /*! \brief Exception class
+     *
+     *  This may be thrown by API functions when CL_HPP_ENABLE_EXCEPTIONS is defined.
+     */
+    class Error : public std::exception
+    {
+    private:
+        cl_int err_;
+        const char * errStr_;
+    public:
+        /*! \brief Create a new CL error exception for a given error code
+         *  and corresponding message.
+         *
+         *  \param err error code value.
+         *
+         *  \param errStr a descriptive string that must remain in scope until
+         *                handling of the exception has concluded.  If set, it
+         *                will be returned by what().
+         */
+        Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr)
+        {}
+
+        ~Error() throw() {}
+
+        /*! \brief Get error string associated with exception
+         *
+         * \return A memory pointer to the error message string, or the
+         *         literal "empty" when no message was supplied.
+         */
+        virtual const char * what() const throw ()
+        {
+            if (errStr_ == NULL) {
+                return "empty";
+            }
+            else {
+                return errStr_;
+            }
+        }
+
+        /*! \brief Get error code associated with exception
+         *
+         *  \return The error code.
+         */
+        cl_int err(void) const { return err_; }
+    };
+#define CL_HPP_ERR_STR_(x) #x
+#else
+#define CL_HPP_ERR_STR_(x) NULL
+#endif // CL_HPP_ENABLE_EXCEPTIONS
+
+
+namespace detail
+{
+#if defined(CL_HPP_ENABLE_EXCEPTIONS)
+// With exceptions enabled, any status other than CL_SUCCESS is thrown as
+// cl::Error; CL_SUCCESS is passed through unchanged.
+static inline cl_int errHandler (
+    cl_int err,
+    const char * errStr = NULL)
+{
+    if (err != CL_SUCCESS) {
+        throw Error(err, errStr);
+    }
+    return err;
+}
+#else
+// Without exceptions the status code is simply returned to the caller.
+static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
+{
+    (void) errStr; // suppress unused variable warning
+    return err;
+}
+#endif // CL_HPP_ENABLE_EXCEPTIONS
+}
+
+
+
+//! \cond DOXYGEN_DETAIL
+#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS)
+#define __GET_DEVICE_INFO_ERR CL_HPP_ERR_STR_(clGetDeviceInfo)
+#define __GET_PLATFORM_INFO_ERR CL_HPP_ERR_STR_(clGetPlatformInfo)
+#define __GET_DEVICE_IDS_ERR CL_HPP_ERR_STR_(clGetDeviceIDs)
+#define __GET_PLATFORM_IDS_ERR CL_HPP_ERR_STR_(clGetPlatformIDs)
+#define __GET_CONTEXT_INFO_ERR CL_HPP_ERR_STR_(clGetContextInfo)
+#define __GET_EVENT_INFO_ERR CL_HPP_ERR_STR_(clGetEventInfo)
+#define __GET_EVENT_PROFILE_INFO_ERR CL_HPP_ERR_STR_(clGetEventProfilingInfo)
+#define __GET_MEM_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetMemObjectInfo)
+#define __GET_IMAGE_INFO_ERR CL_HPP_ERR_STR_(clGetImageInfo)
+#define __GET_SAMPLER_INFO_ERR CL_HPP_ERR_STR_(clGetSamplerInfo)
+#define __GET_KERNEL_INFO_ERR CL_HPP_ERR_STR_(clGetKernelInfo)
+#if CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __GET_KERNEL_ARG_INFO_ERR CL_HPP_ERR_STR_(clGetKernelArgInfo)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120
+#if CL_HPP_TARGET_OPENCL_VERSION >= 200
+#define __GET_KERNEL_SUB_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelSubGroupInfo)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200
+#define __GET_KERNEL_WORK_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelWorkGroupInfo)
+#define __GET_PROGRAM_INFO_ERR CL_HPP_ERR_STR_(clGetProgramInfo)
+#define __GET_PROGRAM_BUILD_INFO_ERR CL_HPP_ERR_STR_(clGetProgramBuildInfo)
+#define __GET_COMMAND_QUEUE_INFO_ERR
CL_HPP_ERR_STR_(clGetCommandQueueInfo)
+
+#define __CREATE_CONTEXT_ERR CL_HPP_ERR_STR_(clCreateContext)
+#define __CREATE_CONTEXT_FROM_TYPE_ERR CL_HPP_ERR_STR_(clCreateContextFromType)
+#define __GET_SUPPORTED_IMAGE_FORMATS_ERR CL_HPP_ERR_STR_(clGetSupportedImageFormats)
+
+#define __CREATE_BUFFER_ERR CL_HPP_ERR_STR_(clCreateBuffer)
+#define __COPY_ERR CL_HPP_ERR_STR_(cl::copy)
+#define __CREATE_SUBBUFFER_ERR CL_HPP_ERR_STR_(clCreateSubBuffer)
+#define __CREATE_GL_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer)
+#define __CREATE_GL_RENDER_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLRenderbuffer)
+#define __GET_GL_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetGLObjectInfo)
+#if CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __CREATE_IMAGE_ERR CL_HPP_ERR_STR_(clCreateImage)
+#define __CREATE_GL_TEXTURE_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture)
+#define __IMAGE_DIMENSION_ERR CL_HPP_ERR_STR_(Incorrect image dimensions)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR CL_HPP_ERR_STR_(clSetMemObjectDestructorCallback)
+
+#define __CREATE_USER_EVENT_ERR CL_HPP_ERR_STR_(clCreateUserEvent)
+#define __SET_USER_EVENT_STATUS_ERR CL_HPP_ERR_STR_(clSetUserEventStatus)
+#define __SET_EVENT_CALLBACK_ERR CL_HPP_ERR_STR_(clSetEventCallback)
+#define __WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clWaitForEvents)
+
+#define __CREATE_KERNEL_ERR CL_HPP_ERR_STR_(clCreateKernel)
+#define __SET_KERNEL_ARGS_ERR CL_HPP_ERR_STR_(clSetKernelArg)
+#define __CREATE_PROGRAM_WITH_SOURCE_ERR CL_HPP_ERR_STR_(clCreateProgramWithSource)
+#if CL_HPP_TARGET_OPENCL_VERSION >= 200
+#define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithIL)
+#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200
+#define __CREATE_PROGRAM_WITH_BINARY_ERR CL_HPP_ERR_STR_(clCreateProgramWithBinary)
+#if CL_HPP_TARGET_OPENCL_VERSION >= 210
+#define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithIL)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210
+#if CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR CL_HPP_ERR_STR_(clCreateProgramWithBuiltInKernels)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __BUILD_PROGRAM_ERR CL_HPP_ERR_STR_(clBuildProgram)
+#if CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __COMPILE_PROGRAM_ERR CL_HPP_ERR_STR_(clCompileProgram)
+#define __LINK_PROGRAM_ERR CL_HPP_ERR_STR_(clLinkProgram)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __CREATE_KERNELS_IN_PROGRAM_ERR CL_HPP_ERR_STR_(clCreateKernelsInProgram)
+
+#if CL_HPP_TARGET_OPENCL_VERSION >= 200
+#define __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateCommandQueueWithProperties)
+#define __CREATE_SAMPLER_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateSamplerWithProperties)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200
+#define __SET_COMMAND_QUEUE_PROPERTY_ERR CL_HPP_ERR_STR_(clSetCommandQueueProperty)
+#define __ENQUEUE_READ_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueReadBuffer)
+#define __ENQUEUE_READ_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueReadBufferRect)
+#define __ENQUEUE_WRITE_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueWriteBuffer)
+#define __ENQUEUE_WRITE_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueWriteBufferRect)
+#define __ENQEUE_COPY_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyBuffer)
+#define __ENQEUE_COPY_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferRect)
+#define __ENQUEUE_FILL_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueFillBuffer)
+#define __ENQUEUE_READ_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueReadImage)
+#define __ENQUEUE_WRITE_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueWriteImage)
+#define __ENQUEUE_COPY_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyImage)
+#define __ENQUEUE_FILL_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueFillImage)
+#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyImageToBuffer)
+#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferToImage)
+#define __ENQUEUE_MAP_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueMapBuffer)
+#define __ENQUEUE_MAP_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueMapImage)
+#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR CL_HPP_ERR_STR_(clEnqueueUnmapMemObject)
+#define __ENQUEUE_NDRANGE_KERNEL_ERR CL_HPP_ERR_STR_(clEnqueueNDRangeKernel)
+#define __ENQUEUE_NATIVE_KERNEL CL_HPP_ERR_STR_(clEnqueueNativeKernel)
+#if CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR CL_HPP_ERR_STR_(clEnqueueMigrateMemObjects)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120
+#if CL_HPP_TARGET_OPENCL_VERSION >= 210
+#define __ENQUEUE_MIGRATE_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMigrateMem)
+#define __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clSetDefaultDeviceCommandQueue)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210
+
+
+#define __ENQUEUE_ACQUIRE_GL_ERR CL_HPP_ERR_STR_(clEnqueueAcquireGLObjects)
+#define __ENQUEUE_RELEASE_GL_ERR CL_HPP_ERR_STR_(clEnqueueReleaseGLObjects)
+
+#define __CREATE_PIPE_ERR CL_HPP_ERR_STR_(clCreatePipe)
+#define __GET_PIPE_INFO_ERR CL_HPP_ERR_STR_(clGetPipeInfo)
+
+
+#define __RETAIN_ERR CL_HPP_ERR_STR_(Retain Object)
+#define __RELEASE_ERR CL_HPP_ERR_STR_(Release Object)
+#define __FLUSH_ERR CL_HPP_ERR_STR_(clFlush)
+#define __FINISH_ERR CL_HPP_ERR_STR_(clFinish)
+#define __VECTOR_CAPACITY_ERR CL_HPP_ERR_STR_(Vector capacity error)
+
+#if CL_HPP_TARGET_OPENCL_VERSION >= 210
+#define __GET_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetHostTimer)
+#define __GET_DEVICE_AND_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetDeviceAndHostTimer)
+#endif
+#if CL_HPP_TARGET_OPENCL_VERSION >= 220
+#define __SET_PROGRAM_RELEASE_CALLBACK_ERR CL_HPP_ERR_STR_(clSetProgramReleaseCallback)
+#define __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR CL_HPP_ERR_STR_(clSetProgramSpecializationConstant)
+#endif
+
+
+/**
+ * CL 1.2 version that uses device fission.
+ */
+#if CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevices)
+#else
+#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevicesEXT)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+#define __ENQUEUE_MARKER_ERR CL_HPP_ERR_STR_(clEnqueueMarker)
+#define __ENQUEUE_WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clEnqueueWaitForEvents)
+#define __ENQUEUE_BARRIER_ERR CL_HPP_ERR_STR_(clEnqueueBarrier)
+#define __UNLOAD_COMPILER_ERR CL_HPP_ERR_STR_(clUnloadCompiler)
+#define __CREATE_GL_TEXTURE_2D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture2D)
+#define __CREATE_GL_TEXTURE_3D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture3D)
+#define __CREATE_IMAGE2D_ERR CL_HPP_ERR_STR_(clCreateImage2D)
+#define __CREATE_IMAGE3D_ERR CL_HPP_ERR_STR_(clCreateImage3D)
+#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+
+/**
+ * Deprecated APIs for 2.0
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
+#define __CREATE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clCreateCommandQueue)
+#define __ENQUEUE_TASK_ERR CL_HPP_ERR_STR_(clEnqueueTask)
+#define __CREATE_SAMPLER_ERR CL_HPP_ERR_STR_(clCreateSampler)
+#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
+
+/**
+ * CL 1.2 marker and barrier commands
+ */
+#if CL_HPP_TARGET_OPENCL_VERSION >= 120
+#define __ENQUEUE_MARKER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueMarkerWithWaitList)
+#define __ENQUEUE_BARRIER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueBarrierWithWaitList)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120
+
+#if CL_HPP_TARGET_OPENCL_VERSION >= 210
+#define __CLONE_KERNEL_ERR CL_HPP_ERR_STR_(clCloneKernel)
+#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210
+
+#endif // CL_HPP_USER_OVERRIDE_ERROR_STRINGS
+//! \endcond
+
+
+namespace detail {
+
+// Generic getInfoHelper.
The final parameter is used to guide overload
+// resolution: the actual parameter passed is an int, which makes this
+// a worse conversion sequence than a specialization that declares the
+// parameter as an int.
+template<typename Functor, typename T>
+inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long)
+{
+    return f(name, sizeof(T), param, NULL);
+}
+
+// Specialized for getInfo<CL_PROGRAM_BINARIES>
+// Assumes that the output vector was correctly resized on the way in
+// Any other query name is rejected with CL_INVALID_VALUE.
+template <typename Func>
+inline cl_int getInfoHelper(Func f, cl_uint name, vector<vector<unsigned char>>* param, int)
+{
+    if (name != CL_PROGRAM_BINARIES) {
+        return CL_INVALID_VALUE;
+    }
+    if (param) {
+        // Create array of pointers, calculate total size and pass pointer array in
+        size_type numBinaries = param->size();
+        vector<unsigned char*> binariesPointers(numBinaries);
+
+        for (size_type i = 0; i < numBinaries; ++i)
+        {
+            binariesPointers[i] = (*param)[i].data();
+        }
+
+        cl_int err = f(name, numBinaries * sizeof(unsigned char*), binariesPointers.data(), NULL);
+
+        if (err != CL_SUCCESS) {
+            return err;
+        }
+    }
+
+
+    return CL_SUCCESS;
+}
+
+// Specialized getInfoHelper for vector params
+// Queries the required byte count first, then fetches that many bytes.
+template <typename Func, typename T>
+inline cl_int getInfoHelper(Func f, cl_uint name, vector<T>* param, long)
+{
+    size_type required;
+    cl_int err = f(name, 0, NULL, &required);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+    const size_type elements = required / sizeof(T);
+
+    // Temporary to avoid changing param on an error
+    vector<T> localData(elements);
+    err = f(name, required, localData.data(), NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+    if (param) {
+        *param = std::move(localData);
+    }
+
+    return CL_SUCCESS;
+}
+
+/* Specialization for reference-counted types. This depends on the
+ * existence of Wrapper<T>::cl_type, and none of the other types having the
+ * cl_type member. Note that simply specifying the parameter as Wrapper<T>
+ * does not work, because when using a derived type (e.g. Context) the generic
+ * template will provide a better match.
+ */
+template <typename Func, typename T>
+inline cl_int getInfoHelper(
+    Func f, cl_uint name, vector<T>* param, int, typename T::cl_type = 0)
+{
+    size_type required;
+    cl_int err = f(name, 0, NULL, &required);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    const size_type elements = required / sizeof(typename T::cl_type);
+
+    vector<typename T::cl_type> value(elements);
+    err = f(name, required, value.data(), NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    if (param) {
+        // Assign to convert CL type to T for each element
+        param->resize(elements);
+
+        // Assign to param, constructing with retain behaviour
+        // to correctly capture each underlying CL object
+        for (size_type i = 0; i < elements; i++) {
+            (*param)[i] = T(value[i], true);
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Specialized GetInfoHelper for string params
+template <typename Func>
+inline cl_int getInfoHelper(Func f, cl_uint name, string* param, long)
+{
+    size_type required;
+    cl_int err = f(name, 0, NULL, &required);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    // std::string has a constant data member
+    // a char vector does not
+    if (required > 0) {
+        vector<char> value(required);
+        err = f(name, required, value.data(), NULL);
+        if (err != CL_SUCCESS) {
+            return err;
+        }
+        if (param) {
+            // The last character fetched is left out of the string.
+            param->assign(begin(value), prev(end(value)));
+        }
+    }
+    else if (param) {
+        param->assign("");
+    }
+    return CL_SUCCESS;
+}
+
+// Specialized GetInfoHelper for fixed-size array<size_type, N> params
+template <typename Func, size_type N>
+inline cl_int getInfoHelper(Func f, cl_uint name, array<size_type, N>* param, long)
+{
+    size_type required;
+    cl_int err = f(name, 0, NULL, &required);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    size_type elements = required / sizeof(size_type);
+    vector<size_type> value(elements, 0);
+
+    err = f(name, required, value.data(), NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    // As in the sibling overloads, a null output pointer means the caller
+    // only wants the status code.
+    if (param) {
+        // Bound the copy with N to prevent overruns
+        // if passed N > than the amount copied
+        if (elements > N) {
+            elements = N;
+        }
+        for (size_type i = 0; i < elements; ++i) {
+            (*param)[i] = value[i];
+        }
+    }
+
+    return CL_SUCCESS;
+}
+
+template<typename T> struct ReferenceHandler;
+
+/* Specialization for reference-counted types. This depends on the
+ * existence of Wrapper<T>::cl_type, and none of the other types having the
+ * cl_type member. Note that simply specifying the parameter as Wrapper<T>
+ * does not work, because when using a derived type (e.g. Context) the generic
+ * template will provide a better match.
+ *
+ * The raw handle returned by the query is assigned to *param and, when it is
+ * non-null, retained once through param->retain().
+ */
+template<typename Func, typename T>
+inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0)
+{
+    typename T::cl_type value;
+    cl_int err = f(name, sizeof(value), &value, NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+    *param = value;
+    if (value != NULL)
+    {
+        err = param->retain();
+        if (err != CL_SUCCESS) {
+            return err;
+        }
+    }
+    return CL_SUCCESS;
+}
+
+#define CL_HPP_PARAM_NAME_INFO_1_0_(F) \
+    F(cl_platform_info, CL_PLATFORM_PROFILE, string) \
+    F(cl_platform_info, CL_PLATFORM_VERSION, string) \
+    F(cl_platform_info, CL_PLATFORM_NAME, string) \
+    F(cl_platform_info, CL_PLATFORM_VENDOR, string) \
+    F(cl_platform_info, CL_PLATFORM_EXTENSIONS, string) \
+    \
+    F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \
+    F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_type) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, cl::vector<size_type>) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \
+    F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \
+    F(cl_device_info,
CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_type) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ + F(cl_device_info, 
CL_DEVICE_NAME, string) \ + F(cl_device_info, CL_DEVICE_VENDOR, string) \ + F(cl_device_info, CL_DRIVER_VERSION, string) \ + F(cl_device_info, CL_DEVICE_PROFILE, string) \ + F(cl_device_info, CL_DEVICE_VERSION, string) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, string) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, cl::vector) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, cl::vector) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ + \ + F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, size_type) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, size_type) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, size_type) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, size_type) \ + F(cl_image_info, CL_IMAGE_WIDTH, size_type) \ + F(cl_image_info, CL_IMAGE_HEIGHT, size_type) \ + F(cl_image_info, CL_IMAGE_DEPTH, size_type) \ + \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ + \ + F(cl_program_info, 
CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ + F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, cl::vector) \ + F(cl_program_info, CL_PROGRAM_SOURCE, string) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, cl::vector) \ + F(cl_program_info, CL_PROGRAM_BINARIES, cl::vector>) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, string) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, string) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, string) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, size_type) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::detail::size_t_array) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + + +#define CL_HPP_PARAM_NAME_INFO_1_1_(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + 
F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, string) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, size_type) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) + +#define CL_HPP_PARAM_NAME_INFO_1_2_(F) \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, size_type) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, string) \ + \ + F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ + \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, string) \ + \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ + \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ + F(cl_device_info, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, size_type) \ + F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ + \ + F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ + F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ + F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) + +#define CL_HPP_PARAM_NAME_INFO_2_0_(F) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, 
CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_QUEUES, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_EVENTS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_PIPE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_PIPE_MAX_PACKET_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities) \ + F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_COMPLETE, cl_ulong) \ + F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, cl_bool) \ + F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_PTRS, void**) \ + F(cl_command_queue_info, CL_QUEUE_SIZE, cl_uint) \ + F(cl_mem_info, CL_MEM_USES_SVM_POINTER, cl_bool) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, size_type) \ + F(cl_pipe_info, CL_PIPE_PACKET_SIZE, cl_uint) \ + F(cl_pipe_info, CL_PIPE_MAX_PACKETS, cl_uint) + +#define CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(F) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, size_type) + +#define CL_HPP_PARAM_NAME_INFO_IL_KHR_(F) \ + F(cl_device_info, CL_DEVICE_IL_VERSION_KHR, string) \ + F(cl_program_info, CL_PROGRAM_IL_KHR, cl::vector) + +#define CL_HPP_PARAM_NAME_INFO_2_1_(F) \ + F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, size_type) \ + F(cl_program_info, CL_PROGRAM_IL, cl::vector) \ + F(cl_kernel_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ + F(cl_kernel_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) \ + F(cl_device_info, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint) \ + F(cl_device_info, 
CL_DEVICE_IL_VERSION, string) \ + F(cl_device_info, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, cl_bool) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE_DEFAULT, cl::DeviceCommandQueue) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) + +#define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ + F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ + F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, cl_bool) + +#define CL_HPP_PARAM_NAME_DEVICE_FISSION_(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, cl::vector) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, cl::vector) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, cl::vector) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_(F) \ + F(cl_platform_info, CL_PLATFORM_NUMERIC_VERSION_KHR, cl_version_khr) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR, cl::vector) \ + \ + F(cl_device_info, CL_DEVICE_NUMERIC_VERSION_KHR, cl_version_khr) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR, cl_version_khr) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR, cl::vector) \ + F(cl_device_info, CL_DEVICE_ILS_WITH_VERSION_KHR, cl::vector) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR, cl::vector) + +template +struct param_traits {}; + +#define CL_HPP_DECLARE_PARAM_TRAITS_(token, param_name, T) \ +struct token; \ +template<> \ +struct param_traits \ +{ \ + enum { value = param_name }; \ + typedef T param_type; \ +}; + +CL_HPP_PARAM_NAME_INFO_1_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 +CL_HPP_PARAM_NAME_INFO_1_1_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // 
CL_HPP_TARGET_OPENCL_VERSION >= 110 +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +CL_HPP_PARAM_NAME_INFO_1_2_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +CL_HPP_PARAM_NAME_INFO_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 +CL_HPP_PARAM_NAME_INFO_2_1_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 +#if CL_HPP_TARGET_OPENCL_VERSION >= 220 +CL_HPP_PARAM_NAME_INFO_2_2_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 + +#if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 +CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // #if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 + +#if defined(CL_HPP_USE_IL_KHR) +CL_HPP_PARAM_NAME_INFO_IL_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // #if defined(CL_HPP_USE_IL_KHR) + + +// Flags deprecated in OpenCL 2.0 +#define CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(F) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) + +#define CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(F) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) + +#define CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) + +// Include deprecated query flags based on versions +// Only include deprecated 1.0 flags if 2.0 not active as there is an enum clash +#if CL_HPP_TARGET_OPENCL_VERSION > 100 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 && CL_HPP_TARGET_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 110 +#if CL_HPP_TARGET_OPENCL_VERSION > 110 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 +#if 
CL_HPP_TARGET_OPENCL_VERSION > 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) +CL_HPP_PARAM_NAME_DEVICE_FISSION_(CL_HPP_DECLARE_PARAM_TRAITS_); +#endif // CL_HPP_USE_CL_DEVICE_FISSION + +#if defined(cl_khr_extended_versioning) +CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_(CL_HPP_DECLARE_PARAM_TRAITS_); +#endif // cl_khr_extended_versioning + +#ifdef CL_PLATFORM_ICD_SUFFIX_KHR +CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, string) +#endif + +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +#endif + +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, vector) +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 
+CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) +#endif + +#ifdef CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, cl_ulong) +#endif +#ifdef CL_DEVICE_JOB_SLOTS_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_JOB_SLOTS_ARM, cl_uint) +#endif +#ifdef CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, cl_bitfield) +#endif +#ifdef CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM, cl_uint) +#endif +#ifdef CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM, cl_int) +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +#endif +#ifdef CL_DEVICE_WARP_SIZE_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +#endif +#ifdef CL_DEVICE_GPU_OVERLAP_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +#endif +#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +#endif +#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +#endif + +// Convenience 
functions + +template +inline cl_int +getInfo(Func f, cl_uint name, T* param) +{ + return getInfoHelper(f, name, param, 0); +} + +template +struct GetInfoFunctor0 +{ + Func f_; const Arg0& arg0_; + cl_int operator ()( + cl_uint param, size_type size, void* value, size_type* size_ret) + { return f_(arg0_, param, size, value, size_ret); } +}; + +template +struct GetInfoFunctor1 +{ + Func f_; const Arg0& arg0_; const Arg1& arg1_; + cl_int operator ()( + cl_uint param, size_type size, void* value, size_type* size_ret) + { return f_(arg0_, arg1_, param, size, value, size_ret); } +}; + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) +{ + GetInfoFunctor0 f0 = { f, arg0 }; + return getInfoHelper(f0, name, param, 0); +} + +template +inline cl_int +getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) +{ + GetInfoFunctor1 f0 = { f, arg0, arg1 }; + return getInfoHelper(f0, name, param, 0); +} + + +template +struct ReferenceHandler +{ }; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +/** + * OpenCL 1.2 devices do have retain/release. + */ +template <> +struct ReferenceHandler +{ + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int retain(cl_device_id device) + { return ::clRetainDevice(device); } + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int release(cl_device_id device) + { return ::clReleaseDevice(device); } +}; +#else // CL_HPP_TARGET_OPENCL_VERSION >= 120 +/** + * OpenCL 1.1 devices do not have retain/release. 
+ */ +template <> +struct ReferenceHandler +{ + // cl_device_id does not have retain(). + static cl_int retain(cl_device_id) + { return CL_SUCCESS; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) + { return CL_SUCCESS; } +}; +#endif // ! (CL_HPP_TARGET_OPENCL_VERSION >= 120) + +template <> +struct ReferenceHandler +{ + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) + { return CL_SUCCESS; } + // cl_platform_id does not have release(). + static cl_int release(cl_platform_id) + { return CL_SUCCESS; } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_context context) + { return ::clRetainContext(context); } + static cl_int release(cl_context context) + { return ::clReleaseContext(context); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_command_queue queue) + { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) + { return ::clReleaseCommandQueue(queue); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_mem memory) + { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) + { return ::clReleaseMemObject(memory); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_sampler sampler) + { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) + { return ::clReleaseSampler(sampler); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_program program) + { return ::clRetainProgram(program); } + static cl_int release(cl_program program) + { return ::clReleaseProgram(program); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_kernel kernel) + { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) + { return ::clReleaseKernel(kernel); } +}; + +template <> +struct ReferenceHandler +{ + static cl_int retain(cl_event event) + { return ::clRetainEvent(event); } + 
static cl_int release(cl_event event) + { return ::clReleaseEvent(event); } +}; + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 +// Extracts version number with major in the upper 16 bits, minor in the lower 16 +static cl_uint getVersion(const vector &versionInfo) +{ + int highVersion = 0; + int lowVersion = 0; + int index = 7; + while(versionInfo[index] != '.' ) { + highVersion *= 10; + highVersion += versionInfo[index]-'0'; + ++index; + } + ++index; + while(versionInfo[index] != ' ' && versionInfo[index] != '\0') { + lowVersion *= 10; + lowVersion += versionInfo[index]-'0'; + ++index; + } + return (highVersion << 16) | lowVersion; +} + +static cl_uint getPlatformVersion(cl_platform_id platform) +{ + size_type size = 0; + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); + + vector versionInfo(size); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, versionInfo.data(), &size); + return getVersion(versionInfo); +} + +static cl_uint getDevicePlatformVersion(cl_device_id device) +{ + cl_platform_id platform; + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + return getPlatformVersion(platform); +} + +static cl_uint getContextPlatformVersion(cl_context context) +{ + // The platform cannot be queried directly, so we first have to grab a + // device and obtain its context + size_type size = 0; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + if (size == 0) + return 0; + vector devices(size/sizeof(cl_device_id)); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices.data(), NULL); + return getDevicePlatformVersion(devices[0]); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 + +template +class Wrapper +{ +public: + typedef T cl_type; + +protected: + cl_type object_; + +public: + Wrapper() : object_(NULL) { } + + Wrapper(const cl_type &obj, bool retainObject) : object_(obj) + { + if (retainObject) { + 
detail::errHandler(retain(), __RETAIN_ERR); + } + } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + detail::errHandler(retain(), __RETAIN_ERR); + } + + Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT_ + { + object_ = rhs.object_; + rhs.object_ = NULL; + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + detail::errHandler(retain(), __RETAIN_ERR); + } + return *this; + } + + Wrapper& operator = (Wrapper&& rhs) + { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + rhs.object_ = NULL; + } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs; + return *this; + } + + const cl_type& operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + + cl_type get() const { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + cl_int retain() const + { + if (object_ != nullptr) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if (object_ != nullptr) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; + +template <> +class Wrapper +{ +public: + typedef cl_device_id cl_type; + +protected: + cl_type object_; + bool referenceCountable_; + + static bool isReferenceCountable(cl_device_id device) + { + bool retVal = false; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + if (device != NULL) { + int version = getDevicePlatformVersion(device); + if(version > ((1 << 16) + 1)) { + retVal = true; + } + } +#else // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + retVal = true; +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 
+ return retVal; + } + +public: + Wrapper() : object_(NULL), referenceCountable_(false) + { + } + + Wrapper(const cl_type &obj, bool retainObject) : + object_(obj), + referenceCountable_(false) + { + referenceCountable_ = isReferenceCountable(obj); + + if (retainObject) { + detail::errHandler(retain(), __RETAIN_ERR); + } + } + + ~Wrapper() + { + release(); + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + referenceCountable_ = isReferenceCountable(object_); + detail::errHandler(retain(), __RETAIN_ERR); + } + + Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT_ + { + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + rhs.object_ = NULL; + rhs.referenceCountable_ = false; + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + detail::errHandler(retain(), __RETAIN_ERR); + } + return *this; + } + + Wrapper& operator = (Wrapper&& rhs) + { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + rhs.object_ = NULL; + rhs.referenceCountable_ = false; + } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs; + referenceCountable_ = isReferenceCountable(object_); + return *this; + } + + const cl_type& operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + + cl_type get() const { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + template + friend inline cl_int getInfoHelper(Func, cl_uint, vector*, int, typename U::cl_type); + + cl_int retain() const + { + if( object_ != nullptr && referenceCountable_ ) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if 
(object_ != nullptr && referenceCountable_) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; + +template +inline bool operator==(const Wrapper &lhs, const Wrapper &rhs) +{ + return lhs() == rhs(); +} + +template +inline bool operator!=(const Wrapper &lhs, const Wrapper &rhs) +{ + return !operator==(lhs, rhs); +} + +} // namespace detail +//! \endcond + + +using BuildLogType = vector::param_type>>; +#if defined(CL_HPP_ENABLE_EXCEPTIONS) +/** +* Exception class for build errors to carry build info +*/ +class BuildError : public Error +{ +private: + BuildLogType buildLogs; +public: + BuildError(cl_int err, const char * errStr, const BuildLogType &vec) : Error(err, errStr), buildLogs(vec) + { + } + + BuildLogType getBuildLog() const + { + return buildLogs; + } +}; +namespace detail { + static inline cl_int buildErrHandler( + cl_int err, + const char * errStr, + const BuildLogType &buildLogs) + { + if (err != CL_SUCCESS) { + throw BuildError(err, errStr, buildLogs); + } + return err; + } +} // namespace detail + +#else +namespace detail { + static inline cl_int buildErrHandler( + cl_int err, + const char * errStr, + const BuildLogType &buildLogs) + { + (void)buildLogs; // suppress unused variable warning + (void)errStr; + return err; + } +} // namespace detail +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + + +/*! \stuct ImageFormat + * \brief Adds constructors and member functions for cl_image_format. + * + * \see cl_image_format + */ +struct ImageFormat : public cl_image_format +{ + //! \brief Default constructor - performs no initialization. + ImageFormat(){} + + //! \brief Initializing constructor. + ImageFormat(cl_channel_order order, cl_channel_type type) + { + image_channel_order = order; + image_channel_data_type = type; + } + + //! \brief Assignment operator. 
+ ImageFormat& operator = (const ImageFormat& rhs) + { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } +}; + +/*! \brief Class interface for cl_device_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_device_id + */ +class Device : public detail::Wrapper +{ +private: + static std::once_flag default_initialized_; + static Device default_; + static cl_int default_error_; + + /*! \brief Create the default context. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault(); + + /*! \brief Create the default platform from a provided platform. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Device &p) { + default_ = p; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Device(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + //! \brief Default constructor - initializes to NULL. + Device() : detail::Wrapper() { } + + /*! \brief Constructor from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + explicit Device(const cl_device_id &device, bool retainObject = false) : + detail::Wrapper(device, retainObject) { } + + /*! \brief Returns the first device on the default context. 
+ * + * \see Context::getDefault() + */ + static Device getDefault( + cl_int *errResult = NULL) + { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (errResult != NULL) { + *errResult = default_error_; + } + return default_; + } + + /** + * Modify the default device to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default device. + * Should be compared to the passed value to ensure that it was updated. + */ + static Device setDefault(const Device &default_device) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_device)); + detail::errHandler(default_error_); + return default_; + } + + /*! \brief Assignment operator from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const cl_device_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Device(const Device& dev) : detail::Wrapper(dev) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Device& operator = (const Device &dev) + { + detail::Wrapper::operator=(dev); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Device(Device&& dev) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(dev)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Device& operator = (Device &&dev) + { + detail::Wrapper::operator=(std::move(dev)); + return *this; + } + + //! \brief Wrapper for clGetDeviceInfo(). + template + cl_int getInfo(cl_device_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + //! 
\brief Wrapper for clGetDeviceInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_device_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /** + * Return the current value of the host clock as seen by the device. + * The resolution of the device timer may be queried with the + * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. + * @return The host timer value. + */ + cl_ulong getHostTimer(cl_int *error = nullptr) + { + cl_ulong retVal = 0; + cl_int err = + clGetHostTimer(this->get(), &retVal); + detail::errHandler( + err, + __GET_HOST_TIMER_ERR); + if (error) { + *error = err; + } + return retVal; + } + + /** + * Return a synchronized pair of host and device timestamps as seen by device. + * Use to correlate the clocks and get the host timer only using getHostTimer + * as a lower cost mechanism in between calls. + * The resolution of the host timer may be queried with the + * CL_PLATFORM_HOST_TIMER_RESOLUTION query. + * The resolution of the device timer may be queried with the + * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. + * @return A pair of (device timer, host timer) timer values. + */ + std::pair getDeviceAndHostTimer(cl_int *error = nullptr) + { + std::pair retVal; + cl_int err = + clGetDeviceAndHostTimer(this->get(), &(retVal.first), &(retVal.second)); + detail::errHandler( + err, + __GET_DEVICE_AND_HOST_TIMER_ERR); + if (error) { + *error = err; + } + return retVal; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + /** + * CL 1.2 version + */ +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + //! \brief Wrapper for clCreateSubDevices(). 
+ cl_int createSubDevices( + const cl_device_partition_property * properties, + vector* devices) + { + cl_uint n = 0; + cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + + vector ids(n); + err = clCreateSubDevices(object_, properties, n, ids.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + // We do not need to retain because this device is being created + // by the runtime + (*devices)[i] = Device(ids[i], false); + } + } + + return CL_SUCCESS; + } +#elif defined(CL_HPP_USE_CL_DEVICE_FISSION) + +/** + * CL 1.1 version that uses device fission extension. 
+ */ + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + vector* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSubDevicesEXT); + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + + vector ids(n); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + // Cannot trivially assign because we need to capture intermediates + // with safe construction + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + // We do not need to retain because this device is being created + // by the runtime + (*devices)[i] = Device(ids[i], false); + } + } + return CL_SUCCESS; + } +#endif // defined(CL_HPP_USE_CL_DEVICE_FISSION) +}; + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Device::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Device Device::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Device::default_error_ = CL_SUCCESS; + +/*! \brief Class interface for cl_platform_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_platform_id + */ +class Platform : public detail::Wrapper +{ +private: + static std::once_flag default_initialized_; + static Platform default_; + static cl_int default_error_; + + /*! 
\brief Create the default context. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + // If default wasn't passed ,generate one + // Otherwise set it + cl_uint n = 0; + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + default_error_ = err; + return; + } + if (n == 0) { + default_error_ = CL_INVALID_PLATFORM; + return; + } + + vector ids(n); + err = ::clGetPlatformIDs(n, ids.data(), NULL); + if (err != CL_SUCCESS) { + default_error_ = err; + return; + } + + default_ = Platform(ids[0]); + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + /*! \brief Create the default platform from a provided platform. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Platform &p) { + default_ = p; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Platform(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + //! \brief Default constructor - initializes to NULL. + Platform() : detail::Wrapper() { } + + /*! \brief Constructor from cl_platform_id. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This simply copies the platform ID value, which is an inexpensive operation. + */ + explicit Platform(const cl_platform_id &platform, bool retainObject = false) : + detail::Wrapper(platform, retainObject) { } + + /*! \brief Assignment operator from cl_platform_id. 
+ * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const cl_platform_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + static Platform getDefault( + cl_int *errResult = NULL) + { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (errResult != NULL) { + *errResult = default_error_; + } + return default_; + } + + /** + * Modify the default platform to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default platform. + * Should be compared to the passed value to ensure that it was updated. + */ + static Platform setDefault(const Platform &default_platform) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_platform)); + detail::errHandler(default_error_); + return default_; + } + + //! \brief Wrapper for clGetPlatformInfo(). + template + cl_int getInfo(cl_platform_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + //! \brief Wrapper for clGetPlatformInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_platform_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of devices for this platform. + * + * Wraps clGetDeviceIDs(). 
+ */ + cl_int getDevices( + cl_device_type type, + vector* devices) const + { + cl_uint n = 0; + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); + if (err != CL_SUCCESS && err != CL_DEVICE_NOT_FOUND) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + vector ids(n); + if (n>0) { + err = ::clGetDeviceIDs(object_, type, n, ids.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + // We must retain things we obtain from the API to avoid releasing + // API-owned objects. + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + (*devices)[i] = Device(ids[i], true); + } + } + return CL_SUCCESS; + } + +#if defined(CL_HPP_USE_DX_INTEROP) + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. + * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is NULL, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. + * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. 
+ */ + cl_int getDevices( + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + vector* devices) const + { + typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint* num_devices); + + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(object_, clGetDeviceIDsFromD3D10KHR); + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + 0, + NULL, + &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + vector ids(n); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + n, + ids.data(), + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + // We must retain things we obtain from the API to avoid releasing + // API-owned objects. + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + (*devices)[i] = Device(ids[i], true); + } + } + return CL_SUCCESS; + } +#endif + + /*! \brief Gets a list of available platforms. + * + * Wraps clGetPlatformIDs(). 
+ */ + static cl_int get( + vector* platforms) + { + cl_uint n = 0; + + if( platforms == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + vector ids(n); + err = ::clGetPlatformIDs(n, ids.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + if (platforms) { + platforms->resize(ids.size()); + + // Platforms don't reference count + for (size_type i = 0; i < ids.size(); i++) { + (*platforms)[i] = Platform(ids[i]); + } + } + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static cl_int get( + Platform * platform) + { + cl_int err; + Platform default_platform = Platform::getDefault(&err); + if (platform) { + *platform = default_platform; + } + return err; + } + + /*! \brief Gets the first available platform, returning it by value. + * + * \return Returns a valid platform if one is available. + * If no platform is available will return a null platform. + * Throws an exception if no platforms are available + * or an error condition occurs. + * Wraps clGetPlatformIDs(), returning the first result. + */ + static Platform get( + cl_int * errResult = NULL) + { + cl_int err; + Platform default_platform = Platform::getDefault(&err); + if (errResult) { + *errResult = err; + } + return default_platform; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + //! \brief Wrapper for clUnloadCompiler(). 
+ cl_int + unloadCompiler() + { + return ::clUnloadPlatformCompiler(object_); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +}; // class Platform + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Platform::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Platform Platform::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Platform::default_error_ = CL_SUCCESS; + + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +/** + * Unload the OpenCL compiler. + * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline cl_int +UnloadCompiler() +{ + return ::clUnloadCompiler(); +} +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +/*! \brief Class interface for cl_context. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_context as the original. For details, see + * clRetainContext() and clReleaseContext(). + * + * \see cl_context + */ +class Context + : public detail::Wrapper +{ +private: + static std::once_flag default_initialized_; + static Context default_; + static cl_int default_error_; + + /*! \brief Create the default context from the default device type in the default platform. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. 
+ */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { +#if !defined(__APPLE__) && !defined(__MACOS) + const Platform &p = Platform::getDefault(); + cl_platform_id defaultPlatform = p(); + cl_context_properties properties[3] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)defaultPlatform, 0 + }; +#else // #if !defined(__APPLE__) && !defined(__MACOS) + cl_context_properties *properties = nullptr; +#endif // #if !defined(__APPLE__) && !defined(__MACOS) + + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + properties, + NULL, + NULL, + &default_error_); + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + + /*! \brief Create the default context from a provided Context. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Context &c) { + default_ = c; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Context(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + /*! \brief Constructs a context including a list of specified devices. + * + * Wraps clCreateContext(). 
+ */ + Context( + const vector& devices, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + size_type, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateContext( + properties, (cl_uint) numDevices, + deviceIDs.data(), + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including a specific device. + * + * Wraps clCreateContext(). + */ + Context( + const Device& device, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + size_type, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_device_id deviceID = device(); + + object_ = ::clCreateContext( + properties, 1, + &deviceID, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including all or a subset of devices of a specified type. + * + * Wraps clCreateContextFromType(). 
+ */ + Context( + cl_device_type type, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + size_type, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + +#if !defined(__APPLE__) && !defined(__MACOS) + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; + + if (properties == NULL) { + // Get a valid platform ID as we cannot send in a blank one + vector platforms; + error = Platform::get(&platforms); + if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + // Check the platforms we found for a device of our specified type + cl_context_properties platform_id = 0; + for (unsigned int i = 0; i < platforms.size(); i++) { + + vector devices; + +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try { +#endif + + error = platforms[i].getDevices(type, &devices); + +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + } catch (cl::Error& e) { + error = e.err(); + } + // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type + // We do error checking next anyway, and can throw there if needed +#endif + + // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + if (devices.size() > 0) { + platform_id = (cl_context_properties)platforms[i](); + break; + } + } + + if (platform_id == 0) { + detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = CL_DEVICE_NOT_FOUND; + } + return; + } + + prop[1] = platform_id; + properties = &prop[0]; + } +#endif + object_ = ::clCreateContextFromType( + properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! 
\brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Context(const Context& ctx) : detail::Wrapper(ctx) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Context& operator = (const Context &ctx) + { + detail::Wrapper::operator=(ctx); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Context(Context&& ctx) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(ctx)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Context& operator = (Context &&ctx) + { + detail::Wrapper::operator=(std::move(ctx)); + return *this; + } + + + /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. + * + * \note All calls to this function return the same cl_context as the first. + */ + static Context getDefault(cl_int * err = NULL) + { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + /** + * Modify the default context to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default context. + * Should be compared to the passed value to ensure that it was updated. + */ + static Context setDefault(const Context &default_context) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_context)); + detail::errHandler(default_error_); + return default_; + } + + //! \brief Default constructor - initializes to NULL. + Context() : detail::Wrapper() { } + + /*! \brief Constructor from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_context + * into the new Context object. 
+ */ + explicit Context(const cl_context& context, bool retainObject = false) : + detail::Wrapper(context, retainObject) { } + + /*! \brief Assignment operator from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseContext() on the value previously held by this instance. + */ + Context& operator = (const cl_context& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetContextInfo(). + template + cl_int getInfo(cl_context_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + //! \brief Wrapper for clGetContextInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_context_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Gets a list of supported image formats. + * + * Wraps clGetSupportedImageFormats(). 
+ */ + cl_int getSupportedImageFormats( + cl_mem_flags flags, + cl_mem_object_type type, + vector* formats) const + { + cl_uint numEntries; + + if (!formats) { + return CL_SUCCESS; + } + + cl_int err = ::clGetSupportedImageFormats( + object_, + flags, + type, + 0, + NULL, + &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + if (numEntries > 0) { + vector value(numEntries); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*)value.data(), + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(begin(value), end(value)); + } + else { + // If no values are being returned, ensure an empty vector comes back + formats->clear(); + } + + return CL_SUCCESS; + } +}; + +inline void Device::makeDefault() +{ + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + cl_int error = 0; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + default_error_ = error; + } + else { + default_ = context.getInfo()[0]; + default_error_ = CL_SUCCESS; + } + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif +} + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Context::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Context Context::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Context::default_error_ = CL_SUCCESS; + +/*! \brief Class interface for cl_event. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_event as the original. For details, see + * clRetainEvent() and clReleaseEvent(). + * + * \see cl_event + */ +class Event : public detail::Wrapper +{ +public: + //! 
\brief Default constructor - initializes to NULL. + Event() : detail::Wrapper() { } + + /*! \brief Constructor from cl_event - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_event + * into the new Event object. + */ + explicit Event(const cl_event& event, bool retainObject = false) : + detail::Wrapper(event, retainObject) { } + + /*! \brief Assignment operator from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseEvent() on the value previously held by this instance. + */ + Event& operator = (const cl_event& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetEventInfo(). + template + cl_int getInfo(cl_event_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + //! \brief Wrapper for clGetEventInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_event_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + //! \brief Wrapper for clGetEventProfilingInfo(). + template + cl_int getProfilingInfo(cl_profiling_info name, T* param) const + { + return detail::errHandler(detail::getInfo( + &::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. 
+ template typename + detail::param_traits::param_type + getProfilingInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_profiling_info, name>::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! \brief Blocks the calling thread until this event completes. + * + * Wraps clWaitForEvents(). + */ + cl_int wait() const + { + return detail::errHandler( + ::clWaitForEvents(1, &object_), + __WAIT_FOR_EVENTS_ERR); + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + /*! \brief Registers a user callback function for a specific command execution status. + * + * Wraps clSetEventCallback(). + */ + cl_int setCallback( + cl_int type, + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetEventCallback( + object_, + type, + pfn_notify, + user_data), + __SET_EVENT_CALLBACK_ERR); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + + /*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ + static cl_int + waitForEvents(const vector& events) + { + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), + __WAIT_FOR_EVENTS_ERR); + } +}; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 +/*! \brief Class interface for user events (a subset of cl_event's). + * + * See Event for details about copy semantics, etc. + */ +class UserEvent : public Event +{ +public: + /*! \brief Constructs a user event on a given context. + * + * Wraps clCreateUserEvent(). + */ + UserEvent( + const Context& context, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateUserEvent( + context(), + &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. 
+ UserEvent() : Event() { } + + /*! \brief Sets the execution status of a user event object. + * + * Wraps clSetUserEventStatus(). + */ + cl_int setStatus(cl_int status) + { + return detail::errHandler( + ::clSetUserEventStatus(object_,status), + __SET_USER_EVENT_STATUS_ERR); + } +}; +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + +/*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ +inline static cl_int +WaitForEvents(const vector& events) +{ + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), + __WAIT_FOR_EVENTS_ERR); +} + +/*! \brief Class interface for cl_mem. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_mem as the original. For details, see + * clRetainMemObject() and clReleaseMemObject(). + * + * \see cl_mem + */ +class Memory : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Memory() : detail::Wrapper() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * Optionally transfer ownership of a refcount on the cl_mem + * into the new Memory object. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * + * See Memory for further details. + */ + explicit Memory(const cl_mem& memory, bool retainObject) : + detail::Wrapper(memory, retainObject) { } + + /*! \brief Assignment operator from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseMemObject() on the value previously held by this instance. + */ + Memory& operator = (const cl_mem& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. 
+ */ + Memory(const Memory& mem) : detail::Wrapper(mem) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Memory& operator = (const Memory &mem) + { + detail::Wrapper::operator=(mem); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Memory(Memory&& mem) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(mem)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Memory& operator = (Memory &&mem) + { + detail::Wrapper::operator=(std::move(mem)); + return *this; + } + + + //! \brief Wrapper for clGetMemObjectInfo(). + template + cl_int getInfo(cl_mem_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_mem_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + /*! \brief Registers a callback function to be called when the memory object + * is no longer needed. + * + * Wraps clSetMemObjectDestructorCallback(). + * + * Repeated calls to this function, for a given cl_mem value, will append + * to the list of functions called (in reverse order) when memory object's + * resources are freed and the memory object is deleted. + * + * \note + * The registered callbacks are associated with the underlying cl_mem + * value - not the Memory class instance. 
+ */ + cl_int setDestructorCallback( + void (CL_CALLBACK * pfn_notify)(cl_mem, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetMemObjectDestructorCallback( + object_, + pfn_notify, + user_data), + __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + +}; + +// Pre-declare copy functions +class Buffer; +template< typename IteratorType > +cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +namespace detail +{ + class SVMTraitNull + { + public: + static cl_svm_mem_flags getSVMMemFlags() + { + return 0; + } + }; +} // namespace detail + +template +class SVMTraitReadWrite +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_READ_WRITE | + Trait::getSVMMemFlags(); + } +}; + +template +class SVMTraitReadOnly +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_READ_ONLY | + Trait::getSVMMemFlags(); + } +}; + +template +class SVMTraitWriteOnly +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_WRITE_ONLY | + Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitCoarse +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitFine +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_SVM_FINE_GRAIN_BUFFER | + Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitAtomic +{ +public: + static cl_svm_mem_flags 
getSVMMemFlags() + { + return + CL_MEM_SVM_FINE_GRAIN_BUFFER | + CL_MEM_SVM_ATOMICS | + Trait::getSVMMemFlags(); + } +}; + +// Pre-declare SVM map function +template +inline cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL); + +/** + * STL-like allocator class for managing SVM objects provided for convenience. + * + * Note that while this behaves like an allocator for the purposes of constructing vectors and similar objects, + * care must be taken when using with smart pointers. + * The allocator should not be used to construct a unique_ptr if we are using coarse-grained SVM mode because + * the coarse-grained management behaviour would behave incorrectly with respect to reference counting. + * + * Instead the allocator embeds a Deleter which may be used with unique_ptr and is used + * with the allocate_shared and allocate_ptr supplied operations. + */ +template +class SVMAllocator { +private: + Context context_; + +public: + typedef T value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + template + struct rebind + { + typedef SVMAllocator other; + }; + + template + friend class SVMAllocator; + + SVMAllocator() : + context_(Context::getDefault()) + { + } + + explicit SVMAllocator(cl::Context context) : + context_(context) + { + } + + + SVMAllocator(const SVMAllocator &other) : + context_(other.context_) + { + } + + template + SVMAllocator(const SVMAllocator &other) : + context_(other.context_) + { + } + + ~SVMAllocator() + { + } + + pointer address(reference r) CL_HPP_NOEXCEPT_ + { + return std::addressof(r); + } + + const_pointer address(const_reference r) CL_HPP_NOEXCEPT_ + { + return std::addressof(r); + } + + /** + * Allocate an SVM pointer. 
+ * + * If the allocator is coarse-grained, this will take ownership to allow + * containers to correctly construct data in place. + */ + pointer allocate( + size_type size, + typename cl::SVMAllocator::const_pointer = 0) + { + // Allocate memory with default alignment matching the size of the type + void* voidPointer = + clSVMAlloc( + context_(), + SVMTrait::getSVMMemFlags(), + size*sizeof(T), + 0); + pointer retValue = reinterpret_cast( + voidPointer); +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + if (!retValue) { + std::bad_alloc excep; + throw excep; + } +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + + // If allocation was coarse-grained then map it + if (!(SVMTrait::getSVMMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { + cl_int err = enqueueMapSVM(retValue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, size*sizeof(T)); + if (err != CL_SUCCESS) { + std::bad_alloc excep; + throw excep; + } + } + + // If exceptions disabled, return null pointer from allocator + return retValue; + } + + void deallocate(pointer p, size_type) + { + clSVMFree(context_(), p); + } + + /** + * Return the maximum possible allocation size. + * This is the minimum of the maximum sizes of all devices in the context. + */ + size_type max_size() const CL_HPP_NOEXCEPT_ + { + size_type maxSize = std::numeric_limits::max() / sizeof(T); + + for (const Device &d : context_.getInfo()) { + maxSize = std::min( + maxSize, + static_cast(d.getInfo())); + } + + return maxSize; + } + + template< class U, class... Args > + void construct(U* p, Args&&... args) + { + new(p)T(args...); + } + + template< class U > + void destroy(U* p) + { + p->~U(); + } + + /** + * Returns true if the contexts match. 
+ */ + inline bool operator==(SVMAllocator const& rhs) + { + return (context_==rhs.context_); + } + + inline bool operator!=(SVMAllocator const& a) + { + return !operator==(a); + } +}; // class SVMAllocator return cl::pointer(tmp, detail::Deleter{alloc, copies}); + + +template +class SVMAllocator { +public: + typedef void value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + template + struct rebind + { + typedef SVMAllocator other; + }; + + template + friend class SVMAllocator; +}; + +#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) +namespace detail +{ + template + class Deleter { + private: + Alloc alloc_; + size_type copies_; + + public: + typedef typename std::allocator_traits::pointer pointer; + + Deleter(const Alloc &alloc, size_type copies) : alloc_{ alloc }, copies_{ copies } + { + } + + void operator()(pointer ptr) const { + Alloc tmpAlloc{ alloc_ }; + std::allocator_traits::destroy(tmpAlloc, std::addressof(*ptr)); + std::allocator_traits::deallocate(tmpAlloc, ptr, copies_); + } + }; +} // namespace detail + +/** + * Allocation operation compatible with std::allocate_ptr. + * Creates a unique_ptr by default. + * This requirement is to ensure that the control block is not + * allocated in memory inaccessible to the host. + */ +template +cl::pointer> allocate_pointer(const Alloc &alloc_, Args&&... args) +{ + Alloc alloc(alloc_); + static const size_type copies = 1; + + // Ensure that creation of the management block and the + // object are dealt with separately such that we only provide a deleter + + T* tmp = std::allocator_traits::allocate(alloc, copies); + if (!tmp) { + std::bad_alloc excep; + throw excep; + } + try { + std::allocator_traits::construct( + alloc, + std::addressof(*tmp), + std::forward(args)...); + + return cl::pointer>(tmp, detail::Deleter{alloc, copies}); + } + catch (std::bad_alloc& b) + { + std::allocator_traits::deallocate(alloc, tmp, copies); + throw; + } +} + +template< class T, class SVMTrait, class... 
Args > +cl::pointer>> allocate_svm(Args... args) +{ + SVMAllocator alloc; + return cl::allocate_pointer(alloc, args...); +} + +template< class T, class SVMTrait, class... Args > +cl::pointer>> allocate_svm(const cl::Context &c, Args... args) +{ + SVMAllocator alloc(c); + return cl::allocate_pointer(alloc, args...); +} +#endif // #if !defined(CL_HPP_NO_STD_UNIQUE_PTR) + +/*! \brief Vector alias to simplify construction of coarse-grained SVM containers. + * + */ +template < class T > +using coarse_svm_vector = vector>>; + +/*! \brief Vector alias to simplify construction of fine-grained SVM containers. +* +*/ +template < class T > +using fine_svm_vector = vector>>; + +/*! \brief Vector alias to simplify construction of fine-grained SVM containers that support platform atomics. +* +*/ +template < class T > +using atomic_svm_vector = vector>>; + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + +/*! \brief Class interface for Buffer Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Buffer : public Memory +{ +public: + + /*! \brief Constructs a Buffer in a specified context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + */ + Buffer( + const Context& context, + cl_mem_flags flags, + size_type size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. 
+ * + * \see Context::getDefault() + */ + Buffer( + cl_mem_flags flags, + size_type size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer( + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr = false, + cl_int* err = NULL) + { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + size_type size = sizeof(DataType)*(endIterator - startIterator); + + Context context = Context::getDefault(err); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + error = cl::copy(startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified context. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + /*! 
+ * \brief Construct a Buffer from a host container via iterators using a specified queue. + * If useHostPtr is specified iterators must be random access. + */ + template< typename IteratorType > + Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Buffer() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with earlier versions. + * + * See Memory for further details. + */ + explicit Buffer(const cl_mem& buffer, bool retainObject = false) : + Memory(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Buffer(const Buffer& buf) : Memory(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Buffer& operator = (const Buffer &buf) + { + Memory::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Buffer(Buffer&& buf) CL_HPP_NOEXCEPT_ : Memory(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Buffer& operator = (Buffer &&buf) + { + Memory::operator=(std::move(buf)); + return *this; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + /*! \brief Creates a new buffer object from this. + * + * Wraps clCreateSubBuffer(). 
+ */ + Buffer createSubBuffer( + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * err = NULL) + { + Buffer result; + cl_int error; + result.object_ = ::clCreateSubBuffer( + object_, + flags, + buffer_create_type, + buffer_create_info, + &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != NULL) { + *err = error; + } + + return result; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 +}; + +#if defined (CL_HPP_USE_DX_INTEROP) +/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. + * + * This is provided to facilitate interoperability with Direct3D. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferD3D10 : public Buffer +{ +public: + + + /*! \brief Constructs a BufferD3D10, in a specified context, from a + * given ID3D10Buffer. + * + * Wraps clCreateFromD3D10BufferKHR(). + */ + BufferD3D10( + const Context& context, + cl_mem_flags flags, + ID3D10Buffer* bufobj, + cl_int * err = NULL) : pfn_clCreateFromD3D10BufferKHR(nullptr) + { + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + vector props = context.getInfo(); + cl_platform platform = -1; + for( int i = 0; i < props.size(); ++i ) { + if( props[i] == CL_CONTEXT_PLATFORM ) { + platform = props[i+1]; + } + } + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateFromD3D10BufferKHR); +#elif CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateFromD3D10BufferKHR); +#endif + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! 
\brief Default constructor - initializes to NULL. + BufferD3D10() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferD3D10(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10(const BufferD3D10& buf) : + Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10& operator = (const BufferD3D10 &buf) + { + Buffer::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10(BufferD3D10&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferD3D10& operator = (BufferD3D10 &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } +}; +#endif + +/*! \brief Class interface for GL Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferGL : public Buffer +{ +public: + /*! \brief Constructs a BufferGL in a specified context, from a given + * GL buffer. + * + * Wraps clCreateFromGLBuffer(). 
+ */ + BufferGL( + const Context& context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLBuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferGL() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferGL(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferGL(const BufferGL& buf) : Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferGL& operator = (const BufferGL &buf) + { + Buffer::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferGL(BufferGL&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferGL& operator = (BufferGL &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + cl_GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! 
\brief Class interface for GL Render Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class BufferRenderGL : public Buffer +{ +public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. + * + * Wraps clCreateFromGLRenderbuffer(). + */ + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferRenderGL() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferRenderGL(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL(const BufferRenderGL& buf) : Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL& operator = (const BufferRenderGL &buf) + { + Buffer::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} + + /*! 
\brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL& operator = (BufferRenderGL &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + cl_GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image : public Memory +{ +protected: + //! \brief Default constructor - initializes to NULL. + Image() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image(const cl_mem& image, bool retainObject = false) : + Memory(image, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image(const Image& img) : Memory(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image& operator = (const Image &img) + { + Memory::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image(Image&& img) CL_HPP_NOEXCEPT_ : Memory(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. 
+ */ + Image& operator = (Image &&img) + { + Memory::operator=(std::move(img)); + return *this; + } + + +public: + //! \brief Wrapper for clGetImageInfo(). + template + cl_int getImageInfo(cl_image_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + //! \brief Wrapper for clGetImageInfo() that returns by value. + template typename + detail::param_traits::param_type + getImageInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_image_info, name>::param_type param; + cl_int result = getImageInfo(name, &param); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +/*! \brief Class interface for 1D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image1D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image1D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type width, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D, + width, + 0, 0, 0, 0, 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image1D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1D(const cl_mem& image1D, bool retainObject = false) : + Image(image1D, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. 
+ * + * See Memory for further details. + */ + Image1D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1D(const Image1D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1D& operator = (const Image1D &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1D(Image1D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1D& operator = (Image1D &&img) + { + Image::operator=(std::move(img)); + return *this; + } + +}; + +/*! \class Image1DBuffer + * \brief Image interface for 1D buffer images. + */ +class Image1DBuffer : public Image +{ +public: + Image1DBuffer( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type width, + const Buffer &buffer, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + width, + 0, 0, 0, 0, 0, 0, 0, + buffer() + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + NULL, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DBuffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1DBuffer(const cl_mem& image1D, bool retainObject = false) : + Image(image1D, retainObject) { } + + Image1DBuffer& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! 
\brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer(const Image1DBuffer& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer& operator = (const Image1DBuffer &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer(Image1DBuffer&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DBuffer& operator = (Image1DBuffer &&img) + { + Image::operator=(std::move(img)); + return *this; + } + +}; + +/*! \class Image1DArray + * \brief Image interface for arrays of 1D images. + */ +class Image1DArray : public Image +{ +public: + Image1DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type arraySize, + size_type width, + size_type rowPitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_ARRAY, + width, + 0, 0, // height, depth (unused) + arraySize, + rowPitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DArray() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1DArray(const cl_mem& imageArray, bool retainObject = false) : + Image(imageArray, retainObject) { } + + + Image1DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! 
\brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DArray(const Image1DArray& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image1DArray& operator = (const Image1DArray &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DArray(Image1DArray&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image1DArray& operator = (Image1DArray &&img) + { + Image::operator=(std::move(img)); + return *this; + } + +}; +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 + + +/*! \brief Class interface for 2D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image2D : public Image +{ +public: + /*! \brief Constructs a 2D Image in a specified context. + * + * Wraps clCreateImage(). 
+ */ + Image2D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type width, + size_type height, + size_type row_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 120 + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + width, + height, + 0, 0, // depth, array size (unused) + row_pitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + if (!useCreateImage) + { + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 || defined(CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR) + /*! \brief Constructs a 2D Image from a buffer. + * \note This will share storage with the underlying buffer. + * + * Wraps clCreateImage(). 
+ */ + Image2D( + const Context& context, + ImageFormat format, + const Buffer &sourceBuffer, + size_type width, + size_type height, + size_type row_pitch = 0, + cl_int* err = nullptr) + { + cl_int error; + + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + width, + height, + 0, 0, // depth, array size (unused) + row_pitch, + 0, 0, 0, + // Use buffer as input to image + sourceBuffer() + }; + object_ = ::clCreateImage( + context(), + 0, // flags inherited from buffer + &format, + &desc, + nullptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { + *err = error; + } + } +#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 || defined(CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR) + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief Constructs a 2D Image from an image. + * \note This will share storage with the underlying image but may + * reinterpret the channel order and type. + * + * The image will be created with a descriptor matching the source. + * + * \param order is the channel order to reinterpret the image data as. + * The channel order may differ as described in the OpenCL + * 2.0 API specification. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + cl_channel_order order, + const Image &sourceImage, + cl_int* err = nullptr) + { + cl_int error; + + // Descriptor fields have to match source image + size_type sourceWidth = + sourceImage.getImageInfo(); + size_type sourceHeight = + sourceImage.getImageInfo(); + size_type sourceRowPitch = + sourceImage.getImageInfo(); + cl_uint sourceNumMIPLevels = + sourceImage.getImageInfo(); + cl_uint sourceNumSamples = + sourceImage.getImageInfo(); + cl_image_format sourceFormat = + sourceImage.getImageInfo(); + + // Update only the channel order. + // Channel format inherited from source. 
+ sourceFormat.image_channel_order = order; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + sourceWidth, + sourceHeight, + 0, 0, // depth (unused), array size (unused) + sourceRowPitch, + 0, // slice pitch (unused) + sourceNumMIPLevels, + sourceNumSamples, + // Use buffer as input to image + sourceImage() + }; + object_ = ::clCreateImage( + context(), + 0, // flags should be inherited from mem_object + &sourceFormat, + &desc, + nullptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { + *err = error; + } + } +#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + //! \brief Default constructor - initializes to NULL. + Image2D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2D(const cl_mem& image2D, bool retainObject = false) : + Image(image2D, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2D(const Image2D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2D& operator = (const Image2D &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2D(Image2D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. 
+ */ + Image2D& operator = (Image2D &&img) + { + Image::operator=(std::move(img)); + return *this; + } + +}; + + +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +/*! \brief Class interface for GL 2D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. + */ +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D +{ +public: + /*! \brief Constructs an Image2DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture2D(). + */ + Image2DGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture2D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); + if (err != NULL) { + *err = error; + } + + } + + //! \brief Default constructor - initializes to NULL. + Image2DGL() : Image2D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2DGL(const cl_mem& image, bool retainObject = false) : + Image2D(image, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL& operator = (const cl_mem& rhs) + { + Image2D::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DGL(const Image2DGL& img) : Image2D(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC.
+ */ + Image2DGL& operator = (const Image2DGL &img) + { + Image2D::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DGL(Image2DGL&& img) CL_HPP_NOEXCEPT_ : Image2D(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DGL& operator = (Image2DGL &&img) + { + Image2D::operator=(std::move(img)); + return *this; + } + +} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +/*! \class Image2DArray + * \brief Image interface for arrays of 2D images. + */ +class Image2DArray : public Image +{ +public: + Image2DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type arraySize, + size_type width, + size_type height, + size_type rowPitch, + size_type slicePitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D_ARRAY, + width, + height, + 0, // depth (unused) + arraySize, + rowPitch, + slicePitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DArray() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2DArray(const cl_mem& imageArray, bool retainObject = false) : Image(imageArray, retainObject) { } + + Image2DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. 
+ */ + Image2DArray(const Image2DArray& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image2DArray& operator = (const Image2DArray &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DArray(Image2DArray&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image2DArray& operator = (Image2DArray &&img) + { + Image::operator=(std::move(img)); + return *this; + } +}; +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 + +/*! \brief Class interface for 3D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3D : public Image +{ +public: + /*! \brief Constructs a 3D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image3D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + size_type width, + size_type height, + size_type depth, + size_type row_pitch = 0, + size_type slice_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + bool useCreateImage; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 120 + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE3D, + width, + height, + depth, + 0, // array size (unused) + row_pitch, + slice_pitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + 
detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + if (!useCreateImage) + { + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + } + + //! \brief Default constructor - initializes to NULL. + Image3D() : Image() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image3D(const cl_mem& image3D, bool retainObject = false) : + Image(image3D, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3D(const Image3D& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3D& operator = (const Image3D &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3D(Image3D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3D& operator = (Image3D &&img) + { + Image::operator=(std::move(img)); + return *this; + } +}; + +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +/*! \brief Class interface for GL 3D Image Memory objects. 
+ * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image3DGL : public Image3D +{ +public: + /*! \brief Constructs an Image3DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture3D(). + */ + Image3DGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture3D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image3DGL() : Image3D() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image3DGL(const cl_mem& image, bool retainObject = false) : + Image3D(image, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const cl_mem& rhs) + { + Image3D::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3DGL(const Image3DGL& img) : Image3D(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Image3DGL& operator = (const Image3DGL &img) + { + Image3D::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Image3DGL(Image3DGL&& img) CL_HPP_NOEXCEPT_ : Image3D(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. 
+ */ + Image3DGL& operator = (Image3DGL &&img) + { + Image3D::operator=(std::move(img)); + return *this; + } +}; +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +/*! \class ImageGL + * \brief general image interface for GL interop. + * We abstract the 2D and 3D GL images into a single instance here + * that wraps all GL sourced images on the grounds that setup information + * was performed by OpenCL anyway. + */ +class ImageGL : public Image +{ +public: + ImageGL( + const Context& context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); + if (err != NULL) { + *err = error; + } + } + + ImageGL() : Image() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit ImageGL(const cl_mem& image, bool retainObject = false) : + Image(image, retainObject) { } + + ImageGL& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + ImageGL(const ImageGL& img) : Image(img) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + ImageGL& operator = (const ImageGL &img) + { + Image::operator=(img); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + ImageGL(ImageGL&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. 
+ */ + ImageGL& operator = (ImageGL &&img) + { + Image::operator=(std::move(img)); + return *this; + } +}; +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/*! \brief Class interface for Pipe Memory Objects. +* +* See Memory for details about copy semantics, etc. +* +* \see Memory +*/ +class Pipe : public Memory +{ +public: + + /*! \brief Constructs a Pipe in a specified context. + * + * Wraps clCreatePipe(). + * @param context Context in which to create the pipe. + * @note Flags are fixed to CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; this constructor takes no flags argument. + * @param packet_size Size in bytes of a single packet of the pipe. + * @param max_packets Number of packets that may be stored in the pipe. + * + */ + Pipe( + const Context& context, + cl_uint packet_size, + cl_uint max_packets, + cl_int* err = NULL) + { + cl_int error; + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; + object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); + + detail::errHandler(error, __CREATE_PIPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Pipe in the default context. + * + * Wraps clCreatePipe(). + * @note Flags are fixed to CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; this constructor takes no flags argument. + * @param packet_size Size in bytes of a single packet of the pipe. + * @param max_packets Number of packets that may be stored in the pipe. + * + */ + Pipe( + cl_uint packet_size, + cl_uint max_packets, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; + object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); + + detail::errHandler(error, __CREATE_PIPE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Pipe() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership.
+ * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with earlier versions. + * + * See Memory for further details. + */ + explicit Pipe(const cl_mem& pipe, bool retainObject = false) : + Memory(pipe, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Pipe& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Pipe(const Pipe& pipe) : Memory(pipe) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Pipe& operator = (const Pipe &pipe) + { + Memory::operator=(pipe); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Pipe(Pipe&& pipe) CL_HPP_NOEXCEPT_ : Memory(std::move(pipe)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Pipe& operator = (Pipe &&pipe) + { + Memory::operator=(std::move(pipe)); + return *this; + } + + //! \brief Wrapper for clGetPipeInfo(). + template + cl_int getInfo(cl_pipe_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPipeInfo, object_, name, param), + __GET_PIPE_INFO_ERR); + } + + //! \brief Wrapper for clGetPipeInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_pipe_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; // class Pipe +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 + + +/*! \brief Class interface for cl_sampler.
+ * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_sampler as the original. For details, see + * clRetainSampler() and clReleaseSampler(). + * + * \see cl_sampler + */ +class Sampler : public detail::Wrapper +{ +public: + //! \brief Default constructor - initializes to NULL. + Sampler() { } + + /*! \brief Constructs a Sampler in a specified context. + * + * Wraps clCreateSampler(). + */ + Sampler( + const Context& context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* err = NULL) + { + cl_int error; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_sampler_properties sampler_properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, addressing_mode, + CL_SAMPLER_FILTER_MODE, filter_mode, + 0 }; + object_ = ::clCreateSamplerWithProperties( + context(), + sampler_properties, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else + object_ = ::clCreateSampler( + context(), + normalized_coords, + addressing_mode, + filter_mode, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != NULL) { + *err = error; + } +#endif + } + + /*! \brief Constructor from cl_sampler - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_sampler + * into the new Sampler object. + */ + explicit Sampler(const cl_sampler& sampler, bool retainObject = false) : + detail::Wrapper(sampler, retainObject) { } + + /*! \brief Assignment operator from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseSampler() on the value previously held by this instance. 
+ */ + Sampler& operator = (const cl_sampler& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Sampler(const Sampler& sam) : detail::Wrapper(sam) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Sampler& operator = (const Sampler &sam) + { + detail::Wrapper::operator=(sam); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Sampler(Sampler&& sam) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(sam)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Sampler& operator = (Sampler &&sam) + { + detail::Wrapper::operator=(std::move(sam)); + return *this; + } + + //! \brief Wrapper for clGetSamplerInfo(). + template + cl_int getInfo(cl_sampler_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + //! \brief Wrapper for clGetSamplerInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_sampler_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +class Program; +class CommandQueue; +class DeviceCommandQueue; +class Kernel; + +//! \brief Class interface for specifying NDRange values. +class NDRange +{ +private: + size_type sizes_[3]; + cl_uint dimensions_; + +public: + //! \brief Default constructor - resulting range has zero dimensions. + NDRange() + : dimensions_(0) + { + sizes_[0] = 0; + sizes_[1] = 0; + sizes_[2] = 0; + } + + //! \brief Constructs one-dimensional range. 
+ NDRange(size_type size0) + : dimensions_(1) + { + sizes_[0] = size0; + sizes_[1] = 1; + sizes_[2] = 1; + } + + //! \brief Constructs two-dimensional range. + NDRange(size_type size0, size_type size1) + : dimensions_(2) + { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = 1; + } + + //! \brief Constructs three-dimensional range. + NDRange(size_type size0, size_type size1, size_type size2) + : dimensions_(3) + { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = size2; + } + + /*! \brief Conversion operator to const size_type *. + * + * \returns a pointer to the size of the first dimension. + */ + operator const size_type*() const { + return sizes_; + } + + //! \brief Queries the number of dimensions in the range. + size_type dimensions() const + { + return dimensions_; + } + + //! \brief Returns the size of the object in bytes based on the + // runtime number of dimensions + size_type size() const + { + return dimensions_*sizeof(size_type); + } + + size_type* get() + { + return sizes_; + } + + const size_type* get() const + { + return sizes_; + } +}; + +//! \brief A zero-dimensional range. +static const NDRange NullRange; + +//! 
\brief Local address wrapper for use with Kernel::setArg +struct LocalSpaceArg +{ + size_type size_; +}; + +namespace detail { + +template +struct KernelArgumentHandler; + +// Enable for objects that are not subclasses of memory +// Pointers, constants etc +template +struct KernelArgumentHandler::value>::type> +{ + static size_type size(const T&) { return sizeof(T); } + static const T* ptr(const T& value) { return &value; } +}; + +// Enable for subclasses of memory where we want to get a reference to the cl_mem out +// and pass that in for safety +template +struct KernelArgumentHandler::value>::type> +{ + static size_type size(const T&) { return sizeof(cl_mem); } + static const cl_mem* ptr(const T& value) { return &(value()); } +}; + +// Specialization for DeviceCommandQueue defined later + +template <> +struct KernelArgumentHandler +{ + static size_type size(const LocalSpaceArg& value) { return value.size_; } + static const void* ptr(const LocalSpaceArg&) { return NULL; } +}; + +} +//! \endcond + +/*! Local + * \brief Helper function for generating LocalSpaceArg objects. + */ +inline LocalSpaceArg +Local(size_type size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +/*! \brief Class interface for cl_kernel. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_kernel as the original. For details, see + * clRetainKernel() and clReleaseKernel(). + * + * \see cl_kernel + */ +class Kernel : public detail::Wrapper +{ +public: + inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. + Kernel() { } + + /*! \brief Constructor from cl_kernel - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_kernel + * into the new Kernel object. 
+ */ + explicit Kernel(const cl_kernel& kernel, bool retainObject = false) : + detail::Wrapper(kernel, retainObject) { } + + /*! \brief Assignment operator from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseKernel() on the value previously held by this instance. + */ + Kernel& operator = (const cl_kernel& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Kernel(const Kernel& kernel) : detail::Wrapper(kernel) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + Kernel& operator = (const Kernel &kernel) + { + detail::Wrapper::operator=(kernel); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Kernel(Kernel&& kernel) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(kernel)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. 
+ */ + Kernel& operator = (Kernel &&kernel) + { + detail::Wrapper::operator=(std::move(kernel)); + return *this; + } + + template + cl_int getInfo(cl_kernel_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + template + cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), + __GET_KERNEL_ARG_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getArgInfo(cl_uint argIndex, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_arg_info, name>::param_type param; + cl_int result = getArgInfo(argIndex, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + template + cl_int getWorkGroupInfo( + const Device& device, cl_kernel_work_group_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getWorkGroupInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_work_group_info, name>::param_type param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if (CL_HPP_TARGET_OPENCL_VERSION >= 200 && defined(CL_HPP_USE_CL_SUB_GROUPS_KHR)) || CL_HPP_TARGET_OPENCL_VERSION >= 210 + cl_int getSubGroupInfo(const 
cl::Device &dev, cl_kernel_sub_group_info name, const cl::NDRange &range, size_type* param) const + { +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + return detail::errHandler( + clGetKernelSubGroupInfo(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), + __GET_KERNEL_SUB_GROUP_INFO_ERR); + +#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clGetKernelSubGroupInfoKHR_fn PFN_clGetKernelSubGroupInfoKHR; + static PFN_clGetKernelSubGroupInfoKHR pfn_clGetKernelSubGroupInfoKHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetKernelSubGroupInfoKHR); + + return detail::errHandler( + pfn_clGetKernelSubGroupInfoKHR(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), + __GET_KERNEL_SUB_GROUP_INFO_ERR); + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + } + + template + size_type getSubGroupInfo(const cl::Device &dev, const cl::NDRange &range, cl_int* err = NULL) const + { + size_type param; + cl_int result = getSubGroupInfo(dev, name, range, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief setArg overload taking a shared_ptr type + */ + template + cl_int setArg(cl_uint index, const cl::pointer &argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr.get()), + __SET_KERNEL_ARGS_ERR); + } + + /*! \brief setArg overload taking a vector type. + */ + template + cl_int setArg(cl_uint index, const cl::vector &argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr.data()), + __SET_KERNEL_ARGS_ERR); + } + + /*! 
\brief setArg overload taking a pointer type + */ + template + typename std::enable_if::value, cl_int>::type + setArg(cl_uint index, const T argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr), + __SET_KERNEL_ARGS_ERR); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + /*! \brief setArg overload taking a POD type + */ + template + typename std::enable_if::value, cl_int>::type + setArg(cl_uint index, const T &value) + { + return detail::errHandler( + ::clSetKernelArg( + object_, + index, + detail::KernelArgumentHandler::size(value), + detail::KernelArgumentHandler::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, size_type size, const void* argPtr) + { + return detail::errHandler( + ::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! + * Specify a vector of SVM pointers that the kernel may access in + * addition to its arguments. + */ + cl_int setSVMPointers(const vector &pointerList) + { + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*pointerList.size(), + pointerList.data())); + } + + /*! + * Specify a std::array of SVM pointers that the kernel may access in + * addition to its arguments. + */ + template + cl_int setSVMPointers(const std::array &pointerList) + { + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*pointerList.size(), + pointerList.data())); + } + + /*! \brief Enable fine-grained system SVM. + * + * \note It is only possible to enable fine-grained system SVM if all devices + * in the context associated with kernel support it. + * + * \param svmEnabled True if fine-grained system SVM is requested. False otherwise. + * \return CL_SUCCESS if the function was executed successfully. CL_INVALID_OPERATION + * if no devices in the context support fine-grained system SVM.
+ * + * \see clSetKernelExecInfo + */ + cl_int enableFineGrainedSystemSVM(bool svmEnabled) + { + cl_bool svmEnabled_ = svmEnabled ? CL_TRUE : CL_FALSE; + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, + sizeof(cl_bool), + &svmEnabled_ + ) + ); + } + + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0, const pointer &t1, Ts & ... ts) + { + pointerList[index] = static_cast(t0.get()); + setSVMPointersHelper(pointerList, t1, ts...); + } + + template + typename std::enable_if::value, void>::type + setSVMPointersHelper(std::array &pointerList, T0 t0, T1 t1, Ts... ts) + { + pointerList[index] = static_cast(t0); + setSVMPointersHelper(pointerList, t1, ts...); + } + + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0) + { + pointerList[index] = static_cast(t0.get()); + } + + + template + typename std::enable_if::value, void>::type + setSVMPointersHelper(std::array &pointerList, T0 t0) + { + pointerList[index] = static_cast(t0); + } + + template + cl_int setSVMPointers(const T0 &t0, Ts & ... ts) + { + std::array pointerList; + + setSVMPointersHelper<0, 1 + sizeof...(Ts)>(pointerList, t0, ts...); + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*(1 + sizeof...(Ts)), + pointerList.data())); + } + + template + cl_int setExecInfo(cl_kernel_exec_info param_name, const T& val) + { + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + param_name, + sizeof(T), + &val)); + } + + template + cl_int setExecInfo(typename detail::param_traits::param_type& val) + { + return setExecInfo(name, val); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /** + * Make a deep copy of the kernel object including its arguments. 
+ * @return A new kernel object with internal state entirely separate from that + * of the original but with any arguments set on the original intact. + */ + Kernel clone() + { + cl_int error; + Kernel retValue(clCloneKernel(this->get(), &error)); + + detail::errHandler(error, __CLONE_KERNEL_ERR); + return retValue; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 +}; + +/*! \class Program + * \brief Program interface that implements cl_program. + */ +class Program : public detail::Wrapper +{ +public: +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + typedef vector> Binaries; + typedef vector Sources; +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + typedef vector > Binaries; + typedef vector > Sources; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + + Program( + const string& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const size_type length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else + "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + NULL, + NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const string& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const size_type length = source.size(); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); 
+ + if (error == CL_SUCCESS && build) { + error = ::clBuildProgram( + object_, + 0, + NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else + "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + NULL, + NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } + + /** + * Create a program from a vector of source strings and the default context. + * Does not compile or link the program. + */ + Program( + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + Context context = Context::getDefault(err); + + const size_type n = (size_type)sources.size(); + + vector lengths(n); + vector strings(n); + + for (size_type i = 0; i < n; ++i) { +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].data(); + lengths[i] = sources[(int)i].length(); +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings.data(), lengths.data(), &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + /** + * Create a program from a vector of source strings and a provided context. + * Does not compile or link the program. 
+ */ + Program( + const Context& context, + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + + const size_type n = (size_type)sources.size(); + + vector lengths(n); + vector strings(n); + + for (size_type i = 0; i < n; ++i) { +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].data(); + lengths[i] = sources[(int)i].length(); +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings.data(), lengths.data(), &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 || (CL_HPP_TARGET_OPENCL_VERSION==200 && defined(CL_HPP_USE_IL_KHR)) + /** + * Program constructor to allow construction of program from SPIR-V or another IL. + * Valid for either OpenCL >= 2.1 or when CL_HPP_USE_IL_KHR is defined. 
+ */ + Program( + const vector& IL, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + object_ = ::clCreateProgramWithIL( + context(), static_cast(IL.data()), IL.size(), &error); + +#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; + static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); + + return detail::errHandler( + pfn_clCreateProgramWithILKHR( + context(), static_cast(IL.data()), IL.size(), &error); + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else + "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + NULL, + NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } + + /** + * Program constructor to allow construction of program from SPIR-V or another IL + * for a specific context. + * Valid for either OpenCL >= 2.1 or when CL_HPP_USE_IL_KHR is defined. 
+ */ + Program( + const Context& context, + const vector& IL, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + object_ = ::clCreateProgramWithIL( + context(), static_cast(IL.data()), IL.size(), &error); + +#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; + static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); + + return detail::errHandler( + pfn_clCreateProgramWithILKHR( + context(), static_cast(IL.data()), IL.size(), &error); + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); + + if (error == CL_SUCCESS && build) { + error = ::clBuildProgram( + object_, + 0, + NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else + "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + NULL, + NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + /** + * Construct a program object from a list of devices and a per-device list of binaries. + * \param context A valid OpenCL context in which to construct the program. + * \param devices A vector of OpenCL device objects for which the program will be created. + * \param binaries A vector of pairs of a pointer to a binary object and its length. + * \param binaryStatus An optional vector that on completion will be resized to + * match the size of binaries and filled with values to specify if each binary + * was successfully loaded. + * Set to CL_SUCCESS if the binary was successfully loaded. + * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. + * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. 
+ * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: + * CL_INVALID_CONTEXT if context is not a valid context. + * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; + * or if any entry in binaries is NULL or has length 0. + * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. + * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. + */ + Program( + const Context& context, + const vector& devices, + const Binaries& binaries, + vector* binaryStatus = NULL, + cl_int* err = NULL) + { + cl_int error; + + const size_type numDevices = devices.size(); + + // Catch size mismatch early and return + if(binaries.size() != numDevices) { + error = CL_INVALID_VALUE; + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + + vector lengths(numDevices); + vector images(numDevices); +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + for (size_type i = 0; i < numDevices; ++i) { + images[i] = binaries[i].data(); + lengths[i] = binaries[(int)i].size(); + } +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + for (size_type i = 0; i < numDevices; ++i) { + images[i] = (const unsigned char*)binaries[i].first; + lengths[i] = binaries[(int)i].second; + } +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + + vector deviceIDs(numDevices); + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + if(binaryStatus) { + binaryStatus->resize(numDevices); + } + + object_ = 
::clCreateProgramWithBinary( + context(), (cl_uint) devices.size(), + deviceIDs.data(), + lengths.data(), images.data(), (binaryStatus != NULL && numDevices > 0) + ? &binaryStatus->front() + : NULL, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Create program using builtin kernels. + * \param kernelNames Semi-colon separated list of builtin kernel names + */ + Program( + const Context& context, + const vector& devices, + const string& kernelNames, + cl_int* err = NULL) + { + cl_int error; + + + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateProgramWithBuiltInKernels( + context(), + (cl_uint) devices.size(), + deviceIDs.data(), + kernelNames.c_str(), + &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + Program() { } + + + /*! \brief Constructor from cl_program - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit Program(const cl_program& program, bool retainObject = false) : + detail::Wrapper(program, retainObject) { } + + Program& operator = (const cl_program& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + Program(const Program& program) : detail::Wrapper(program) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. 
+ */ + Program& operator = (const Program &program) + { + detail::Wrapper::operator=(program); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + Program(Program&& program) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(program)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + Program& operator = (Program &&program) + { + detail::Wrapper::operator=(std::move(program)); + return *this; + } + + cl_int build( + const vector& devices, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + cl_int buildError = ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + deviceIDs.data(), + options, + notifyFptr, + data); + + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + cl_int build( + const Device& device, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + cl_device_id deviceID = device(); + + cl_int buildError = ::clBuildProgram( + object_, + 1, + &deviceID, + options, + notifyFptr, + data); + + BuildLogType buildLog(1); + buildLog.push_back(std::make_pair(device, getBuildInfo(device))); + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, buildLog); + } + + cl_int build( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + cl_int buildError = ::clBuildProgram( + object_, + 0, + NULL, + options, + notifyFptr, + data); + + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_int 
compile( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + cl_int error = ::clCompileProgram( + object_, + 0, + NULL, + options, + 0, + NULL, + NULL, + notifyFptr, + data); + return detail::buildErrHandler(error, __COMPILE_PROGRAM_ERR, getBuildInfo()); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + template + cl_int getInfo(cl_program_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template + cl_int getBuildInfo( + const Device& device, cl_program_build_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetProgramBuildInfo, object_, device(), name, param), + __GET_PROGRAM_BUILD_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getBuildInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /** + * Build info function that returns a vector of device/info pairs for the specified + * info type and for all devices in the program. + * On an error reading the info for any device, an empty vector of info will be returned. 
+ */ + template + vector::param_type>> + getBuildInfo(cl_int *err = NULL) const + { + cl_int result = CL_SUCCESS; + + auto devs = getInfo(&result); + vector::param_type>> + devInfo; + + // If there was an initial error from getInfo return the error + if (result != CL_SUCCESS) { + if (err != NULL) { + *err = result; + } + return devInfo; + } + + for (const cl::Device &d : devs) { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + result = getBuildInfo(d, name, ¶m); + devInfo.push_back( + std::pair::param_type> + (d, param)); + if (result != CL_SUCCESS) { + // On error, leave the loop and return the error code + break; + } + } + if (err != NULL) { + *err = result; + } + if (result != CL_SUCCESS) { + devInfo.clear(); + } + return devInfo; + } + + cl_int createKernels(vector* kernels) + { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + vector value(numKernels); + + err = ::clCreateKernelsInProgram( + object_, numKernels, value.data(), NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + if (kernels) { + kernels->resize(value.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < value.size(); i++) { + // We do not need to retain because this kernel is being created + // by the runtime + (*kernels)[i] = Kernel(value[i], false); + } + } + return CL_SUCCESS; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 220 +#if defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) + /*! \brief Registers a callback function to be called when destructors for + * program scope global variables are complete and before the + * program is released. + * + * Wraps clSetProgramReleaseCallback(). 
+ * + * Each call to this function registers the specified user callback function + * on a callback stack associated with program. The registered user callback + * functions are called in the reverse order in which they were registered. + */ + CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int setReleaseCallback( + void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), + void * user_data = NULL) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED + { + return detail::errHandler( + ::clSetProgramReleaseCallback( + object_, + pfn_notify, + user_data), + __SET_PROGRAM_RELEASE_CALLBACK_ERR); + } +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) + + /*! \brief Sets a SPIR-V specialization constant. + * + * Wraps clSetProgramSpecializationConstant(). + */ + template + typename std::enable_if::value, cl_int>::type + setSpecializationConstant(cl_uint index, const T &value) + { + return detail::errHandler( + ::clSetProgramSpecializationConstant( + object_, + index, + sizeof(value), + &value), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); + } + + /*! \brief Sets a SPIR-V specialization constant. + * + * Wraps clSetProgramSpecializationConstant(). 
+ */ + cl_int setSpecializationConstant(cl_uint index, size_type size, const void* value) + { + return detail::errHandler( + ::clSetProgramSpecializationConstant( + object_, + index, + size, + value), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 +}; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +inline Program linkProgram( + Program input1, + Program input2, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int error_local = CL_SUCCESS; + + cl_program programs[2] = { input1(), input2() }; + + Context ctx = input1.getInfo(&error_local); + if(error_local!=CL_SUCCESS) { + detail::errHandler(error_local, __LINK_PROGRAM_ERR); + } + + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + 2, + programs, + notifyFptr, + data, + &error_local); + + detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = error_local; + } + + return Program(prog); +} + +inline Program linkProgram( + vector inputPrograms, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int error_local = CL_SUCCESS; + + vector programs(inputPrograms.size()); + + for (unsigned int i = 0; i < inputPrograms.size(); i++) { + programs[i] = inputPrograms[i](); + } + + Context ctx; + if(inputPrograms.size() > 0) { + ctx = inputPrograms[0].getInfo(&error_local); + if(error_local!=CL_SUCCESS) { + detail::errHandler(error_local, __LINK_PROGRAM_ERR); + } + } + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + (cl_uint)inputPrograms.size(), + programs.data(), + notifyFptr, + data, + &error_local); + + detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = error_local; + } + + return Program(prog, false); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + +// Template specialization for 
CL_PROGRAM_BINARIES +template <> +inline cl_int cl::Program::getInfo(cl_program_info name, vector>* param) const +{ + if (name != CL_PROGRAM_BINARIES) { + return CL_INVALID_VALUE; + } + if (param) { + // Resize the parameter array appropriately for each allocation + // and pass down to the helper + + vector sizes = getInfo(); + size_type numBinaries = sizes.size(); + + // Resize the parameter array and constituent arrays + param->resize(numBinaries); + for (size_type i = 0; i < numBinaries; ++i) { + (*param)[i].resize(sizes[i]); + } + + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + return CL_SUCCESS; +} + +template<> +inline vector> cl::Program::getInfo(cl_int* err) const +{ + vector> binariesVectors; + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binariesVectors); + if (err != NULL) { + *err = result; + } + return binariesVectors; +} + +#if CL_HPP_TARGET_OPENCL_VERSION >= 220 +// Template specialization for clSetProgramSpecializationConstant +template <> +inline cl_int cl::Program::setSpecializationConstant(cl_uint index, const bool &value) +{ + cl_uchar ucValue = value ? CL_UCHAR_MAX : 0; + return detail::errHandler( + ::clSetProgramSpecializationConstant( + object_, + index, + sizeof(ucValue), + &ucValue), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 + +inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) +{ + cl_int error; + + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); + + if (err != NULL) { + *err = error; + } + +} + +enum class QueueProperties : cl_command_queue_properties +{ + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, + OutOfOrder = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, +}; + +inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) +{ + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/*! 
\class CommandQueue + * \brief CommandQueue interface for cl_command_queue. + */ +class CommandQueue : public detail::Wrapper +{ +private: + static std::once_flag default_initialized_; + static CommandQueue default_; + static cl_int default_error_; + + /*! \brief Create the default command queue returned by @ref getDefault. + * + * It sets default_error_ to indicate success or failure. It does not throw + * @c cl::Error. + */ + static void makeDefault() + { + /* We don't want to throw an error from this function, so we have to + * catch and set the error flag. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + int error; + Context context = Context::getDefault(&error); + + if (error != CL_SUCCESS) { + default_error_ = error; + } + else { + Device device = Device::getDefault(); + default_ = CommandQueue(context, device, 0, &default_error_); + } + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + /*! \brief Create the default command queue. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const CommandQueue &c) { + default_ = c; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = CommandQueue(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
+ */ + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + bool useWithProperties; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + if ((properties & CL_QUEUE_ON_DEVICE) == 0) { + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + } + else { + error = CL_INVALID_QUEUE_PROPERTIES; + } + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + } + + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
+ */ + CommandQueue( + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + bool useWithProperties; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), device(), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + + } + } + + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
+ */ + explicit CommandQueue( + const Context& context, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + bool useWithProperties; + vector devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + if ((properties & CL_QUEUE_ON_DEVICE) == 0) { + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + } + else { + error = CL_INVALID_QUEUE_PROPERTIES; + } + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), devices[0](), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
+ */ + explicit CommandQueue( + const Context& context, + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + bool useWithProperties; + vector devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), devices[0](), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
+ */ + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + bool useWithProperties; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
+ */ + CommandQueue( + const Context& context, + const Device& device, + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + bool useWithProperties; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } +#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 + useWithProperties = true; +#else + useWithProperties = false; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 200 + if (!useWithProperties) { + object_ = ::clCreateCommandQueue( + context(), device(), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + } + + static CommandQueue getDefault(cl_int * err = NULL) + { + std::call_once(default_initialized_, makeDefault); +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); +#else // CL_HPP_TARGET_OPENCL_VERSION >= 200 + detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_ERR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + /** + * Modify the default command queue to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default command queue. 
+ * Should be compared to the passed value to ensure that it was updated. + */ + static CommandQueue setDefault(const CommandQueue &default_queue) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_queue)); /* no-op if default_initialized_ has already been consumed */ + detail::errHandler(default_error_); + return default_; + } + + CommandQueue() { } /* empty default constructor */ + + + /*! \brief Constructor from cl_command_queue - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit CommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : + detail::Wrapper(commandQueue, retainObject) { } + + CommandQueue& operator = (const cl_command_queue& rhs) /* assignment from a raw handle, forwarded to detail::Wrapper */ + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + CommandQueue(const CommandQueue& queue) : detail::Wrapper(queue) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + CommandQueue& operator = (const CommandQueue &queue) + { + detail::Wrapper::operator=(queue); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + CommandQueue(CommandQueue&& queue) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(queue)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. 
+ */ + CommandQueue& operator = (CommandQueue &&queue) + { + detail::Wrapper::operator=(std::move(queue)); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const /* forwards name/param to detail::getInfo with ::clGetCommandQueueInfo; returns the status passed through detail::errHandler */ + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const /* by-value form: returns the queried value; the status goes to *err when err is non-NULL */ + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, &param); /* address of the local declared above */ + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + void* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + size_type src_offset, + size_type dst_offset, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBuffer( + object_, src(), dst(), src_offset, dst_offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_ERR); /* NOTE(review): identifier is spelled "ENQEUE"; confirm against the macro definition before renaming */ + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + void *ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + buffer_offset.data(), + host_offset.data(), + region.data(), + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_READ_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + const void *ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + buffer_offset.data(), + host_offset.data(), + region.data(), + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + size_type src_row_pitch, + size_type src_slice_pitch, + size_type dst_row_pitch, + size_type dst_slice_pitch, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Enqueue a command to fill a buffer object with a pattern + * of a given size. The pattern is specified as a vector type. + * \tparam PatternType The datatype of the pattern field. + * The pattern type must be an accepted OpenCL data type. + * \param offset Is the offset in bytes into the buffer at + * which to start filling. This must be a multiple of + * the pattern size. + * \param size Is the size in bytes of the region to fill. + * This must be a multiple of the pattern size. + */ + template + cl_int enqueueFillBuffer( + const Buffer& buffer, + PatternType pattern, + size_type offset, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillBuffer( + object_, + buffer(), + static_cast(&pattern), + sizeof(PatternType), /* pattern size in bytes is taken from the template type */ + offset, + size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + void* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadImage( + object_, + image(), + blocking, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_READ_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteImage( + object_, + image(), + blocking, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImage( + object_, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point color value if + * the image channel data type is not an unnormalized signed or + * unsigned data type. 
+ */ + cl_int enqueueFillImage( + const Image& image, + cl_float4 fillColor, + const array& origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), /* address of the by-value colour argument */ + origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA signed integer color value if + * the image channel data type is an unnormalized signed integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_int4 fillColor, + const array& origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA unsigned integer color value if + * the image channel data type is an unnormalized unsigned integer + * type. 
+ */ + cl_int enqueueFillImage( + const Image& image, + cl_uint4 fillColor, + const array& origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), /* address of the by-value colour argument */ + origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const array& src_origin, + const array& region, + size_type dst_offset, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, + src(), + dst(), + src_origin.data(), + region.data(), + dst_offset, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + size_type src_offset, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, + src(), + dst(), + src_offset, + dst_origin.data(), + region.data(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + size_type offset, + size_type size, + const vector* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const /* returns the pointer produced by ::clEnqueueMapBuffer; the status goes to *err when err is non-NULL */ + { + cl_event tmp; + cl_int error; + void * result = ::clEnqueueMapBuffer( + object_, buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + if (event != NULL && error == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return result; + } + + void* enqueueMapImage( + const Image& buffer, /* despite its name, this parameter is an Image */ + cl_bool blocking, + cl_map_flags flags, + const array& origin, + const array& region, + size_type * row_pitch, + size_type * slice_pitch, + const vector* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_event tmp; + cl_int error; + void * result = ::clEnqueueMapImage( + object_, buffer(), blocking, flags, + origin.data(), + region.data(), + row_pitch, slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + if (event != NULL && error == CL_SUCCESS) + *event = tmp; + return result; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. 
+ */ + template + cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(ptr), size, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); /* the SVM map variants report under the map-buffer error macro */ + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. + */ + template + cl_int enqueueMapSVM( + cl::pointer &ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(ptr.get()), size, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. + */ + template + cl_int enqueueMapSVM( + cl::vector &container, + cl_bool blocking, + cl_map_flags flags, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(container.data()), container.size()*sizeof(T), /* mapped size in bytes covers every element of the container */ + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. + */ + template + cl_int enqueueUnmapSVM( + T* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(ptr), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); /* the SVM unmap variants report under the unmap-mem-object error macro */ + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ + template + cl_int enqueueUnmapSVM( + cl::pointer &ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(ptr.get()), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. + */ + template + cl_int enqueueUnmapSVM( + cl::vector &container, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(container.data()), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. this event can be waited on to ensure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList( + const vector *events = 0, + Event *event = 0) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command + * returns an event which can be waited on, i.e. this event can be waited on to ensure that + * all events either in the event_wait_list or all previously enqueued commands, queued + * before this command to command_queue, have completed. + */ + cl_int enqueueBarrierWithWaitList( + const vector *events = 0, + Event *event = 0) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueBarrierWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_BARRIER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + /** + * Enqueues a command to indicate with which device a set of memory objects + * should be associated. + */ + cl_int enqueueMigrateMemObjects( + const vector &memObjects, + cl_mem_migration_flags flags, + const vector* events = NULL, + Event* event = NULL + ) const + { + cl_event tmp; + + vector localMemObjects(memObjects.size()); /* one slot per entry of memObjects, filled by the loop below */ + + for( int i = 0; i < (int)memObjects.size(); ++i ) { + localMemObjects[i] = memObjects[i](); + } + + cl_int err = detail::errHandler( + ::clEnqueueMigrateMemObjects( + object_, + (cl_uint)memObjects.size(), + localMemObjects.data(), + flags, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); /* NOTE(review): unmap error macro used for the migrate-mem-objects call; confirm whether a dedicated macro exists */ + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /** + * Enqueues a command that will allow the host to associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from each pointer to migrate. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector &svmRawPointers, + const cl::vector &sizes, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMigrateMem( + object_, + svmRawPointers.size(), static_cast(svmRawPointers.data()), + sizes.data(), // per-pointer sizes supplied by the caller + flags, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MIGRATE_SVM_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to associate a set of SVM allocations with + * a device. This overload forwards a freshly constructed sizes vector with one entry per pointer. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector &svmRawPointers, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + return enqueueMigrateSVM(svmRawPointers, cl::vector(svmRawPointers.size()), flags, events, event); + } + + + /** + * Enqueues a command that will allow the host associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from each pointer to migrate. 
+ */ + template + cl_int enqueueMigrateSVM( + const cl::vector> &svmPointers, + const cl::vector &sizes, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + cl::vector svmRawPointers; + svmRawPointers.reserve(svmPointers.size()); + for (const auto &p : svmPointers) { /* by reference: the elements are smart pointers and must not be copied */ + svmRawPointers.push_back(static_cast(p.get())); + } + + return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); + } + + + /** + * Enqueues a command that will allow the host to associate a set of SVM allocations with + * a device. This overload forwards a freshly constructed sizes vector with one entry per pointer. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector> &svmPointers, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + return enqueueMigrateSVM(svmPointers, cl::vector(svmPointers.size()), flags, events, event); + } + + /** + * Enqueues a command that will allow the host to associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from the beginning of each container to migrate. + */ + template + cl_int enqueueMigrateSVM( + const cl::vector> &svmContainers, + const cl::vector &sizes, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + cl::vector svmRawPointers; + svmRawPointers.reserve(svmContainers.size()); + for (auto p : svmContainers) { /* NOTE(review): p is a per-iteration copy, so p.data() presumably refers to the copy's storage rather than the caller's container; confirm whether a reference was intended */ + svmRawPointers.push_back(static_cast(p.data())); + } + + return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); + } + + /** + * Enqueues a command that will allow the host associate a set of SVM allocations with + * a device. 
+ */ + template + cl_int enqueueMigrateSVM( + const cl::vector> &svmContainers, + cl_mem_migration_flags flags = 0, + const vector* events = NULL, + Event* event = NULL) const + { + return enqueueMigrateSVM(svmContainers, cl::vector(svmContainers.size()), flags, events, event); /* forwards a freshly constructed sizes vector with one entry per container */ + } + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + cl_int enqueueNDRangeKernel( + const Kernel& kernel, + const NDRange& offset, + const NDRange& global, + const NDRange& local = NullRange, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, kernel(), (cl_uint) global.dimensions(), /* work dimension count is taken from the global range */ + offset.dimensions() != 0 ? (const size_type*) offset : NULL, /* NULL when the offset range has zero dimensions */ + (const size_type*) global, + local.dimensions() != 0 ? (const size_type*) local : NULL, /* NULL when the local range has zero dimensions */ + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + +#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) + CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( + const Kernel& kernel, + const vector* events = NULL, + Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueTask( + object_, kernel(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_TASK_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) + + cl_int enqueueNativeKernel( + void (CL_CALLBACK *userFptr)(void *), + std::pair args, + const vector* mem_objects = NULL, + const vector* mem_locs = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + size_type elements = 0; + if (mem_objects != NULL) { + elements = mem_objects->size(); + } + vector mems(elements); /* stays empty when mem_objects is NULL */ + for (unsigned int i = 0; i < elements; i++) { + mems[i] = ((*mem_objects)[i])(); /* underlying handle of each wrapped memory object */ + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNativeKernel( + object_, userFptr, args.first, args.second, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + mems.data(), + (mem_locs != NULL && mem_locs->size() > 0) ? (const void **) &mem_locs->front() : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NATIVE_KERNEL); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarker( + object_, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const vector& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueWaitForEvents( + object_, + (cl_uint) events.size(), + events.size() > 0 ? 
(const cl_event*) &events.front() : NULL), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } +#endif // defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + + cl_int enqueueAcquireGLObjects( + const vector* mem_objects = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, /* NULL object list when mem_objects is NULL or empty */ + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* NULL wait list when events is NULL or empty */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) /* hand tmp back only if an event was requested and the call succeeded */ + *event = tmp; + + return err; + } + + cl_int enqueueReleaseGLObjects( + const vector* mem_objects = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined (CL_HPP_USE_DX_INTEROP) +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); + + cl_int enqueueAcquireD3D10Objects( + const vector* mem_objects = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; /* NOTE(review): presumably filled in by the CL_HPP_INIT_CL_EXT_FCN_PTR* macros below, whose bodies are not visible here */ +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueAcquireD3D10ObjectsKHR); +#endif +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueAcquireD3D10ObjectsKHR); +#endif + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* front() must not be called on an empty vector; same guard as the sibling enqueue methods */ + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseD3D10Objects( + const vector* mem_objects = NULL, + const vector* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueReleaseD3D10ObjectsKHR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueReleaseD3D10ObjectsKHR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + { + return detail::errHandler( + ::clEnqueueBarrier(object_), + __ENQUEUE_BARRIER_ERR); + } +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS + + cl_int flush() const + { + return detail::errHandler(::clFlush(object_), __FLUSH_ERR); + } + + cl_int finish() const + { + return detail::errHandler(::clFinish(object_), __FINISH_ERR); + } +}; // CommandQueue + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandQueue::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ CommandQueue CommandQueue::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int CommandQueue::default_error_ = CL_SUCCESS; + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +enum class DeviceQueueProperties : cl_command_queue_properties +{ + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, +}; + +inline DeviceQueueProperties operator|(DeviceQueueProperties lhs, DeviceQueueProperties rhs) +{ + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/*! \class DeviceCommandQueue + * \brief DeviceCommandQueue interface for device cl_command_queues. + */ +class DeviceCommandQueue : public detail::Wrapper +{ +public: + + /*! + * Trivial empty constructor to create a null queue. + */ + DeviceCommandQueue() { } + + /*! 
+ * Default construct device command queue on default context and device + */ + DeviceCommandQueue(DeviceQueueProperties properties, cl_int* err = NULL) + { + cl_int error; + cl::Context context = cl::Context::getDefault(); + cl::Device device = cl::Device::getDefault(); + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * Create a device command queue for a specified device in the passed context. + */ + DeviceCommandQueue( + const Context& context, + const Device& device, + DeviceQueueProperties properties = DeviceQueueProperties::None, + cl_int* err = NULL) + { + cl_int error; + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * Create a device command queue for a specified device in the passed context. 
+ */ + DeviceCommandQueue( + const Context& context, + const Device& device, + cl_uint queueSize, + DeviceQueueProperties properties = DeviceQueueProperties::None, + cl_int* err = NULL) + { + cl_int error; + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, + CL_QUEUE_SIZE, queueSize, + 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructor from cl_command_queue - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit DeviceCommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : + detail::Wrapper(commandQueue, retainObject) { } + + DeviceCommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue(const DeviceCommandQueue& queue) : detail::Wrapper(queue) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue& operator = (const DeviceCommandQueue &queue) + { + detail::Wrapper::operator=(queue); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue(DeviceCommandQueue&& queue) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(queue)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. 
+ */ + DeviceCommandQueue& operator = (DeviceCommandQueue &&queue) + { + detail::Wrapper::operator=(std::move(queue)); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + /*! + * Create a new default device command queue for the default device, + * in the default context and of the default size. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault( + cl_int *err = nullptr) + { + cl_int error; + cl::Context context = cl::Context::getDefault(); + cl::Device device = cl::Device::getDefault(); + + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + + return deviceQueue; + } + + /*! + * Create a new default device command queue for the specified device + * and of the default size. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. 
+ */ + static DeviceCommandQueue makeDefault( + const Context &context, const Device &device, cl_int *err = nullptr) + { + cl_int error; + + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + + return deviceQueue; + } + + /*! + * Create a new default device command queue for the specified device + * and of the requested size in bytes. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault( + const Context &context, const Device &device, cl_uint queueSize, cl_int *err = nullptr) + { + cl_int error; + + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + CL_QUEUE_SIZE, queueSize, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + + return deviceQueue; + } + + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /*! + * Modify the default device command queue to be used for subsequent kernels. + * This can update the default command queue for a device repeatedly to account + * for kernels that rely on the default. + * @return updated default device command queue. 
+ */ + static DeviceCommandQueue updateDefault(const Context &context, const Device &device, const DeviceCommandQueue &default_queue, cl_int *err = nullptr) + { + cl_int error; + error = clSetDefaultDeviceCommandQueue(context.get(), device.get(), default_queue.get()); + + detail::errHandler(error, __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + return default_queue; + } + + /*! + * Return the current default command queue for the specified command queue + */ + static DeviceCommandQueue getDefault(const CommandQueue &queue, cl_int * err = NULL) + { + return queue.getInfo(err); + } + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 +}; // DeviceCommandQueue + +namespace detail +{ + // Specialization for device command queue + template <> + struct KernelArgumentHandler + { + static size_type size(const cl::DeviceCommandQueue&) { return sizeof(cl_command_queue); } + static const cl_command_queue* ptr(const cl::DeviceCommandQueue& value) { return &(value()); } + }; +} // namespace detail + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + +template< typename IteratorType > +Buffer::Buffer( + const Context &context, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + size_type size = sizeof(DataType)*(endIterator - startIterator); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, 
__CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +template< typename IteratorType > +Buffer::Buffer( + const CommandQueue &queue, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if (readOnly) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if (useHostPtr) { + flags |= CL_MEM_USE_HOST_PTR; + } + + size_type size = sizeof(DataType)*(endIterator - startIterator); + + Context context = queue.getInfo(); + + if (useHostPtr) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } + else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if (!useHostPtr) { + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return 
queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + size_type offset, + size_type size, + const vector* events = NULL, + Event* event = NULL, + cl_int* err = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + void * result = ::clEnqueueMapBuffer( + queue(), buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; +} + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. + */ +template +inline cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events, + Event* event) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + ptr, blocking, flags, size, events, event); +} + +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. 
+ */ +template +inline cl_int enqueueMapSVM( + cl::pointer ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + ptr, blocking, flags, size, events, event); +} + +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. + */ +template +inline cl_int enqueueMapSVM( + cl::vector container, + cl_bool blocking, + cl_map_flags flags, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + container, blocking, flags, events, event); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +inline cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { + return error; + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; +} + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. 
+ */ +template +inline cl_int enqueueUnmapSVM( + T* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + +} + +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ +template +inline cl_int enqueueUnmapSVM( + cl::pointer &ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); +} + +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. 
+ */ +template +inline cl_int enqueueUnmapSVM( + cl::vector &container, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(container, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + size_type src_offset, + size_type dst_offset, + size_type size, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, startIterator, endIterator, buffer); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, buffer, startIterator, endIterator); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. 
+ */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + size_type length = endIterator-startIterator; + size_type byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } +#if defined(_MSC_VER) + std::copy( + startIterator, + endIterator, + stdext::checked_array_iterator( + pointer, length)); +#else + std::copy(startIterator, endIterator, pointer); +#endif + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. 
+ */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + size_type length = endIterator-startIterator; + size_type byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Blocking SVM map operation - performs a blocking map underneath. + */ +template +inline cl_int mapSVM(cl::vector &container) +{ + return enqueueMapSVM(container, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE); +} + +/** +* Blocking SVM map operation - performs a blocking map underneath. 
+*/ +template +inline cl_int unmapSVM(cl::vector &container) +{ + return enqueueUnmapSVM(container); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + void *ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + const void *ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + size_type src_row_pitch, + size_type src_slice_pitch, + size_type dst_row_pitch, + size_type dst_slice_pitch, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if 
(error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + +inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} + +inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const array& src_origin, + const array& region, + size_type dst_offset, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = 
CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} + +inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + size_type src_offset, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} + + +inline cl_int flush(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.flush(); +} + +inline cl_int finish(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + + return queue.finish(); +} + +class EnqueueArgs +{ +private: + CommandQueue queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + vector events_; + + template + friend class KernelFunctor; + +public: + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + 
global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(const vector &events, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(const vector &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(const vector &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global) : + queue_(queue), + offset_(NullRange), + 
global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } +}; + + +//---------------------------------------------------------------------------------------------- + + +/** + * Type safe kernel functor. + * + */ +template +class KernelFunctor +{ +private: + Kernel kernel_; + + template + void setArgs(T0&& t0, T1s&&... t1s) + { + kernel_.setArg(index, t0); + setArgs(std::forward(t1s)...); + } + + template + void setArgs(T0&& t0) + { + kernel_.setArg(index, t0); + } + + template + void setArgs() + { + } + + +public: + KernelFunctor(Kernel kernel) : kernel_(kernel) + {} + + KernelFunctor( + const Program& program, + const string name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} + + //! \brief Return type of the functor + typedef Event result_type; + + /** + * Enqueue kernel. + * @param args Launch parameters of the kernel. + * @param t0... List of kernel arguments based on the template type of the functor. + */ + Event operator() ( + const EnqueueArgs& args, + Ts... ts) + { + Event event; + setArgs<0>(std::forward(ts)...); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } + + /** + * Enqueue kernel with support for error code. + * @param args Launch parameters of the kernel. + * @param t0... List of kernel arguments based on the template type of the functor. + * @param error Out parameter returning the error code from the execution. + */ + Event operator() ( + const EnqueueArgs& args, + Ts... 
ts, + cl_int &error) + { + Event event; + setArgs<0>(std::forward(ts)...); + + error = args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_int setSVMPointers(const vector &pointerList) + { + return kernel_.setSVMPointers(pointerList); + } + + template + cl_int setSVMPointers(const T0 &t0, T1s &... ts) + { + return kernel_.setSVMPointers(t0, ts...); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + Kernel getKernel() + { + return kernel_; + } +}; + +namespace compatibility { + /** + * Backward compatibility class to ensure that cl.hpp code works with opencl.hpp. + * Please use KernelFunctor directly. + */ + template + struct make_kernel + { + typedef KernelFunctor FunctorType; + + FunctorType functor_; + + make_kernel( + const Program& program, + const string name, + cl_int * err = NULL) : + functor_(FunctorType(program, name, err)) + {} + + make_kernel( + const Kernel kernel) : + functor_(FunctorType(kernel)) + {} + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + Ts...); + + Event operator()( + const EnqueueArgs& enqueueArgs, + Ts... 
args) + { + return functor_( + enqueueArgs, args...); + } + }; +} // namespace compatibility + + +//---------------------------------------------------------------------------------------------------------------------- + +#undef CL_HPP_ERR_STR_ +#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) +#undef __GET_DEVICE_INFO_ERR +#undef __GET_PLATFORM_INFO_ERR +#undef __GET_DEVICE_IDS_ERR +#undef __GET_PLATFORM_IDS_ERR +#undef __GET_CONTEXT_INFO_ERR +#undef __GET_EVENT_INFO_ERR +#undef __GET_EVENT_PROFILE_INFO_ERR +#undef __GET_MEM_OBJECT_INFO_ERR +#undef __GET_IMAGE_INFO_ERR +#undef __GET_SAMPLER_INFO_ERR +#undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_ARG_INFO_ERR +#undef __GET_KERNEL_SUB_GROUP_INFO_ERR +#undef __GET_KERNEL_WORK_GROUP_INFO_ERR +#undef __GET_PROGRAM_INFO_ERR +#undef __GET_PROGRAM_BUILD_INFO_ERR +#undef __GET_COMMAND_QUEUE_INFO_ERR +#undef __CREATE_CONTEXT_ERR +#undef __CREATE_CONTEXT_FROM_TYPE_ERR +#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR +#undef __CREATE_BUFFER_ERR +#undef __COPY_ERR +#undef __CREATE_SUBBUFFER_ERR +#undef __CREATE_GL_BUFFER_ERR +#undef __CREATE_GL_RENDER_BUFFER_ERR +#undef __GET_GL_OBJECT_INFO_ERR +#undef __CREATE_IMAGE_ERR +#undef __CREATE_GL_TEXTURE_ERR +#undef __IMAGE_DIMENSION_ERR +#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR +#undef __CREATE_USER_EVENT_ERR +#undef __SET_USER_EVENT_STATUS_ERR +#undef __SET_EVENT_CALLBACK_ERR +#undef __WAIT_FOR_EVENTS_ERR +#undef __CREATE_KERNEL_ERR +#undef __SET_KERNEL_ARGS_ERR +#undef __CREATE_PROGRAM_WITH_SOURCE_ERR +#undef __CREATE_PROGRAM_WITH_IL_ERR +#undef __CREATE_PROGRAM_WITH_BINARY_ERR +#undef __CREATE_PROGRAM_WITH_IL_ERR +#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR +#undef __BUILD_PROGRAM_ERR +#undef __COMPILE_PROGRAM_ERR +#undef __LINK_PROGRAM_ERR +#undef __CREATE_KERNELS_IN_PROGRAM_ERR +#undef __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR +#undef __CREATE_SAMPLER_WITH_PROPERTIES_ERR +#undef __SET_COMMAND_QUEUE_PROPERTY_ERR +#undef __ENQUEUE_READ_BUFFER_ERR +#undef 
__ENQUEUE_READ_BUFFER_RECT_ERR +#undef __ENQUEUE_WRITE_BUFFER_ERR +#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +#undef __ENQEUE_COPY_BUFFER_ERR +#undef __ENQEUE_COPY_BUFFER_RECT_ERR +#undef __ENQUEUE_FILL_BUFFER_ERR +#undef __ENQUEUE_READ_IMAGE_ERR +#undef __ENQUEUE_WRITE_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_ERR +#undef __ENQUEUE_FILL_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +#undef __ENQUEUE_MAP_BUFFER_ERR +#undef __ENQUEUE_MAP_IMAGE_ERR +#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR +#undef __ENQUEUE_NDRANGE_KERNEL_ERR +#undef __ENQUEUE_NATIVE_KERNEL +#undef __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR +#undef __ENQUEUE_MIGRATE_SVM_ERR +#undef __ENQUEUE_ACQUIRE_GL_ERR +#undef __ENQUEUE_RELEASE_GL_ERR +#undef __CREATE_PIPE_ERR +#undef __GET_PIPE_INFO_ERR +#undef __RETAIN_ERR +#undef __RELEASE_ERR +#undef __FLUSH_ERR +#undef __FINISH_ERR +#undef __VECTOR_CAPACITY_ERR +#undef __CREATE_SUB_DEVICES_ERR +#undef __CREATE_SUB_DEVICES_ERR +#undef __ENQUEUE_MARKER_ERR +#undef __ENQUEUE_WAIT_FOR_EVENTS_ERR +#undef __ENQUEUE_BARRIER_ERR +#undef __UNLOAD_COMPILER_ERR +#undef __CREATE_GL_TEXTURE_2D_ERR +#undef __CREATE_GL_TEXTURE_3D_ERR +#undef __CREATE_IMAGE2D_ERR +#undef __CREATE_IMAGE3D_ERR +#undef __CREATE_COMMAND_QUEUE_ERR +#undef __ENQUEUE_TASK_ERR +#undef __CREATE_SAMPLER_ERR +#undef __ENQUEUE_MARKER_WAIT_LIST_ERR +#undef __ENQUEUE_BARRIER_WAIT_LIST_ERR +#undef __CLONE_KERNEL_ERR +#undef __GET_HOST_TIMER_ERR +#undef __GET_DEVICE_AND_HOST_TIMER_ERR + +#endif //CL_HPP_USER_OVERRIDE_ERROR_STRINGS + +// Extensions +#undef CL_HPP_INIT_CL_EXT_FCN_PTR_ +#undef CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_ + +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) +#undef CL_HPP_PARAM_NAME_DEVICE_FISSION_ +#endif // CL_HPP_USE_CL_DEVICE_FISSION + +#undef CL_HPP_NOEXCEPT_ +#undef CL_HPP_DEFINE_STATIC_MEMBER_ + +} // namespace cl + +#endif // CL_HPP_ From 36aaa593fc667aac1d7595eb456a91935cdf2171 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 
13:20:41 +0100 Subject: [PATCH 10/77] tests/sixtracklib/opencl: updates wrapper include header in OpenCL test --- tests/sixtracklib/opencl/test_buffer_opencl_c99.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sixtracklib/opencl/test_buffer_opencl_c99.cpp b/tests/sixtracklib/opencl/test_buffer_opencl_c99.cpp index 5d95cf4f8..7bb65bed8 100644 --- a/tests/sixtracklib/opencl/test_buffer_opencl_c99.cpp +++ b/tests/sixtracklib/opencl/test_buffer_opencl_c99.cpp @@ -17,7 +17,7 @@ #include "sixtracklib/common/generated/path.h" #include "sixtracklib/common/buffer.h" #include "sixtracklib/opencl/context.h" -#include "sixtracklib/opencl/cl.h" +#include "sixtracklib/opencl/opencl.h" TEST( C99_OpenCL_Buffer, InitWithGenericObjDataCopyToDeviceCopyBackCmpSingleThread ) From e6fcc21ce5f7889e3e3ef23339dac4c375dfe246 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 21:44:44 +0100 Subject: [PATCH 11/77] cmake: debugs issues with OpenCL setup in cmake --- cmake/SetupOpenCL.cmake | 51 +++++++++++++++++-------------- sixtracklib/opencl/CMakeLists.txt | 3 ++ 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/cmake/SetupOpenCL.cmake b/cmake/SetupOpenCL.cmake index dddd068f8..17e6cfd5d 100644 --- a/cmake/SetupOpenCL.cmake +++ b/cmake/SetupOpenCL.cmake @@ -15,14 +15,19 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) set( SIXTRL_OPENCL_LIBRARIES ) if( NOT OpenCL_FOUND ) + message( STATUS "---- Checking for OpenCL installation ... " ) find_package( OpenCL QUIET ) endif() if( OpenCL_FOUND ) set( SIXTRL_TEMP_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS} ) set( SIXTRL_OPENCL_LIBRARIES ${OpenCL_LIBRARIES} ) + message( STATUS "---- OpenCL environment found (ver. 
${OpenCL_VERSION})" ) + message( STATUS "---- OpenCL library ${OpenCL_LIBRARIES}" ) + message( STATUS "---- OpenCL include directories ${OpenCL_INCLUDE_DIRS}" ) elseif( SIXTRACKL_REQUIRE_OFFLINE_BUILD ) - set( SIXTRL_TEMP_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/external/CL" ) + message( STATUS "---- OpenCL not found, use fallback headers due to offline build" ) + set( SIXTRL_TEMP_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/external" ) else() message( FATAL_ERROR "---- Unable to find OpenCL setup, unable to download since offline build required" ) @@ -64,7 +69,7 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE_VERSION OR NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) - set( SIXTRL_OPENCL_EXT_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include/CL ) + set( SIXTRL_OPENCL_EXT_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include" ) include( FetchContent ) endif() @@ -76,8 +81,8 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE OR NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ) ) - if( NOT EXISTS ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) - file( MAKE_DIRECTORY ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + if( NOT EXISTS "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" ) + file( MAKE_DIRECTORY "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" ) endif() FetchContent_Declare( opencl_c99_headers @@ -105,18 +110,17 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) foreach( in_file ${SIXTRL_OPENCL_C99_IN_FILES} ) get_filename_component( in_file_name ${in_file} NAME ) - get_filename_component( in_dir ${in_file} DIRECTORY ) file( COPY ${in_file} - DESTINATION ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + DESTINATION "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" ) if( NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE ) if( ${in_file_name} STREQUAL "opencl.h" ) - set( SIXTRL_OPENCL_C99_HEADER_FILE "opencl.h" ) - set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE "CL/opencl.h" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) set( 
SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 3 ) elseif( ${in_file_name} STREQUAL "cl.h" ) - set( SIXTRL_OPENCL_C99_HEADER_FILE "cl.h" ) - set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE "CL/cl.h" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 1 ) endif() endif() @@ -156,22 +160,21 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) foreach( in_file ${SIXTRL_OPENCL_CXX_IN_FILES} ) get_filename_component( in_file_name ${in_file} NAME ) - get_filename_component( in_dir ${in_file} DIRECTORY ) file( COPY ${in_file} - DESTINATION ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + DESTINATION "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" ) if( NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE ) if( ${in_file_name} STREQUAL "opencl.hpp" ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE "opencl.hpp" ) - set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/opencl.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 3 ) elseif( ${in_file_name} STREQUAL "cl2.hpp" ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE "cl2.hpp" ) - set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/cl2.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 2 ) elseif( ${in_file_name} STREQUAL "cl.hpp" ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE "cl.hpp" ) - set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${in_dir} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/cl.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 1 ) endif() endif() @@ -192,6 +195,7 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( OpenCL_FOUND ) + message( STATUS "---- OpenCL environment found (ver. 
${OpenCL_VERSION} )" ) set( SIXTRACKLIB_MODULE_VALUE_OPENCL 1 ) elseif( NOT OpenCL_FOUND AND NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD ) FetchContent_Declare( opencl_icd_loader @@ -205,10 +209,10 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) message( STATUS "------ Downloading external OpenCL ICD Loader [DONE]" ) endif() - get_filename_component( SIXTRL_OPENCL_C99_HEADER_DIR + get_filename_component( SIXTRL_TEMP_INCLUDE_DIRS ${SIXTRL_OPENCL_C99_HEADER_FILE} DIRECTORY ) - file( COPY ${SIXTRL_OPENCL_C99_HEADER_DIR} DESTINATION + file( COPY ${SIXTRL_TEMP_INCLUDE_DIRS} DESTINATION ${opencl_icd_loader_SOURCE_DIR}/inc PATTERN "*.h" ) FetchContent_MakeAvailable( opencl_icd_loader ) @@ -222,10 +226,11 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - set( SIXTRL_OPENCL_INCLUDE_DIRS ${SIXTRL_OPENCL_C99_INCLUDE_DIR} ) + set( SIXTRL_OPENCL_INCLUDE_DIRS ${SIXTRL_OPENCL_INCLUDE_DIRS} + ${SIXTRL_OPENCL_C99_INCLUDE_DIR} ) - if( NOT ( ${SIXTRL_OPENCL_CXX_INCLUDE_DIR} STREQUAL - ${SIXTRL_OPENCL_C99_INCLUDE_DIR} ) ) + if( NOT ( "${SIXTRL_OPENCL_CXX_INCLUDE_DIR}" STREQUAL + "${SIXTRL_OPENCL_C99_INCLUDE_DIR}" ) ) set( SIXTRL_OPENCL_INCLUDE_DIRS ${SIXTRL_OPENCL_INCLUDE_DIRS} ${SIXTRL_OPENCL_CXX_INCLUDE_DIR} ) endif() diff --git a/sixtracklib/opencl/CMakeLists.txt b/sixtracklib/opencl/CMakeLists.txt index a9d086f9e..14a9e5969 100644 --- a/sixtracklib/opencl/CMakeLists.txt +++ b/sixtracklib/opencl/CMakeLists.txt @@ -73,6 +73,9 @@ set( SIXTRACKLIB_OPENCL_KERNEL_SOURCES kernels/assign_address_item.cl ) +message( STATUS "----- SIXTRL_OPENCL_INCLUDE_DIRS: ${SIXTRL_OPENCL_INCLUDE_DIRS}" ) +message( STATUS "----- SIXTRL_OPENCL_LIBRARIES: ${SIXTRL_OPENCL_LIBRARIES}" ) + add_library( sixtrack_opencl OBJECT ${SIXTRACKLIB_OPENCL_HEADERS} ${SIXTRACKLIB_OPENCL_SOURCES} ) From 0e6992ad2206873580c9ec05bacbcd6cf1b343da Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:04:51 +0100 Subject: [PATCH 12/77] 
sixtracklib/opencl: fixes superflous type alias --- sixtracklib/opencl/internal/base_context.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/sixtracklib/opencl/internal/base_context.h b/sixtracklib/opencl/internal/base_context.h index baedac9b7..f667c1fd8 100644 --- a/sixtracklib/opencl/internal/base_context.h +++ b/sixtracklib/opencl/internal/base_context.h @@ -38,8 +38,6 @@ #include "sixtracklib/opencl/opencl.hpp" #endif /* !defined( SIXTRL_NO_INCLUDES ) */ -using NS(arch_size_t) = std::size_t; - namespace SIXTRL_CXX_NAMESPACE { class ClArgument; From 129b5ee94483f8ea14cc93c671f9d852661238aa Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:09:38 +0100 Subject: [PATCH 13/77] sixtracklib/opencl: fixes type alias issue --- sixtracklib/opencl/argument.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sixtracklib/opencl/argument.h b/sixtracklib/opencl/argument.h index 5dbbe0b48..70059a394 100644 --- a/sixtracklib/opencl/argument.h +++ b/sixtracklib/opencl/argument.h @@ -61,7 +61,7 @@ namespace SIXTRL_CXX_NAMESPACE public: using context_base_t = ClContextBase; - using size_type = std::size_t; + using size_type = ::NS(arch_size_t); using cobj_buffer_t = struct NS(Buffer); using cxx_cobj_buffer_t = SIXTRL_CXX_NAMESPACE::Buffer; using status_t = SIXTRL_CXX_NAMESPACE::arch_status_t; From f7c6ca43b4c3704b014e5e7a38c638d551fc0131 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:11:11 +0100 Subject: [PATCH 14/77] sixtracklib/opencl: fixes type alias issue --- sixtracklib/opencl/internal/base_context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sixtracklib/opencl/internal/base_context.cpp b/sixtracklib/opencl/internal/base_context.cpp index 478cf8c49..ba4cbe579 100644 --- a/sixtracklib/opencl/internal/base_context.cpp +++ b/sixtracklib/opencl/internal/base_context.cpp @@ -37,7 +37,7 @@ namespace SIXTRL_CXX_NAMESPACE { namespace st = SIXTRL_CXX_NAMESPACE; using ctx_t = 
st::ClContextBase; - using st_size_t = st_size_t; + using st_size_t = ctx_t::size_type; using st_kernel_id_t = ctx_t::kernel_id_t; using st_kernel_arg_type_t = ctx_t::kernel_arg_type_t; using st_status_t = ctx_t::status_t; From b16b0d5d019f9c85acb53fd13619138f70ed050b Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:26:53 +0100 Subject: [PATCH 15/77] tools: fixes inconsistent compiler flags for compiling the tools --- tools/CMakeLists.txt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 45b10ce75..62cb4caf7 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -39,16 +39,12 @@ list( APPEND TOOLS_C99_TARGETS dump_cbuffer ) # set properties for all tools: if( TOOLS_C99_TARGETS ) - set( TOOLS_COMPILE_OPTIONS ${SIXTRACKLIB_CPU_FLAGS} ) - list( APPEND TOOLS_COMPILE_OPTIONS -Wall ) - list( APPEND TOOLS_COMPILE_OPTIONS -Werror ) - list( APPEND TOOLS_COMPILE_OPTIONS -pedantic ) - set_property( TARGET ${TOOLS_C99_TARGETS} PROPERTY LINKER_LANGUAGE C ) set_property( TARGET ${TOOLS_C99_TARGETS} PROPERTY C_STANDARD 99 ) set_property( TARGET ${TOOLS_C99_TARGETS} PROPERTY C_STANDARD_REQUIRED ON ) set_property( TARGET ${TOOLS_C99_TARGETS} - APPEND PROPERTY COMPILE_OPTIONS ${TOOLS_COMPILE_OPTIONS} ) + APPEND PROPERTY COMPILE_OPTIONS ${SIXTRACKLIB_CPU_FLAGS} + ${SIXTRACKLIB_C99_FLAGS} ${SIXTRACKL_C99_AUTOVEC_FLAGS} ) set_property( TARGET ${TOOLS_C99_TARGETS} PROPERTY LINK_LIBRARIES ${TOOLS_LINK_LIBRARIES} ) From 10004430a936984fa65fac00aedd08711ad74739 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:28:00 +0100 Subject: [PATCH 16/77] sixtracklib/testlib: fixes type inconsistent print formats --- .../common/beam_elements/beam_elements.c | 3 +- .../common/beam_elements/beam_elements.h | 3 +- .../testlib/common/buffer/buffer.c | 20 +++--- .../testlib/common/particles/particles.c | 69 ++++++++++--------- .../testlib/common/particles/particles.h | 13 
++-- 5 files changed, 59 insertions(+), 49 deletions(-) diff --git a/tests/sixtracklib/testlib/common/beam_elements/beam_elements.c b/tests/sixtracklib/testlib/common/beam_elements/beam_elements.c index ab56ef9f2..4ccbcb7ff 100644 --- a/tests/sixtracklib/testlib/common/beam_elements/beam_elements.c +++ b/tests/sixtracklib/testlib/common/beam_elements/beam_elements.c @@ -162,7 +162,8 @@ void NS(BeamElement_print)( printf( "|unknown | type_id = %3d\r\n" " | size = %8lu bytes\r\n" " | addr = %16p\r\n", - ( int )type_id, NS(Object_get_size)( be_info ), + ( int )type_id, + ( long unsigned )NS(Object_get_size)( be_info ), ( void* )( uintptr_t )addr ); } }; diff --git a/tests/sixtracklib/testlib/common/beam_elements/beam_elements.h b/tests/sixtracklib/testlib/common/beam_elements/beam_elements.h index 70b4bc209..6fa0a8625 100644 --- a/tests/sixtracklib/testlib/common/beam_elements/beam_elements.h +++ b/tests/sixtracklib/testlib/common/beam_elements/beam_elements.h @@ -250,7 +250,8 @@ SIXTRL_INLINE void NS(BeamElement_print_out)( printf( "|unknown | type_id = %3d;\r\n" " | size = %8lu bytes;\r\n" " | addr = %16p;\r\n", - ( int )type_id, NS(Object_get_size)( be_info ), + ( int )type_id, + ( long unsigned )NS(Object_get_size)( be_info ), ( void const* )( uintptr_t )addr ); } }; diff --git a/tests/sixtracklib/testlib/common/buffer/buffer.c b/tests/sixtracklib/testlib/common/buffer/buffer.c index e369aebc0..684b88081 100644 --- a/tests/sixtracklib/testlib/common/buffer/buffer.c +++ b/tests/sixtracklib/testlib/common/buffer/buffer.c @@ -336,16 +336,16 @@ void NS(Buffer_object_print)( SIXTRL_ARGPTR_DEC FILE* fp, " | max_turn = %16ld ;\r\n" " | is_rolling = %16ld ;\r\n" " | out_store_addr = %16p ;\r\n", - ( int64_t )NS(ElemByElemConfig_get_order)( config ), - ( int64_t )NS(ElemByElemConfig_get_num_particles_to_store)( config ), - ( int64_t )NS(ElemByElemConfig_get_num_elements_to_store)( config ), - ( int64_t )NS(ElemByElemConfig_get_num_turns_to_store)( config ), - ( int64_t 
)NS(ElemByElemConfig_get_min_particle_id)( config ), - ( int64_t )NS(ElemByElemConfig_get_max_particle_id)( config ), - ( int64_t )NS(ElemByElemConfig_get_min_element_id)( config ), - ( int64_t )NS(ElemByElemConfig_get_max_element_id)( config ), - ( int64_t )NS(ElemByElemConfig_get_min_turn)( config ), - ( int64_t )NS(ElemByElemConfig_get_max_turn)( config ), + ( long int )NS(ElemByElemConfig_get_order)( config ), + ( long int )NS(ElemByElemConfig_get_num_particles_to_store)( config ), + ( long int )NS(ElemByElemConfig_get_num_elements_to_store)( config ), + ( long int )NS(ElemByElemConfig_get_num_turns_to_store)( config ), + ( long int )NS(ElemByElemConfig_get_min_particle_id)( config ), + ( long int )NS(ElemByElemConfig_get_max_particle_id)( config ), + ( long int )NS(ElemByElemConfig_get_min_element_id)( config ), + ( long int )NS(ElemByElemConfig_get_max_element_id)( config ), + ( long int )NS(ElemByElemConfig_get_min_turn)( config ), + ( long int )NS(ElemByElemConfig_get_max_turn)( config ), config->is_rolling, ( void* )( uintptr_t )config->out_store_addr ); } diff --git a/tests/sixtracklib/testlib/common/particles/particles.c b/tests/sixtracklib/testlib/common/particles/particles.c index 90723a042..66ee06925 100644 --- a/tests/sixtracklib/testlib/common/particles/particles.c +++ b/tests/sixtracklib/testlib/common/particles/particles.c @@ -620,16 +620,20 @@ void NS(Particles_print_single)( NS(Particles_get_charge_ratio_value)( particles, index ) ); fprintf( fp, "particle_id = %18ld\r\n", - NS(Particles_get_particle_id_value)( particles, index ) ); + ( long int )NS(Particles_get_particle_id_value)( + particles, index ) ); fprintf( fp, "at_elem_id = %18ld\r\n", - NS(Particles_get_at_element_id_value)( particles, index ) ); + ( long int )NS(Particles_get_at_element_id_value)( + particles, index ) ); fprintf( fp, "at_turn = %18ld\r\n", - NS(Particles_get_at_turn_value)( particles, index ) ); + ( long int )NS(Particles_get_at_turn_value)( + particles, index ) ); 
fprintf( fp, "state = %18ld\r\n\r\n", - NS(Particles_get_state_value)( particles, index ) ); + ( long int )NS(Particles_get_state_value)( + particles, index ) ); } return; @@ -651,7 +655,7 @@ void NS(Particles_print)( { if( num_particles > 1u ) { - fprintf( fp, "particle id = %8lu\r\n", ii ); + fprintf( fp, "particle id = %8lu\r\n", ( unsigned long )ii ); } NS(Particles_print_single)( fp, particles, ii ); @@ -686,107 +690,107 @@ void NS(Particles_print_max_diff)( fprintf( fp, "Delta |q0| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_q0_value)( max_diff, 0 ), - max_diff_indices[ 0 ] ); + ( unsigned long )max_diff_indices[ 0 ] ); fprintf( fp, "Delta |mass0| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_mass0_value)( max_diff, 0 ), - max_diff_indices[ 1 ] ); + ( unsigned long )max_diff_indices[ 1 ] ); fprintf( fp, "Delta |beta0| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_beta0_value)( max_diff, 0 ), - max_diff_indices[ 2 ] ); + ( unsigned long )max_diff_indices[ 2 ] ); fprintf( fp, "Delta |gamma0| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_gamma0_value)( max_diff, 0 ), - max_diff_indices[ 3 ] ); + ( unsigned long )max_diff_indices[ 3 ] ); fprintf( fp, "Delta |p0c| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_p0c_value)( max_diff, 0 ), - max_diff_indices[ 4 ] ); + ( unsigned long )max_diff_indices[ 4 ] ); fprintf( fp, "Delta |s| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_s_value)( max_diff, 0 ), - max_diff_indices[ 5 ] ); + ( unsigned long )max_diff_indices[ 5 ] ); fprintf( fp, "Delta |x| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_x_value)( max_diff, 0 ), - max_diff_indices[ 6 ] ); + ( unsigned long )max_diff_indices[ 6 ] ); fprintf( fp, "Delta |y| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_y_value)( max_diff, 0 ), - max_diff_indices[ 7 ] ); + ( unsigned long )max_diff_indices[ 7 ] ); fprintf( fp, "Delta |px| = %.16e " "max diff at index = %8lu\r\n", 
NS(Particles_get_px_value)( max_diff, 0 ), - max_diff_indices[ 8 ] ); + ( unsigned long )max_diff_indices[ 8 ] ); fprintf( fp, "Delta |py| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_py_value)( max_diff, 0 ), - max_diff_indices[ 9 ] ); + ( unsigned long )max_diff_indices[ 9 ] ); fprintf( fp, "Delta |zeta| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_zeta_value)( max_diff, 0 ), - max_diff_indices[ 10 ] ); + ( unsigned long )max_diff_indices[ 10 ] ); fprintf( fp, "Delta |psigma| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_psigma_value)( max_diff, 0 ), - max_diff_indices[ 11 ] ); + ( unsigned long )max_diff_indices[ 11 ] ); fprintf( fp, "Delta |delta| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_delta_value)( max_diff, 0 ), - max_diff_indices[ 12 ] ); + ( unsigned long )max_diff_indices[ 12 ] ); fprintf( fp, "Delta |rpp| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_rpp_value)( max_diff, 0 ), - max_diff_indices[ 13 ] ); + ( unsigned long )max_diff_indices[ 13 ] ); fprintf( fp, "Delta |rvv| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_rvv_value)( max_diff, 0 ), - max_diff_indices[ 14 ] ); + ( unsigned long )max_diff_indices[ 14 ] ); fprintf( fp, "Delta |chi| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_chi_value)( max_diff, 0 ), - max_diff_indices[ 15 ] ); + ( unsigned long )max_diff_indices[ 15 ] ); fprintf( fp, "Delta |charge_ratio| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_charge_ratio_value)( max_diff, 0 ), - max_diff_indices[ 15 ] ); + ( unsigned long )max_diff_indices[ 15 ] ); fprintf( fp, "Delta |particle_id| = %22ld " "max diff at index = %8lu\r\n", - NS(Particles_get_particle_id_value)( max_diff, 0 ), - max_diff_indices[ 16 ] ); + ( long int )NS(Particles_get_particle_id_value)( max_diff, 0 ), + ( unsigned long )max_diff_indices[ 16 ] ); fprintf( fp, "Delta |at_elem_id| = %22ld " "max diff at index = %8lu\r\n", - 
NS(Particles_get_at_element_id_value)( max_diff, 0 ), - max_diff_indices[ 17 ] ); + ( long int )NS(Particles_get_at_element_id_value)( max_diff, 0 ), + ( unsigned long )max_diff_indices[ 17 ] ); fprintf( fp, "Delta |at_turn| = %22ld " "max diff at index = %8lu\r\n", - NS(Particles_get_at_turn_value)( max_diff, 0 ), - max_diff_indices[ 18 ] ); + ( long int )NS(Particles_get_at_turn_value)( max_diff, 0 ), + ( unsigned long )max_diff_indices[ 18 ] ); fprintf( fp, "Delta |state| = %22ld " "max diff at index = %8lu\r\n\r\n", - NS(Particles_get_state_value)( max_diff, 0 ), - max_diff_indices[ 19 ] ); + ( long int )NS(Particles_get_state_value)( max_diff, 0 ), + ( unsigned long )max_diff_indices[ 19 ] ); } return; @@ -1117,7 +1121,8 @@ void NS(Particles_buffer_print)( "------------------------\r\n" ); fprintf( fp, "particle block index = %8lu / %8lu\r\n", - ii + 1, nn ); + ( unsigned long int )( ii + 1 ), + ( unsigned long int )nn ); } NS(Particles_print)( fp, particles ); @@ -1163,7 +1168,7 @@ void NS(Particles_buffer_print_max_diff)( "------------------------\r\n" ); fprintf( fp, "particle block index = %8lu / %8lu\r\n", - ii + 1, nn ); + ( unsigned long )( ii + 1 ), ( unsigned long )nn ); } NS(Particles_print_max_diff)( fp, max_diff, max_diff_indices ); diff --git a/tests/sixtracklib/testlib/common/particles/particles.h b/tests/sixtracklib/testlib/common/particles/particles.h index 631d93430..7f201c7cb 100644 --- a/tests/sixtracklib/testlib/common/particles/particles.h +++ b/tests/sixtracklib/testlib/common/particles/particles.h @@ -399,16 +399,19 @@ SIXTRL_INLINE void NS(Particles_print_out_single)( NS(Particles_get_charge_ratio_value)( particles, index ) ); printf( "particle_id = %18ld\r\n", - NS(Particles_get_particle_id_value)( particles, index ) ); + ( long int )NS(Particles_get_particle_id_value)( + particles, index ) ); printf( "at_elem_id = %18ld\r\n", - NS(Particles_get_at_element_id_value)( particles, index ) ); + ( long int 
)NS(Particles_get_at_element_id_value)( + particles, index ) ); printf( "at_turn = %18ld\r\n", - NS(Particles_get_at_turn_value)( particles, index ) ); + ( long int )NS(Particles_get_at_turn_value)( + particles, index ) ); printf( "state = %18ld\r\n\r\n", - NS(Particles_get_state_value)( particles, index ) ); + ( long int )NS(Particles_get_state_value)( particles, index ) ); } #else /* !defined( _GPUCODE ) */ @@ -435,7 +438,7 @@ SIXTRL_INLINE void NS(Particles_print_out)( { if( num_particles > 1u ) { - printf( "particle id = %8lu\r\n", ii ); + printf( "particle id = %8lu\r\n", ( long unsigned )ii ); } NS(Particles_print_out_single)( particles, ii ); From dc9a9a133655525e7ec0943e6316108c44ee442f Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:28:00 +0100 Subject: [PATCH 17/77] sixtracklib/testlib: fixes type inconsistent print formats --- .../common/beam_elements/beam_elements.c | 3 +- .../common/beam_elements/beam_elements.h | 3 +- .../testlib/common/buffer/buffer.c | 22 +++--- .../testlib/common/particles/particles.c | 69 ++++++++++--------- .../testlib/common/particles/particles.h | 13 ++-- 5 files changed, 60 insertions(+), 50 deletions(-) diff --git a/tests/sixtracklib/testlib/common/beam_elements/beam_elements.c b/tests/sixtracklib/testlib/common/beam_elements/beam_elements.c index ab56ef9f2..4ccbcb7ff 100644 --- a/tests/sixtracklib/testlib/common/beam_elements/beam_elements.c +++ b/tests/sixtracklib/testlib/common/beam_elements/beam_elements.c @@ -162,7 +162,8 @@ void NS(BeamElement_print)( printf( "|unknown | type_id = %3d\r\n" " | size = %8lu bytes\r\n" " | addr = %16p\r\n", - ( int )type_id, NS(Object_get_size)( be_info ), + ( int )type_id, + ( long unsigned )NS(Object_get_size)( be_info ), ( void* )( uintptr_t )addr ); } }; diff --git a/tests/sixtracklib/testlib/common/beam_elements/beam_elements.h b/tests/sixtracklib/testlib/common/beam_elements/beam_elements.h index 70b4bc209..6fa0a8625 100644 --- 
a/tests/sixtracklib/testlib/common/beam_elements/beam_elements.h +++ b/tests/sixtracklib/testlib/common/beam_elements/beam_elements.h @@ -250,7 +250,8 @@ SIXTRL_INLINE void NS(BeamElement_print_out)( printf( "|unknown | type_id = %3d;\r\n" " | size = %8lu bytes;\r\n" " | addr = %16p;\r\n", - ( int )type_id, NS(Object_get_size)( be_info ), + ( int )type_id, + ( long unsigned )NS(Object_get_size)( be_info ), ( void const* )( uintptr_t )addr ); } }; diff --git a/tests/sixtracklib/testlib/common/buffer/buffer.c b/tests/sixtracklib/testlib/common/buffer/buffer.c index e369aebc0..83dddf6e5 100644 --- a/tests/sixtracklib/testlib/common/buffer/buffer.c +++ b/tests/sixtracklib/testlib/common/buffer/buffer.c @@ -336,17 +336,17 @@ void NS(Buffer_object_print)( SIXTRL_ARGPTR_DEC FILE* fp, " | max_turn = %16ld ;\r\n" " | is_rolling = %16ld ;\r\n" " | out_store_addr = %16p ;\r\n", - ( int64_t )NS(ElemByElemConfig_get_order)( config ), - ( int64_t )NS(ElemByElemConfig_get_num_particles_to_store)( config ), - ( int64_t )NS(ElemByElemConfig_get_num_elements_to_store)( config ), - ( int64_t )NS(ElemByElemConfig_get_num_turns_to_store)( config ), - ( int64_t )NS(ElemByElemConfig_get_min_particle_id)( config ), - ( int64_t )NS(ElemByElemConfig_get_max_particle_id)( config ), - ( int64_t )NS(ElemByElemConfig_get_min_element_id)( config ), - ( int64_t )NS(ElemByElemConfig_get_max_element_id)( config ), - ( int64_t )NS(ElemByElemConfig_get_min_turn)( config ), - ( int64_t )NS(ElemByElemConfig_get_max_turn)( config ), - config->is_rolling, + ( long int )NS(ElemByElemConfig_get_order)( config ), + ( long int )NS(ElemByElemConfig_get_num_particles_to_store)( config ), + ( long int )NS(ElemByElemConfig_get_num_elements_to_store)( config ), + ( long int )NS(ElemByElemConfig_get_num_turns_to_store)( config ), + ( long int )NS(ElemByElemConfig_get_min_particle_id)( config ), + ( long int )NS(ElemByElemConfig_get_max_particle_id)( config ), + ( long int )NS(ElemByElemConfig_get_min_element_id)( 
config ), + ( long int )NS(ElemByElemConfig_get_max_element_id)( config ), + ( long int )NS(ElemByElemConfig_get_min_turn)( config ), + ( long int )NS(ElemByElemConfig_get_max_turn)( config ), + ( long int )config->is_rolling, ( void* )( uintptr_t )config->out_store_addr ); } else if( ( type_id >= NS(OBJECT_TYPE_BINARY_ARRAY) ) && diff --git a/tests/sixtracklib/testlib/common/particles/particles.c b/tests/sixtracklib/testlib/common/particles/particles.c index 90723a042..66ee06925 100644 --- a/tests/sixtracklib/testlib/common/particles/particles.c +++ b/tests/sixtracklib/testlib/common/particles/particles.c @@ -620,16 +620,20 @@ void NS(Particles_print_single)( NS(Particles_get_charge_ratio_value)( particles, index ) ); fprintf( fp, "particle_id = %18ld\r\n", - NS(Particles_get_particle_id_value)( particles, index ) ); + ( long int )NS(Particles_get_particle_id_value)( + particles, index ) ); fprintf( fp, "at_elem_id = %18ld\r\n", - NS(Particles_get_at_element_id_value)( particles, index ) ); + ( long int )NS(Particles_get_at_element_id_value)( + particles, index ) ); fprintf( fp, "at_turn = %18ld\r\n", - NS(Particles_get_at_turn_value)( particles, index ) ); + ( long int )NS(Particles_get_at_turn_value)( + particles, index ) ); fprintf( fp, "state = %18ld\r\n\r\n", - NS(Particles_get_state_value)( particles, index ) ); + ( long int )NS(Particles_get_state_value)( + particles, index ) ); } return; @@ -651,7 +655,7 @@ void NS(Particles_print)( { if( num_particles > 1u ) { - fprintf( fp, "particle id = %8lu\r\n", ii ); + fprintf( fp, "particle id = %8lu\r\n", ( unsigned long )ii ); } NS(Particles_print_single)( fp, particles, ii ); @@ -686,107 +690,107 @@ void NS(Particles_print_max_diff)( fprintf( fp, "Delta |q0| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_q0_value)( max_diff, 0 ), - max_diff_indices[ 0 ] ); + ( unsigned long )max_diff_indices[ 0 ] ); fprintf( fp, "Delta |mass0| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_mass0_value)( 
max_diff, 0 ), - max_diff_indices[ 1 ] ); + ( unsigned long )max_diff_indices[ 1 ] ); fprintf( fp, "Delta |beta0| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_beta0_value)( max_diff, 0 ), - max_diff_indices[ 2 ] ); + ( unsigned long )max_diff_indices[ 2 ] ); fprintf( fp, "Delta |gamma0| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_gamma0_value)( max_diff, 0 ), - max_diff_indices[ 3 ] ); + ( unsigned long )max_diff_indices[ 3 ] ); fprintf( fp, "Delta |p0c| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_p0c_value)( max_diff, 0 ), - max_diff_indices[ 4 ] ); + ( unsigned long )max_diff_indices[ 4 ] ); fprintf( fp, "Delta |s| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_s_value)( max_diff, 0 ), - max_diff_indices[ 5 ] ); + ( unsigned long )max_diff_indices[ 5 ] ); fprintf( fp, "Delta |x| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_x_value)( max_diff, 0 ), - max_diff_indices[ 6 ] ); + ( unsigned long )max_diff_indices[ 6 ] ); fprintf( fp, "Delta |y| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_y_value)( max_diff, 0 ), - max_diff_indices[ 7 ] ); + ( unsigned long )max_diff_indices[ 7 ] ); fprintf( fp, "Delta |px| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_px_value)( max_diff, 0 ), - max_diff_indices[ 8 ] ); + ( unsigned long )max_diff_indices[ 8 ] ); fprintf( fp, "Delta |py| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_py_value)( max_diff, 0 ), - max_diff_indices[ 9 ] ); + ( unsigned long )max_diff_indices[ 9 ] ); fprintf( fp, "Delta |zeta| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_zeta_value)( max_diff, 0 ), - max_diff_indices[ 10 ] ); + ( unsigned long )max_diff_indices[ 10 ] ); fprintf( fp, "Delta |psigma| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_psigma_value)( max_diff, 0 ), - max_diff_indices[ 11 ] ); + ( unsigned long )max_diff_indices[ 11 ] ); fprintf( fp, "Delta |delta| = %.16e " "max diff at index = %8lu\r\n", 
NS(Particles_get_delta_value)( max_diff, 0 ), - max_diff_indices[ 12 ] ); + ( unsigned long )max_diff_indices[ 12 ] ); fprintf( fp, "Delta |rpp| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_rpp_value)( max_diff, 0 ), - max_diff_indices[ 13 ] ); + ( unsigned long )max_diff_indices[ 13 ] ); fprintf( fp, "Delta |rvv| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_rvv_value)( max_diff, 0 ), - max_diff_indices[ 14 ] ); + ( unsigned long )max_diff_indices[ 14 ] ); fprintf( fp, "Delta |chi| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_chi_value)( max_diff, 0 ), - max_diff_indices[ 15 ] ); + ( unsigned long )max_diff_indices[ 15 ] ); fprintf( fp, "Delta |charge_ratio| = %.16e " "max diff at index = %8lu\r\n", NS(Particles_get_charge_ratio_value)( max_diff, 0 ), - max_diff_indices[ 15 ] ); + ( unsigned long )max_diff_indices[ 15 ] ); fprintf( fp, "Delta |particle_id| = %22ld " "max diff at index = %8lu\r\n", - NS(Particles_get_particle_id_value)( max_diff, 0 ), - max_diff_indices[ 16 ] ); + ( long int )NS(Particles_get_particle_id_value)( max_diff, 0 ), + ( unsigned long )max_diff_indices[ 16 ] ); fprintf( fp, "Delta |at_elem_id| = %22ld " "max diff at index = %8lu\r\n", - NS(Particles_get_at_element_id_value)( max_diff, 0 ), - max_diff_indices[ 17 ] ); + ( long int )NS(Particles_get_at_element_id_value)( max_diff, 0 ), + ( unsigned long )max_diff_indices[ 17 ] ); fprintf( fp, "Delta |at_turn| = %22ld " "max diff at index = %8lu\r\n", - NS(Particles_get_at_turn_value)( max_diff, 0 ), - max_diff_indices[ 18 ] ); + ( long int )NS(Particles_get_at_turn_value)( max_diff, 0 ), + ( unsigned long )max_diff_indices[ 18 ] ); fprintf( fp, "Delta |state| = %22ld " "max diff at index = %8lu\r\n\r\n", - NS(Particles_get_state_value)( max_diff, 0 ), - max_diff_indices[ 19 ] ); + ( long int )NS(Particles_get_state_value)( max_diff, 0 ), + ( unsigned long )max_diff_indices[ 19 ] ); } return; @@ -1117,7 +1121,8 @@ void NS(Particles_buffer_print)( 
"------------------------\r\n" ); fprintf( fp, "particle block index = %8lu / %8lu\r\n", - ii + 1, nn ); + ( unsigned long int )( ii + 1 ), + ( unsigned long int )nn ); } NS(Particles_print)( fp, particles ); @@ -1163,7 +1168,7 @@ void NS(Particles_buffer_print_max_diff)( "------------------------\r\n" ); fprintf( fp, "particle block index = %8lu / %8lu\r\n", - ii + 1, nn ); + ( unsigned long )( ii + 1 ), ( unsigned long )nn ); } NS(Particles_print_max_diff)( fp, max_diff, max_diff_indices ); diff --git a/tests/sixtracklib/testlib/common/particles/particles.h b/tests/sixtracklib/testlib/common/particles/particles.h index 631d93430..7f201c7cb 100644 --- a/tests/sixtracklib/testlib/common/particles/particles.h +++ b/tests/sixtracklib/testlib/common/particles/particles.h @@ -399,16 +399,19 @@ SIXTRL_INLINE void NS(Particles_print_out_single)( NS(Particles_get_charge_ratio_value)( particles, index ) ); printf( "particle_id = %18ld\r\n", - NS(Particles_get_particle_id_value)( particles, index ) ); + ( long int )NS(Particles_get_particle_id_value)( + particles, index ) ); printf( "at_elem_id = %18ld\r\n", - NS(Particles_get_at_element_id_value)( particles, index ) ); + ( long int )NS(Particles_get_at_element_id_value)( + particles, index ) ); printf( "at_turn = %18ld\r\n", - NS(Particles_get_at_turn_value)( particles, index ) ); + ( long int )NS(Particles_get_at_turn_value)( + particles, index ) ); printf( "state = %18ld\r\n\r\n", - NS(Particles_get_state_value)( particles, index ) ); + ( long int )NS(Particles_get_state_value)( particles, index ) ); } #else /* !defined( _GPUCODE ) */ @@ -435,7 +438,7 @@ SIXTRL_INLINE void NS(Particles_print_out)( { if( num_particles > 1u ) { - printf( "particle id = %8lu\r\n", ii ); + printf( "particle id = %8lu\r\n", ( long unsigned )ii ); } NS(Particles_print_out_single)( particles, ii ); From 40947993c5dd73b94f9aa3351a53473170fa314e Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:38:01 +0100 Subject: [PATCH 
18/77] sixtracklib: fixes issue with missing transient OpenCL include path --- sixtracklib/CMakeLists.txt | 2 +- sixtracklib/opencl/CMakeLists.txt | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/sixtracklib/CMakeLists.txt b/sixtracklib/CMakeLists.txt index a10a96627..1926f4435 100644 --- a/sixtracklib/CMakeLists.txt +++ b/sixtracklib/CMakeLists.txt @@ -87,7 +87,7 @@ endif() if( TARGET sixtrack_opencl ) target_include_directories( sixtrack - PUBLIC $ ) + PUBLIC $ ) endif() set_target_properties( sixtrack PROPERTIES diff --git a/sixtracklib/opencl/CMakeLists.txt b/sixtracklib/opencl/CMakeLists.txt index 14a9e5969..a9d086f9e 100644 --- a/sixtracklib/opencl/CMakeLists.txt +++ b/sixtracklib/opencl/CMakeLists.txt @@ -73,9 +73,6 @@ set( SIXTRACKLIB_OPENCL_KERNEL_SOURCES kernels/assign_address_item.cl ) -message( STATUS "----- SIXTRL_OPENCL_INCLUDE_DIRS: ${SIXTRL_OPENCL_INCLUDE_DIRS}" ) -message( STATUS "----- SIXTRL_OPENCL_LIBRARIES: ${SIXTRL_OPENCL_LIBRARIES}" ) - add_library( sixtrack_opencl OBJECT ${SIXTRACKLIB_OPENCL_HEADERS} ${SIXTRACKLIB_OPENCL_SOURCES} ) From e6598c0904266be98d5d7389c14681d107c529ee Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:38:49 +0100 Subject: [PATCH 19/77] cmake: removes non-necessary prints from SetupOpenCL; cosmetic fixes --- cmake/SetupOpenCL.cmake | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/cmake/SetupOpenCL.cmake b/cmake/SetupOpenCL.cmake index 17e6cfd5d..6f1e413ea 100644 --- a/cmake/SetupOpenCL.cmake +++ b/cmake/SetupOpenCL.cmake @@ -15,16 +15,13 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) set( SIXTRL_OPENCL_LIBRARIES ) if( NOT OpenCL_FOUND ) - message( STATUS "---- Checking for OpenCL installation ... " ) find_package( OpenCL QUIET ) endif() if( OpenCL_FOUND ) set( SIXTRL_TEMP_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS} ) set( SIXTRL_OPENCL_LIBRARIES ${OpenCL_LIBRARIES} ) - message( STATUS "---- OpenCL environment found (ver. 
${OpenCL_VERSION})" ) - message( STATUS "---- OpenCL library ${OpenCL_LIBRARIES}" ) - message( STATUS "---- OpenCL include directories ${OpenCL_INCLUDE_DIRS}" ) + message( STATUS "---- OpenCL environment found (ver. ${OpenCL_VERSION_STRING})" ) elseif( SIXTRACKL_REQUIRE_OFFLINE_BUILD ) message( STATUS "---- OpenCL not found, use fallback headers due to offline build" ) set( SIXTRL_TEMP_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/external" ) @@ -195,7 +192,6 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( OpenCL_FOUND ) - message( STATUS "---- OpenCL environment found (ver. ${OpenCL_VERSION} )" ) set( SIXTRACKLIB_MODULE_VALUE_OPENCL 1 ) elseif( NOT OpenCL_FOUND AND NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD ) FetchContent_Declare( opencl_icd_loader @@ -258,12 +254,10 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) endif() set( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG 1 ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - "#if !defined( ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} )\r\n" ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - " #define ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} \r\n" ) - string( APPEND SIXTRL_OPENCL_ENABLE_EXCEPTION_STR - "#endif /* !defined( ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} ) */\r\n" ) + set( SIXTRL_OPENCL_ENABLE_EXCEPTION_STR + "#if !defined( ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} ) + #define ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} + #endif /* !defined( ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR_MACRO} ) */" ) endif() endif() From d8970826f930188943a42e1bb3cb7234142e0b6e Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:45:17 +0100 Subject: [PATCH 20/77] sixtracklib/opencl: adds missing CL_TARGET_OPENCL_VERSION macro to opencl.h --- sixtracklib/opencl/opencl.h.template | 8 ++++++++ sixtracklib/opencl/opencl.hpp.template | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sixtracklib/opencl/opencl.h.template 
b/sixtracklib/opencl/opencl.h.template index ac42c3fce..026c90880 100644 --- a/sixtracklib/opencl/opencl.h.template +++ b/sixtracklib/opencl/opencl.h.template @@ -9,6 +9,14 @@ #define SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS "@SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS@" #endif /* !defined( SIXTRL_DEFAULT_OPENCL_COMPILER_FLAGS ) */ +#if !defined( CL_MINIMUM_OPENCL_VERSION ) + #define CL_MINIMUM_OPENCL_VERSION @SIXTRACKL_MIN_OPENCL_VERSION@ +#endif /* !defined( CL_MINIMUM_OPENCL_VERSION ) */ + +#if !defined( CL_TARGET_OPENCL_VERSION ) + #define CL_TARGET_OPENCL_VERSION @SIXTRACKL_TARGET_OPENCL_VERSION@ +#endif /* !defined( CL_TARGET_OPENCL_VERSION ) */ + #if !defined( __cplusplus ) && !defined( _GPUCODE ) #if !defined( SIXTRL_OPENCL_C99_HEADER_FILE ) diff --git a/sixtracklib/opencl/opencl.hpp.template b/sixtracklib/opencl/opencl.hpp.template index a6e5bacaf..e5da08652 100644 --- a/sixtracklib/opencl/opencl.hpp.template +++ b/sixtracklib/opencl/opencl.hpp.template @@ -13,7 +13,7 @@ #endif /* !defined( CL_HPP_MINIMUM_OPENCL_VERSION ) */ #if !defined( CL_HPP_TARGET_OPENCL_VERSION ) - #define CL_HPP_TARGET_OPENCL_VERSION @SIXTRACKL_MIN_OPENCL_VERSION@ + #define CL_HPP_TARGET_OPENCL_VERSION @SIXTRACKL_TARGET_OPENCL_VERSION@ #endif /* !defined( CL_HPP_TARGET_OPENCL_VERSION ) */ #if !defined( SIXTRL_OPENCL_CXX_HEADER_FILE ) From 7fadab95f9f223a67247a41133fadc7c82c0747a Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:46:28 +0100 Subject: [PATCH 21/77] tools: fixes inconsistent print type codes --- tools/dump_cbuffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/dump_cbuffer.c b/tools/dump_cbuffer.c index 6673bac40..898bb9761 100644 --- a/tools/dump_cbuffer.c +++ b/tools/dump_cbuffer.c @@ -54,15 +54,15 @@ int main( int argc, char* argv[] ) " buf size = %16lu\r\n" " buf capacity = %16lu\r\n" " begin addr = %16p\r\n\r\n", - ( uint64_t )num_objects, ( uint64_t )num_slots, - ( uint64_t )num_dataptrs, ( uint64_t )num_garbage, - 
( uint64_t )buffer_size, ( uint64_t )buffer_capacity, + ( unsigned long )num_objects, ( unsigned long )num_slots, + ( unsigned long )num_dataptrs, ( unsigned long )num_garbage, + ( unsigned long )buffer_size, ( unsigned long )buffer_capacity, ( void* )( uintptr_t )addr ); for( ; it != end ; ++it, ++ii ) { printf( "Object %9lu / %9lu:\r\n", - ( uint64_t )ii, ( uint64_t )num_objects ); + ( unsigned long )ii, ( unsigned long )num_objects ); NS(Buffer_object_print_out_typeid)( NS(Object_get_type_id)( it ) ); printf( "\r\n" ); From 7500ef7b27958cc18f6b318adf4405b7fdc8af84 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:59:04 +0100 Subject: [PATCH 22/77] examples/c99: fixes inconsistent type format statements in printf --- examples/c99/simple_particles_buffer.c | 6 ++++-- examples/c99/track_bbsimple.c | 3 ++- examples/c99/track_beambeam.c | 3 ++- examples/c99/track_lhc_bb.c | 2 +- examples/c99/track_lhc_no_bb.c | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/c99/simple_particles_buffer.c b/examples/c99/simple_particles_buffer.c index 7205eca7a..d8f948fa4 100644 --- a/examples/c99/simple_particles_buffer.c +++ b/examples/c99/simple_particles_buffer.c @@ -91,7 +91,8 @@ int main( int argc, char* argv[] ) )st_Object_get_const_begin_ptr( cmp_obj_it ); printf( "particle blocks: %3lu / %3lu\r\n", - ii, num_particles_blocks ); + ( unsigned int )ii, + ( unsigned int )num_particles_blocks ); printf( " -> original particles object : \r\n" ); printParticles( cmp_particles ); @@ -148,7 +149,8 @@ int main( int argc, char* argv[] ) )st_Object_get_const_begin_ptr( obj_it ); printf( "managed buffer particle blocks: %3lu / %3lu\r\n", - ii, num_particles_blocks ); + ( unsigned int )ii, + ( unsigned int )num_particles_blocks ); printf( " -> copy particles object : \r\n" ); printParticles( particles ); diff --git a/examples/c99/track_bbsimple.c b/examples/c99/track_bbsimple.c index 5729e2545..f08c39508 100644 --- 
a/examples/c99/track_bbsimple.c +++ b/examples/c99/track_bbsimple.c @@ -70,7 +70,8 @@ int main( int argc, char* argv[] ) } printf( "Use: NUM_PARTICLES = %10lu\r\n" - " NUM_TURNS = %10lu\r\n\r\n", NUM_PARTICLES, NUM_TURNS ); + " NUM_TURNS = %10lu\r\n\r\n", + ( unsigned int )NUM_PARTICLES, ( unsigned int )NUM_TURNS ); /* ********************************************************************** */ /* **** Building Particles Data from Input Example Particle Data **** */ diff --git a/examples/c99/track_beambeam.c b/examples/c99/track_beambeam.c index 310a25bde..d65942bf2 100644 --- a/examples/c99/track_beambeam.c +++ b/examples/c99/track_beambeam.c @@ -70,7 +70,8 @@ int main( int argc, char* argv[] ) } printf( "Use: NUM_PARTICLES = %10lu\r\n" - " NUM_TURNS = %10lu\r\n\r\n", NUM_PARTICLES, NUM_TURNS ); + " NUM_TURNS = %10lu\r\n\r\n", + ( unsigned int )NUM_PARTICLES, ( unsigned int )NUM_TURNS ); /* ********************************************************************** */ /* **** Building Particles Data from Input Example Particle Data **** */ diff --git a/examples/c99/track_lhc_bb.c b/examples/c99/track_lhc_bb.c index 12735afce..a85c6336d 100644 --- a/examples/c99/track_lhc_bb.c +++ b/examples/c99/track_lhc_bb.c @@ -72,7 +72,7 @@ int main( int argc, char* argv[] ) printf( "Selected NUM_PARTICLES = %10lu\r\n" "Selected NUM_TURNS = %10lu\r\n" - "\r\n", NUM_PARTICLES, NUM_TURNS ); + "\r\n", ( unsigned int )NUM_PARTICLES, ( unsigned int )NUM_TURNS ); /* ********************************************************************** */ /* **** Building Particles Data from LHC Particle Dump Data **** */ diff --git a/examples/c99/track_lhc_no_bb.c b/examples/c99/track_lhc_no_bb.c index 003074dd0..a2ba835e2 100644 --- a/examples/c99/track_lhc_no_bb.c +++ b/examples/c99/track_lhc_no_bb.c @@ -72,7 +72,7 @@ int main( int argc, char* argv[] ) printf( "Selected NUM_PARTICLES = %10lu\r\n" "Selected NUM_TURNS = %10lu\r\n" - "\r\n", NUM_PARTICLES, NUM_TURNS ); + "\r\n", ( unsigned int 
)NUM_PARTICLES, ( unsigned int )NUM_TURNS ); /* ********************************************************************** */ /* **** Building Particles Data from LHC Particle Dump Data **** */ From aa9271bb7a51da1cef79c703b7849326bf26ae8f Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 22:59:17 +0100 Subject: [PATCH 23/77] tools: fixes inconsistent type format statements in printf --- tools/normalize_cobject_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/normalize_cobject_dump.c b/tools/normalize_cobject_dump.c index f5092f550..84026ff32 100644 --- a/tools/normalize_cobject_dump.c +++ b/tools/normalize_cobject_dump.c @@ -19,7 +19,7 @@ int main( int const argc, char* argv[] ) { printf( "Usage: %s PATH_TO_DUMP_FILE [BASE_ADDR=%lu] " "[PATH_TO_OUTPUT_FILE=PATH_TO_DUMP_FILE]\r\n", - argv[ 0 ], target_base_addr ); + argv[ 0 ], ( unsigned int )target_base_addr ); } if( argc >= 2 ) From cb5852eb16dd32889dd1b83ab8e91e85ba628d42 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 23:03:19 +0100 Subject: [PATCH 24/77] examples/c99: fixes inconsistent type format statements in printf --- examples/c99/simple_particles_buffer.c | 8 ++++---- examples/c99/track_bbsimple.c | 2 +- examples/c99/track_beambeam.c | 2 +- examples/c99/track_lhc_bb.c | 2 +- examples/c99/track_lhc_no_bb.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/c99/simple_particles_buffer.c b/examples/c99/simple_particles_buffer.c index d8f948fa4..ccac018bf 100644 --- a/examples/c99/simple_particles_buffer.c +++ b/examples/c99/simple_particles_buffer.c @@ -91,8 +91,8 @@ int main( int argc, char* argv[] ) )st_Object_get_const_begin_ptr( cmp_obj_it ); printf( "particle blocks: %3lu / %3lu\r\n", - ( unsigned int )ii, - ( unsigned int )num_particles_blocks ); + ( unsigned long )ii, + ( unsigned long )num_particles_blocks ); printf( " -> original particles object : \r\n" ); printParticles( cmp_particles ); @@ -149,8 
+149,8 @@ int main( int argc, char* argv[] ) )st_Object_get_const_begin_ptr( obj_it ); printf( "managed buffer particle blocks: %3lu / %3lu\r\n", - ( unsigned int )ii, - ( unsigned int )num_particles_blocks ); + ( unsigned long )ii, + ( unsigned long )num_particles_blocks ); printf( " -> copy particles object : \r\n" ); printParticles( particles ); diff --git a/examples/c99/track_bbsimple.c b/examples/c99/track_bbsimple.c index f08c39508..ccb0f8b39 100644 --- a/examples/c99/track_bbsimple.c +++ b/examples/c99/track_bbsimple.c @@ -71,7 +71,7 @@ int main( int argc, char* argv[] ) printf( "Use: NUM_PARTICLES = %10lu\r\n" " NUM_TURNS = %10lu\r\n\r\n", - ( unsigned int )NUM_PARTICLES, ( unsigned int )NUM_TURNS ); + ( unsigned long )NUM_PARTICLES, ( unsigned long )NUM_TURNS ); /* ********************************************************************** */ /* **** Building Particles Data from Input Example Particle Data **** */ diff --git a/examples/c99/track_beambeam.c b/examples/c99/track_beambeam.c index d65942bf2..558f1087d 100644 --- a/examples/c99/track_beambeam.c +++ b/examples/c99/track_beambeam.c @@ -71,7 +71,7 @@ int main( int argc, char* argv[] ) printf( "Use: NUM_PARTICLES = %10lu\r\n" " NUM_TURNS = %10lu\r\n\r\n", - ( unsigned int )NUM_PARTICLES, ( unsigned int )NUM_TURNS ); + ( unsigned long )NUM_PARTICLES, ( unsigned long )NUM_TURNS ); /* ********************************************************************** */ /* **** Building Particles Data from Input Example Particle Data **** */ diff --git a/examples/c99/track_lhc_bb.c b/examples/c99/track_lhc_bb.c index a85c6336d..c1370f51d 100644 --- a/examples/c99/track_lhc_bb.c +++ b/examples/c99/track_lhc_bb.c @@ -72,7 +72,7 @@ int main( int argc, char* argv[] ) printf( "Selected NUM_PARTICLES = %10lu\r\n" "Selected NUM_TURNS = %10lu\r\n" - "\r\n", ( unsigned int )NUM_PARTICLES, ( unsigned int )NUM_TURNS ); + "\r\n", ( unsigned long )NUM_PARTICLES, ( unsigned long )NUM_TURNS ); /* 
********************************************************************** */ /* **** Building Particles Data from LHC Particle Dump Data **** */ diff --git a/examples/c99/track_lhc_no_bb.c b/examples/c99/track_lhc_no_bb.c index a2ba835e2..b9986ab7b 100644 --- a/examples/c99/track_lhc_no_bb.c +++ b/examples/c99/track_lhc_no_bb.c @@ -72,7 +72,7 @@ int main( int argc, char* argv[] ) printf( "Selected NUM_PARTICLES = %10lu\r\n" "Selected NUM_TURNS = %10lu\r\n" - "\r\n", ( unsigned int )NUM_PARTICLES, ( unsigned int )NUM_TURNS ); + "\r\n", ( unsigned long )NUM_PARTICLES, ( unsigned long )NUM_TURNS ); /* ********************************************************************** */ /* **** Building Particles Data from LHC Particle Dump Data **** */ From 26afb354bbb1159c8f64d9be32de9d2580aea3f2 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 23:03:27 +0100 Subject: [PATCH 25/77] tools: fixes inconsistent type format statements in printf --- tools/normalize_cobject_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/normalize_cobject_dump.c b/tools/normalize_cobject_dump.c index 84026ff32..04b062803 100644 --- a/tools/normalize_cobject_dump.c +++ b/tools/normalize_cobject_dump.c @@ -19,7 +19,7 @@ int main( int const argc, char* argv[] ) { printf( "Usage: %s PATH_TO_DUMP_FILE [BASE_ADDR=%lu] " "[PATH_TO_OUTPUT_FILE=PATH_TO_DUMP_FILE]\r\n", - argv[ 0 ], ( unsigned int )target_base_addr ); + argv[ 0 ], ( unsigned long )target_base_addr ); } if( argc >= 2 ) From 60b29fc3a09b17335d284b0e322f5bc1b2297eac Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 23:39:40 +0100 Subject: [PATCH 26/77] Settings.cmake: add option to always download latest OpenCL headers - Deprecate the "use legacy c++ header" -> this is now discovery based, the user should not have to use this setting. 
It is kept for the sake of compatibility --- Settings.cmake.default | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Settings.cmake.default b/Settings.cmake.default index 7d9d28c3c..94a9fc12c 100644 --- a/Settings.cmake.default +++ b/Settings.cmake.default @@ -131,7 +131,11 @@ if( SIXTRACKL_ENABLE_OPENCL ) set( SIXTRACKL_TARGET_OPENCL_VERSION "120" CACHE STRING "Target OpenCL 1.x version" ) - option( SIXTRACKL_USE_LEGACY_CL_HPP "Use legacy cl.hpp header" OFF ) + option( SIXTRACKL_USE_LEGACY_CL_HPP + "Use legacy cl.hpp header [DEPRECATED!]" OFF ) + + option( SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS + "Always try to download latest stable OpenCL headers" OFF ) option( SIXTRACKL_OPENCL_CXX_ENABLE_EXCEPTIONS "Enable OpenCL 1.x Host-side C++ Exceptions" ON ) From bc2b4a8d273844dec44f5c1e320a3b8decf4b9b4 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 23:40:58 +0100 Subject: [PATCH 27/77] cmake: updates SetupOpenCL.cmake - simplifies the detection of the headers in case they are downloaded -> this no longer yields random results depending on the order but always a predictable outcome, sorted by most desirable headers first - supports the "always downloads headers" option from Settings.cmake - verifies that the OpenCL ICD loader can successfully be built --- cmake/SetupOpenCL.cmake | 110 +++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 58 deletions(-) diff --git a/cmake/SetupOpenCL.cmake b/cmake/SetupOpenCL.cmake index 6f1e413ea..9e0fa7738 100644 --- a/cmake/SetupOpenCL.cmake +++ b/cmake/SetupOpenCL.cmake @@ -11,8 +11,14 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) # Provide include directories and library directories for OpenCL, if enabled if( SIXTRACKL_ENABLE_OPENCL ) + if( NOT DEFINED SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS ) + set( SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS OFF ) + endif() + set( SIXTRL_OPENCL_INCLUDE_DIRS ) set( SIXTRL_OPENCL_LIBRARIES ) + set(
SIXTRL_OPENCL_C99_HEADERS_DOWNLOADED 0 ) + set( SIXTRL_OPENCL_CXX_HEADERS_DOWNLOADED 0 ) if( NOT OpenCL_FOUND ) find_package( OpenCL QUIET ) @@ -23,11 +29,11 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) set( SIXTRL_OPENCL_LIBRARIES ${OpenCL_LIBRARIES} ) message( STATUS "---- OpenCL environment found (ver. ${OpenCL_VERSION_STRING})" ) elseif( SIXTRACKL_REQUIRE_OFFLINE_BUILD ) - message( STATUS "---- OpenCL not found, use fallback headers due to offline build" ) - set( SIXTRL_TEMP_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/external" ) - else() message( FATAL_ERROR "---- Unable to find OpenCL setup, unable to download since offline build required" ) + else() + message( STATUS "---- OpenCL not found, use fallback headers due to offline build" ) + set( SIXTRL_TEMP_INCLUDE_DIRS "${CMAKE_SOURCE_DIR}/external" ) endif() foreach( dir ${SIXTRL_TEMP_INCLUDE_DIRS} ) @@ -63,18 +69,18 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( NOT OpenCL_FOUND OR - NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE_VERSION OR - NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) + SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS OR + NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE OR + NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE ) set( SIXTRL_OPENCL_EXT_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include" ) include( FetchContent ) endif() - set( SIXTRL_OPENCL_USE_DOWNOADED_HEADERS OFF ) - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD AND ( + SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS OR NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE OR NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ) ) @@ -99,33 +105,26 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) endif() if( opencl_c99_headers_POPULATED ) - file( GLOB SIXTRL_OPENCL_C99_IN_FILES - "${opencl_c99_headers_SOURCE_DIR}/CL/*.h" ) - if( SIXTRL_OPENCL_C99_IN_FILES ) - set( SIXTRL_OPENCL_USE_DOWNOADED_HEADERS ON ) - endif() + file( COPY 
"${opencl_c99_headers_SOURCE_DIR}/CL/" + DESTINATION "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" + PATTERN "*.h" ) - foreach( in_file ${SIXTRL_OPENCL_C99_IN_FILES} ) - get_filename_component( in_file_name ${in_file} NAME ) - file( COPY ${in_file} - DESTINATION "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" ) - - if( NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE ) - if( ${in_file_name} STREQUAL "opencl.h" ) - set( SIXTRL_OPENCL_C99_HEADER_FILE "CL/opencl.h" ) - set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) - set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 3 ) - elseif( ${in_file_name} STREQUAL "cl.h" ) - set( SIXTRL_OPENCL_C99_HEADER_FILE "CL/cl.h" ) - set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) - set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 1 ) - endif() - endif() - endforeach() + if( EXISTS "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL/opencl.h" ) + set( SIXTRL_OPENCL_C99_HEADER_FILE "CL/opencl.h" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 3 ) + set( SIXTRL_OPENCL_C99_HEADERS_DOWNLOADED 1 ) + elseif( EXISTS "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL/cl.h" ) + set( SIXTRL_OPENCL_C99_HEADER_FILE "CL/cl.h" ) + set( SIXTRL_OPENCL_C99_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION 1 ) + set( SIXTRL_OPENCL_C99_HEADERS_DOWNLOADED 1 ) + endif() endif() endif() if( NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD AND ( + SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS OR NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE OR NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) ) if( NOT EXISTS ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) @@ -149,33 +148,26 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) endif() if( opencl_cxx_headers_POPULATED ) - file( GLOB SIXTRL_OPENCL_CXX_IN_FILES - "${opencl_cxx_headers_SOURCE_DIR}/include/CL/*.hpp" ) - if( SIXTRL_OPENCL_CXX_IN_FILES ) - set( SIXTRL_OPENCL_USE_DOWNOADED_HEADERS ON ) - endif() + file( COPY 
"${opencl_cxx_headers_SOURCE_DIR}/include/CL/" + DESTINATION "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" + PATTERN "*.hpp" ) - foreach( in_file ${SIXTRL_OPENCL_CXX_IN_FILES} ) - get_filename_component( in_file_name ${in_file} NAME ) - file( COPY ${in_file} - DESTINATION "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" ) - - if( NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE ) - if( ${in_file_name} STREQUAL "opencl.hpp" ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/opencl.hpp" ) - set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 3 ) - elseif( ${in_file_name} STREQUAL "cl2.hpp" ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/cl2.hpp" ) - set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 2 ) - elseif( ${in_file_name} STREQUAL "cl.hpp" ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/cl.hpp" ) - set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) - set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 1 ) - endif() - endif() - endforeach() + if( EXISTS "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL/opencl.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/opencl.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 3 ) + set( SIXTRL_OPENCL_CXX_HEADERS_DOWNLOADED 1 ) + elseif( EXISTS "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL/cl2.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/cl2.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 2 ) + set( SIXTRL_OPENCL_CXX_HEADERS_DOWNLOADED 1 ) + elseif( EXISTS "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL/cl.hpp" ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE "CL/cl.hpp" ) + set( SIXTRL_OPENCL_CXX_INCLUDE_DIR ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) + set( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION 1 ) + set( SIXTRL_OPENCL_CXX_HEADERS_DOWNLOADED 1 ) + endif() endif() endif() @@ -206,10 +198,12 @@ if( NOT 
SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) endif() get_filename_component( SIXTRL_TEMP_INCLUDE_DIRS - ${SIXTRL_OPENCL_C99_HEADER_FILE} DIRECTORY ) + "${SIXTRL_OPENCL_C99_INCLUDE_DIR}/${SIXTRL_OPENCL_C99_HEADER_FILE}" + DIRECTORY ) - file( COPY ${SIXTRL_TEMP_INCLUDE_DIRS} DESTINATION - ${opencl_icd_loader_SOURCE_DIR}/inc PATTERN "*.h" ) + file( COPY "${SIXTRL_TEMP_INCLUDE_DIRS}/" + DESTINATION "${opencl_icd_loader_SOURCE_DIR}/inc" + PATTERN "*.h" ) FetchContent_MakeAvailable( opencl_icd_loader ) set( SIXTRL_OPENCL_LIBRARIES ${SIXTRL_OPENCL_LIBRARIES} OpenCL ) From c395684e2e516beccce532b5e1f94fdd155616c8 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 23:44:28 +0100 Subject: [PATCH 28/77] sixtracklib/opencl: suppresses pedantic warnings when using the OpenCL c++ header --- sixtracklib/opencl/opencl.hpp.template | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sixtracklib/opencl/opencl.hpp.template b/sixtracklib/opencl/opencl.hpp.template index e5da08652..6569057ec 100644 --- a/sixtracklib/opencl/opencl.hpp.template +++ b/sixtracklib/opencl/opencl.hpp.template @@ -24,6 +24,10 @@ #define SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION @SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION@ #endif /* !defined( SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) */ +#if !defined( SIXTRL_OPENCL_CXX_HEADERS_DOWNLOADED ) + #define SIXTRL_OPENCL_CXX_HEADERS_DOWNLOADED @SIXTRL_OPENCL_CXX_HEADERS_DOWNLOADED@ +#endif /* !defined( SIXTRL_OPENCL_CXX_HEADERS_DOWNLOADED ) */ + #if !defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) #define SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG @SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG@ #endif /* !defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) */ @@ -39,6 +43,7 @@ #if defined( __GNUC__ ) && __GNUC__ >= 6 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wignored-attributes" + #pragma GCC diagnostic ignored "-Wpedantic" #endif #include <@SIXTRL_OPENCL_CXX_HEADER_FILE@> From 113016347da23f6111a5487b5c7615cd3e86f920 Mon Sep 17 00:00:00 2001 From: Martin 
Schwinzerl Date: Wed, 25 Nov 2020 23:45:09 +0100 Subject: [PATCH 29/77] sixtracklib/opencl: encodes whether the headers have been downloaded or found locally --- sixtracklib/opencl/opencl.h.template | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sixtracklib/opencl/opencl.h.template b/sixtracklib/opencl/opencl.h.template index 026c90880..3516a3861 100644 --- a/sixtracklib/opencl/opencl.h.template +++ b/sixtracklib/opencl/opencl.h.template @@ -27,6 +27,10 @@ #define SIXTRL_OPENCL_C99_HEADER_FILE_VERSION @SIXTRL_OPENCL_C99_HEADER_FILE_VERSION@ #endif /* !defined( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ) */ + #if !defined( SIXTRL_OPENCL_C99_HEADERS_DOWNLOADED ) + #define SIXTRL_OPENCL_C99_HEADERS_DOWNLOADED @SIXTRL_OPENCL_C99_HEADERS_DOWNLOADED@ + #endif /* !defined( SIXTRL_OPENCL_C99_HEADERS_DOWNLOADED ) */ + #if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) #include <@SIXTRL_OPENCL_C99_HEADER_FILE@> #endif /* !defined( SIXTRL_NO_SYSTEM_INCLUDES ) */ From 8a359b65ee010cd8932581c7f927e13d59afcec2 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 25 Nov 2020 23:50:57 +0100 Subject: [PATCH 30/77] python: adds scipy requirement to setup.py - Fixes issue SixTrack:SixTrackLib#135 --- python/setup.py.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/setup.py.template b/python/setup.py.template index ed7e4fd8a..4f0021bc9 100644 --- a/python/setup.py.template +++ b/python/setup.py.template @@ -14,7 +14,7 @@ setuptools.setup( 'sixtracklib': [ 'sixtracklib/@PYTHON_SIXTRACKLIB_SHARED_LIB@' ], 'sixtracklib_test': [ 'sixtracklib_test/@PYTHON_SIXTRACKLIB_TEST_SHARED_LIB@' ] }, - install_requires=['numpy','cobjects>=0.0.4','pysixtrack>=0.0.0','sixtracktools>=0.0.2'] + install_requires=['numpy', 'scipy', 'cobjects>=0.0.4','pysixtrack>=0.6.0','sixtracktools>=0.0.2'] ) # end python/setup.py From 0bc76c9c197ad7b6a4e926de941e11dd77a16e20 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 00:06:08 +0100 Subject: [PATCH 31/77] 
cmake: clarifies conditions for downloading headers --- cmake/SetupOpenCL.cmake | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cmake/SetupOpenCL.cmake b/cmake/SetupOpenCL.cmake index 9e0fa7738..8225801ab 100644 --- a/cmake/SetupOpenCL.cmake +++ b/cmake/SetupOpenCL.cmake @@ -79,10 +79,10 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD AND ( - SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS OR - NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE OR - NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ) ) + if( ( NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD ) AND + ( SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS OR + NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE OR + NOT DEFINED SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ) ) if( NOT EXISTS "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" ) file( MAKE_DIRECTORY "${SIXTRL_OPENCL_EXT_INCLUDE_DIR}/CL" ) @@ -123,10 +123,10 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) endif() endif() - if( NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD AND ( - SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS OR - NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE OR - NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) ) + if( ( NOT SIXTRACKL_REQUIRE_OFFLINE_BUILD ) AND + ( SIXTRACKL_OPENCL_ALWAYS_DOWNLOAD_HEADERS OR + NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE OR + NOT DEFINED SIXTRL_OPENCL_CXX_HEADER_FILE_VERSION ) ) if( NOT EXISTS ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) file( MAKE_DIRECTORY ${SIXTRL_OPENCL_EXT_INCLUDE_DIR} ) endif() From 138f09d4ab5e89a5c1d455eeecd5ce3c53bb0e66 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 11:25:31 +0100 Subject: [PATCH 32/77] python: fixes wrong version number `setup.py` required `pysixtrack >= 0.6.0` instead of `pysixtrack >= 0.0.6` , i.e. a typo. 
This should make things installable again --- python/setup.py.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/setup.py.template b/python/setup.py.template index 4f0021bc9..4ea9fa842 100644 --- a/python/setup.py.template +++ b/python/setup.py.template @@ -14,7 +14,7 @@ setuptools.setup( 'sixtracklib': [ 'sixtracklib/@PYTHON_SIXTRACKLIB_SHARED_LIB@' ], 'sixtracklib_test': [ 'sixtracklib_test/@PYTHON_SIXTRACKLIB_TEST_SHARED_LIB@' ] }, - install_requires=['numpy', 'scipy', 'cobjects>=0.0.4','pysixtrack>=0.6.0','sixtracktools>=0.0.2'] + install_requires=['numpy', 'scipy', 'cobjects>=0.0.4','pysixtrack>=0.0.6','sixtracktools>=0.0.2'] ) # end python/setup.py From 266465f6f67c3e3fab3c519fb263d6795ab36d9e Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 12:34:18 +0100 Subject: [PATCH 33/77] tests/testlib: adds init functions for testcases - Adds init function for a FODO lattice that is consistent with simpletrack - Adds init function for a particle distribution - Fixes extern method decorator for existing testdata methods --- .../testlib/testdata/track_testdata.c | 113 +++++++++++++++--- .../testlib/testdata/track_testdata.h | 24 ++-- 2 files changed, 114 insertions(+), 23 deletions(-) diff --git a/tests/sixtracklib/testlib/testdata/track_testdata.c b/tests/sixtracklib/testlib/testdata/track_testdata.c index 7d2e33c4b..bdf631a84 100644 --- a/tests/sixtracklib/testlib/testdata/track_testdata.c +++ b/tests/sixtracklib/testlib/testdata/track_testdata.c @@ -1,5 +1,6 @@ #include "sixtracklib/testlib/testdata/track_testdata.h" +#include #include #include #include @@ -11,21 +12,12 @@ #include "sixtracklib/common/beam_elements.h" #include "sixtracklib/common/buffer.h" -extern SIXTRL_HOST_FN NS(Buffer)* - NS(TrackTestdata_extract_initial_particles_buffer)( const char path_to_file[] ); - -extern SIXTRL_HOST_FN NS(Buffer)* - NS(TrackTestdata_extract_result_particles_buffer)( const char path_to_file[] ); - -extern SIXTRL_HOST_FN 
NS(Buffer)* - NS(TrackTestdata_extract_beam_elements_buffer)( const char path_to_file[] ); - /* ************************************************************************ */ /* ****** Implementation of functions ****** */ /* ************************************************************************ */ -SIXTRL_HOST_FN NS(Buffer)* NS(TrackTestdata_extract_initial_particles_buffer)( - const char path_to_file[] ) +SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* +NS(TrackTestdata_extract_initial_particles_buffer)( const char path_to_file[] ) { typedef NS(buffer_size_t) buf_size_t; typedef NS(Particles) particles_t; @@ -127,8 +119,8 @@ SIXTRL_HOST_FN NS(Buffer)* NS(TrackTestdata_extract_initial_particles_buffer)( return init_particles_buffer; } -SIXTRL_HOST_FN NS(Buffer)* - NS(TrackTestdata_extract_result_particles_buffer)( const char path_to_file[] ) +SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* +NS(TrackTestdata_extract_result_particles_buffer)( const char path_to_file[] ) { typedef NS(buffer_size_t) buf_size_t; typedef NS(Particles) particles_t; @@ -243,8 +235,8 @@ SIXTRL_HOST_FN NS(Buffer)* return result_particles_buffer; } -SIXTRL_HOST_FN NS(Buffer)* - NS(TrackTestdata_extract_beam_elements_buffer)( const char path_to_file[] ) +SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* +NS(TrackTestdata_extract_beam_elements_buffer)( const char path_to_file[] ) { typedef NS(buffer_size_t) buf_size_t; typedef NS(buffer_addr_t) address_t; @@ -503,3 +495,94 @@ SIXTRL_HOST_FN NS(Buffer)* return beam_elements_buffer; } + +/* -------------------------------------------------------------------------- */ + +void NS(TrackTestdata_generate_fodo_lattice)( + SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* SIXTRL_RESTRICT buffer, + unsigned int const num_turns ) +{ + NS(Multipole)* dipole0 = SIXTRL_NULLPTR; + NS(Multipole)* dipole1 = SIXTRL_NULLPTR; + NS(Multipole)* q0 = SIXTRL_NULLPTR; + NS(Multipole)* q1 = SIXTRL_NULLPTR; + NS(Cavity)* cavity = SIXTRL_NULLPTR; + NS(BeamMonitor)* monitor = SIXTRL_NULLPTR; + + unsigned int const 
monitor_num_stores = ( 100u > num_turns ) + ? num_turns : 100u; + + dipole0 = NS(Multipole_add)( buffer, 0, 1.0, 0.1570796327, 0.0, 0u ); + NS(Multipole_set_knl_value)( dipole0, 0, 0.1570796327 ); + NS(Drift_add)( buffer, 5.0 ); + + + q0 = NS(Multipole_add)( buffer, 1, 0.0, 0.0, 0.0, 0u ); + NS(Multipole_set_knl_value)( q0, 1, 0.1657145946 ); + + NS(Drift_add)( buffer, 5.0 ); + + dipole1 = NS(Multipole_add)( buffer, 0, 1.0, 0.1570796327, 0.0, 0u ); + NS(Multipole_set_knl_value)( dipole1, 0, 0.1570796327 ); + + NS(Drift_add)( buffer, 5.0 ); + + q1 = NS(Multipole_add)( buffer, 1, 0.0, 0.0, 0.0, 0u ); + NS(Multipole_set_knl_value)( q1, 1, -0.1685973315 ); + + NS(Drift_add)( buffer, 5.0 ); + + cavity = NS(Cavity_new)( buffer ); + NS(Cavity_set_voltage)( cavity, 5000000.0 ); + NS(Cavity_set_frequency)( cavity, 239833966.4 ); + NS(Cavity_set_lag)( cavity, 180.0 ); + + monitor = NS(BeamMonitor_new)( buffer ); + NS(BeamMonitor_set_num_stores)( monitor, monitor_num_stores); + NS(BeamMonitor_set_start)( monitor, 0 ); + NS(BeamMonitor_set_skip)( monitor, 0 ); + NS(BeamMonitor_set_is_rolling)( monitor, true ); +} + + +void NS(TrackTestdata_generate_particle_distr_x)( + SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* SIXTRL_RESTRICT buffer, + unsigned int const num_particles, double const p0c, + double const min_x, double const max_x, double const mass0, + double const q0, double const chi, double const charge_ratio ) { + + NS(Particles)* pset = NS(Particles_new)( buffer, num_particles ); + + double const energy0 = sqrt( p0c * p0c + mass0 * mass0 ); + double const beta0 = p0c / energy0; + double const gamma0 = ( double )1.0 / sqrt( 1.0 - beta0 * beta0 ); + double const dx = ( max_x - min_x ) / ( double )( num_particles - 1 ); + + for( unsigned ii = 0 ; ii < num_particles ; ++ii ) + { + NS(Particles_set_q0_value)( pset, ii, q0 ); + NS(Particles_set_mass0_value)( pset, ii, mass0 ); + NS(Particles_set_beta0_value)( pset, ii, beta0 ); + NS(Particles_set_gamma0_value)( pset, ii, gamma0 ); + 
NS(Particles_set_p0c_value)( pset, ii, p0c ); + + NS(Particles_set_s_value)( pset, ii, 0.0 ); + NS(Particles_set_x_value)( pset, ii, dx * ii ); + NS(Particles_set_px_value)( pset, ii, 0.0 ); + NS(Particles_set_y_value)( pset, ii, 0.0 ); + NS(Particles_set_py_value)( pset, ii, 0.0 ); + NS(Particles_set_zeta_value)( pset, ii, 0.0 ); + NS(Particles_set_delta_value)( pset, ii, 0.0 ); + + NS(Particles_set_psigma_value)( pset, ii, 0.0 ); + NS(Particles_set_rvv_value)( pset, ii, 1.0 ); + NS(Particles_set_rpp_value)( pset, ii, 1.0 ); + NS(Particles_set_chi_value)( pset, ii, 1.0 ); + NS(Particles_set_charge_ratio_value)( pset, ii, 1.0 ); + + NS(Particles_set_particle_id_value)( pset, ii, ii ); + NS(Particles_set_at_element_id_value)( pset, ii, 0 ); + NS(Particles_set_at_turn_value)( pset, ii, 0 ); + NS(Particles_set_state_value)( pset, ii, 1 ); + } +} diff --git a/tests/sixtracklib/testlib/testdata/track_testdata.h b/tests/sixtracklib/testlib/testdata/track_testdata.h index b13cbd10e..aa8af28c8 100644 --- a/tests/sixtracklib/testlib/testdata/track_testdata.h +++ b/tests/sixtracklib/testlib/testdata/track_testdata.h @@ -17,19 +17,27 @@ extern "C" { #endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ -SIXTRL_HOST_FN NS(Buffer)* NS(TrackTestdata_extract_initial_particles_buffer)( - const char path_to_file[] ); +SIXTRL_EXTERN SIXTRL_HOST_FN SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* +NS(TrackTestdata_extract_initial_particles_buffer)( const char path_to_file[] ); -SIXTRL_HOST_FN NS(Buffer)* NS(TrackTestdata_extract_result_particles_buffer)( +SIXTRL_EXTERN SIXTRL_HOST_FN SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* +NS(TrackTestdata_extract_result_particles_buffer)( const char path_to_file[] ); -SIXTRL_HOST_FN NS(Buffer)* NS(TrackTestdata_extract_beam_elements_buffer)( - const char path_to_file[] ); +SIXTRL_EXTERN SIXTRL_HOST_FN SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* +NS(TrackTestdata_extract_beam_elements_buffer)( const char path_to_file[] ); + +SIXTRL_EXTERN SIXTRL_HOST_FN void 
NS(TrackTestdata_generate_fodo_lattice)( + SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* SIXTRL_RESTRICT buffer, + unsigned int const num_turns ); + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(TrackTestdata_generate_particle_distr_x)( + SIXTRL_BUFFER_ARGPTR_DEC NS(Buffer)* SIXTRL_RESTRICT buffer, + unsigned int const NUM_PARTICLES, double const p0c, + double const min_x, double const max_x, double const mass0, + double const q0, double const chi, double const charge_ratio ); #if !defined( _GPUCODE ) && defined( __cplusplus ) } #endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ - #endif /* SIXTRACKLIB_TESTS_SIXTRACKLIB_TESTLIB_TRACK_TESTDATA_HEADER_H__ */ - -/* end: tests/sixtracklib/testlib/testdata/track_testdata.h */ From 794f758ef308e2d91a3c6a12b852b89bbde4f5b6 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 12:36:27 +0100 Subject: [PATCH 34/77] examples/cxx: adds example for tracking the same FODO lattice as simpletrack --- examples/cxx/CMakeLists.txt | 4 ++ examples/cxx/track_fodo_opencl.cpp | 81 ++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 examples/cxx/track_fodo_opencl.cpp diff --git a/examples/cxx/CMakeLists.txt b/examples/cxx/CMakeLists.txt index fef6abfba..bfa26c9f5 100644 --- a/examples/cxx/CMakeLists.txt +++ b/examples/cxx/CMakeLists.txt @@ -45,6 +45,10 @@ if( SIXTRACKL_ENABLE_OPENCL ) add_executable( track_lhc_no_bb_opencl_cxx track_lhc_no_bb_opencl.cpp ) list( APPEND EXAMPLE_OPENCL_TARGETS track_lhc_no_bb_opencl_cxx ) + # ------------------------------------------------------------------------- + # track_fodo_opencl_cxx: + add_executable( track_fodo_opencl_cxx track_fodo_opencl.cpp ) + list( APPEND EXAMPLE_OPENCL_TARGETS track_fodo_opencl_cxx ) endif() diff --git a/examples/cxx/track_fodo_opencl.cpp b/examples/cxx/track_fodo_opencl.cpp new file mode 100644 index 000000000..528b75711 --- /dev/null +++ b/examples/cxx/track_fodo_opencl.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include 
+#include + +#include "sixtracklib/testlib.h" +#include "sixtracklib/sixtracklib.hpp" + +int main( int const argc, char* argv[] ) +{ + namespace st = sixtrack; + using std::sqrt; + using track_job_type = st::TrackJobCl; + + unsigned int NUM_PARTICLES = 50000; // Default + unsigned int NUM_TURNS = 10000; // Default + std::string device_id = "0:0"; + + double const Q0 = 1.0; + double const MASS0 = ( double )SIXTRL_PHYS_CONST_MASS_PROTON_EV; + double const P0_C = 450.0e9; + double const MIN_X = 0.0; + double const MAX_X = 1e-8; + double const CHI = 1.0; + double const CHARGE_RATIO = 1.0; + + if( argc == 1 ) + { + std::cout << "Usage: " << argv[ 0 ] + << "device_id=" << device_id + << " num_particles=" << NUM_PARTICLES + << " num_turns=" << NUM_TURNS << std::endl; + } + + if( argc >= 2 ) + { + device_id = std::string{ argv[ 1 ] }; + + if( argc >= 3 ) + { + NUM_PARTICLES = std::stoi( argv[ 2 ] ); + + if( argc >= 4 ) + { + NUM_TURNS = std::stoi( argv[ 3 ] ); + } + } + } + + /* ---------------------------------------------------------------------- */ + /* Build lattice */ + + st::Buffer lattice; + ::NS(TrackTestdata_generate_fodo_lattice)( lattice.getCApiPtr(), NUM_TURNS ); + + /* ---------------------------------------------------------------------- */ + /* Init particle distribution */ + + st::Buffer pbuffer; + ::NS(TrackTestdata_generate_particle_distr_x)( pbuffer.getCApiPtr(), + NUM_PARTICLES, P0_C, MIN_X, MAX_X, MASS0, Q0, CHI, CHARGE_RATIO ); + + /* ---------------------------------------------------------------------- */ + /* Create Track Job */ + + track_job_type job( device_id, pbuffer, lattice ); + auto start_time = std::chrono::steady_clock::now(); + job.track( NUM_TURNS ); + auto stop_time = std::chrono::steady_clock::now(); + + st::collect( job ); + + auto wtime = stop_time - start_time; + + std::cout << "elapsed wall time: " << wtime.count() << " sec\r\n" + << " = " << wtime.count() / ( NUM_PARTICLES * NUM_TURNS ) + << " sec/particles/turn" << std::endl; + + 
return 0; +} From 6ccacdba8bcdb50e5aad0dfbbc71313765bdd9d7 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 13:00:24 +0100 Subject: [PATCH 35/77] examples/cxx: fixes timing track_fodod_opencl_cxx --- examples/cxx/track_fodo_opencl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cxx/track_fodo_opencl.cpp b/examples/cxx/track_fodo_opencl.cpp index 528b75711..0d7c5c395 100644 --- a/examples/cxx/track_fodo_opencl.cpp +++ b/examples/cxx/track_fodo_opencl.cpp @@ -71,7 +71,7 @@ int main( int const argc, char* argv[] ) st::collect( job ); - auto wtime = stop_time - start_time; + std::chrono::duration< double > const wtime = stop_time - start_time; std::cout << "elapsed wall time: " << wtime.count() << " sec\r\n" << " = " << wtime.count() / ( NUM_PARTICLES * NUM_TURNS ) From d29cad6df6188e1853d3067d2c9bcf473e16749f Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 13:08:14 +0100 Subject: [PATCH 36/77] examples/cxx: adds tracking example for cuda --- examples/cxx/CMakeLists.txt | 7 +++ examples/cxx/track_fodo_cuda.cpp | 78 ++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 examples/cxx/track_fodo_cuda.cpp diff --git a/examples/cxx/CMakeLists.txt b/examples/cxx/CMakeLists.txt index bfa26c9f5..a43b9f334 100644 --- a/examples/cxx/CMakeLists.txt +++ b/examples/cxx/CMakeLists.txt @@ -52,6 +52,13 @@ if( SIXTRACKL_ENABLE_OPENCL ) endif() +if( SIXTRACKL_ENABLE_CUDA ) + # ------------------------------------------------------------------------- + # track_fodo_opencl_cxx: + add_executable( track_fodo_cuda_cxx track_fodo_cuda.cpp ) + list( APPEND EXAMPLE_CUDA_TARGETS track_fodo_cuda_cxx ) +endif() + # ============================================================================== # Common settings for all programs: diff --git a/examples/cxx/track_fodo_cuda.cpp b/examples/cxx/track_fodo_cuda.cpp new file mode 100644 index 000000000..48a1c67b5 --- /dev/null +++ 
b/examples/cxx/track_fodo_cuda.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +#include "sixtracklib/testlib.h" +#include "sixtracklib/sixtracklib.hpp" + +int main( int const argc, char* argv[] ) +{ + namespace st = sixtrack; + using std::sqrt; + using track_job_type = st::CudaTrackJob; + + unsigned int NUM_PARTICLES = 50000; // Default + unsigned int NUM_TURNS = 10000; // Default + std::string const device_id = "0:0"; + + double const Q0 = 1.0; + double const MASS0 = ( double )SIXTRL_PHYS_CONST_MASS_PROTON_EV; + double const P0_C = 450.0e9; + double const MIN_X = 0.0; + double const MAX_X = 1e-8; + double const CHI = 1.0; + double const CHARGE_RATIO = 1.0; + + if( argc == 1 ) + { + std::cout << "Usage: " << argv[ 0 ] + << " num_particles=" << NUM_PARTICLES + << " num_turns=" << NUM_TURNS << std::endl; + } + + if( argc >= 2 ) + { + if( argc >= 3 ) + { + NUM_PARTICLES = std::stoi( argv[ 2 ] ); + + if( argc >= 4 ) + { + NUM_TURNS = std::stoi( argv[ 3 ] ); + } + } + } + + /* ---------------------------------------------------------------------- */ + /* Build lattice */ + + st::Buffer lattice; + ::NS(TrackTestdata_generate_fodo_lattice)( lattice.getCApiPtr(), NUM_TURNS ); + + /* ---------------------------------------------------------------------- */ + /* Init particle distribution */ + + st::Buffer pbuffer; + ::NS(TrackTestdata_generate_particle_distr_x)( pbuffer.getCApiPtr(), + NUM_PARTICLES, P0_C, MIN_X, MAX_X, MASS0, Q0, CHI, CHARGE_RATIO ); + + /* ---------------------------------------------------------------------- */ + /* Create Track Job */ + + track_job_type job( device_id, pbuffer, lattice ); + auto start_time = std::chrono::steady_clock::now(); + job.trackUntil( NUM_TURNS ); + auto stop_time = std::chrono::steady_clock::now(); + + st::collect( job ); + + std::chrono::duration< double > const wtime = stop_time - start_time; + + std::cout << "elapsed wall time: " << wtime.count() << " sec\r\n" + << " = " << wtime.count() / ( 
NUM_PARTICLES * NUM_TURNS ) + << " sec/particles/turn" << std::endl; + + return 0; +} From 8cf0ef5fe808dfe2c4f61975166cf18d48f51a1c Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 14:28:32 +0100 Subject: [PATCH 37/77] examples/cxx: fixes initialisation issue with cuda example --- examples/cxx/track_fodo_cuda.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cxx/track_fodo_cuda.cpp b/examples/cxx/track_fodo_cuda.cpp index 48a1c67b5..d2b53d897 100644 --- a/examples/cxx/track_fodo_cuda.cpp +++ b/examples/cxx/track_fodo_cuda.cpp @@ -15,7 +15,7 @@ int main( int const argc, char* argv[] ) unsigned int NUM_PARTICLES = 50000; // Default unsigned int NUM_TURNS = 10000; // Default - std::string const device_id = "0:0"; + std::string const device_id = "0.0"; double const Q0 = 1.0; double const MASS0 = ( double )SIXTRL_PHYS_CONST_MASS_PROTON_EV; From 882ed8fb1d24a98f1564b87e9fe86687970cf0b0 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 15:43:21 +0100 Subject: [PATCH 38/77] examples/c99: fix synchronisation issue in the track fodo example --- examples/cxx/track_fodo_opencl.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/cxx/track_fodo_opencl.cpp b/examples/cxx/track_fodo_opencl.cpp index 0d7c5c395..d70b9c353 100644 --- a/examples/cxx/track_fodo_opencl.cpp +++ b/examples/cxx/track_fodo_opencl.cpp @@ -67,9 +67,8 @@ int main( int const argc, char* argv[] ) track_job_type job( device_id, pbuffer, lattice ); auto start_time = std::chrono::steady_clock::now(); job.track( NUM_TURNS ); - auto stop_time = std::chrono::steady_clock::now(); - st::collect( job ); + auto stop_time = std::chrono::steady_clock::now(); std::chrono::duration< double > const wtime = stop_time - start_time; From 2b02f72899e2c89bc68a2e92e0d8912655cbb180 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 15:44:09 +0100 Subject: [PATCH 39/77] examples/cxx: fixes copy & paste errors in 
the cuda track fodo example --- examples/cxx/track_fodo_cuda.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/examples/cxx/track_fodo_cuda.cpp b/examples/cxx/track_fodo_cuda.cpp index 48a1c67b5..9ec83d3b7 100644 --- a/examples/cxx/track_fodo_cuda.cpp +++ b/examples/cxx/track_fodo_cuda.cpp @@ -15,7 +15,7 @@ int main( int const argc, char* argv[] ) unsigned int NUM_PARTICLES = 50000; // Default unsigned int NUM_TURNS = 10000; // Default - std::string const device_id = "0:0"; + std::string const device_id = "0.0"; double const Q0 = 1.0; double const MASS0 = ( double )SIXTRL_PHYS_CONST_MASS_PROTON_EV; @@ -34,14 +34,11 @@ int main( int const argc, char* argv[] ) if( argc >= 2 ) { + NUM_PARTICLES = std::stoi( argv[ 1 ] ); + if( argc >= 3 ) { - NUM_PARTICLES = std::stoi( argv[ 2 ] ); - - if( argc >= 4 ) - { - NUM_TURNS = std::stoi( argv[ 3 ] ); - } + NUM_TURNS = std::stoi( argv[ 2 ] ); } } From 1d132acf63e689610bf89a8573ae6266f9fda3d0 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Thu, 26 Nov 2020 15:44:27 +0100 Subject: [PATCH 40/77] examples/c99: fix synchronisation issue in the track fodo example --- examples/cxx/track_fodo_cuda.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/cxx/track_fodo_cuda.cpp b/examples/cxx/track_fodo_cuda.cpp index 9ec83d3b7..08bb0e851 100644 --- a/examples/cxx/track_fodo_cuda.cpp +++ b/examples/cxx/track_fodo_cuda.cpp @@ -61,9 +61,8 @@ int main( int const argc, char* argv[] ) track_job_type job( device_id, pbuffer, lattice ); auto start_time = std::chrono::steady_clock::now(); job.trackUntil( NUM_TURNS ); - auto stop_time = std::chrono::steady_clock::now(); - st::collect( job ); + auto stop_time = std::chrono::steady_clock::now(); std::chrono::duration< double > const wtime = stop_time - start_time; From 7f566890db58ed2e4444bdafaab97ef1d1fb8aa4 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 2 Dec 2020 14:06:27 +0100 Subject: [PATCH 41/77] cmake: adds option 
to Settings.cmake to control output of OpenCL compile report --- Settings.cmake.default | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Settings.cmake.default b/Settings.cmake.default index 94a9fc12c..4a88fdb93 100644 --- a/Settings.cmake.default +++ b/Settings.cmake.default @@ -139,6 +139,10 @@ if( SIXTRACKL_ENABLE_OPENCL ) option( SIXTRACKL_OPENCL_CXX_ENABLE_EXCEPTIONS "Enable OpenCL 1.x Host-side C++ Exceptions" ON ) + + set( SIXTRACKL_OPENCL_PRINT_BUILD_REPORT "error" CACHE STRING + "print OpenCL build report [values: always, error, never]" ) + endif() # ------------------------------------------------------------------------------ From b9874bf7537f2cda6788094b2c0b5cfb399b4957 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 2 Dec 2020 14:08:36 +0100 Subject: [PATCH 42/77] cmake: adds handling for print build report to OpenCL setup --- cmake/SetupOpenCL.cmake | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/cmake/SetupOpenCL.cmake b/cmake/SetupOpenCL.cmake index 8225801ab..f0476a3b0 100644 --- a/cmake/SetupOpenCL.cmake +++ b/cmake/SetupOpenCL.cmake @@ -255,6 +255,24 @@ if( NOT SIXTRACKL_CMAKE_SETUP_OPENCL_FINISHED ) endif() endif() + set( SIXTRL_OPENCL_PRINT_BUILD_REPORT 0 ) + + if( SIXTRACKL_OPENCL_PRINT_BUILD_REPORT ) + if( "${SIXTRACKL_OPENCL_PRINT_BUILD_REPORT}" STREQUAL "always" ) + set( SIXTRL_OPENCL_PRINT_BUILD_REPORT 2 ) + message( STATUS "------- setting opencl build program report policy to \"always\"" ) + elseif( "${SIXTRACKL_OPENCL_PRINT_BUILD_REPORT}" STREQUAL "error" ) + set( SIXTRL_OPENCL_PRINT_BUILD_REPORT 1 ) + message( STATUS "------- setting opencl build program report policy to \"error\"" ) + elseif( "${SIXTRACKL_OPENCL_PRINT_BUILD_REPORT}" STREQUAL "never" ) + set( SIXTRL_OPENCL_PRINT_BUILD_REPORT 0 ) + message( STATUS "------- setting opencl build program report policy to \"never\"" ) + else() + message( STATUS "------- illegal value for SIXTRACKL_OPENCL_PRINT_BUILD_REPORT, choosing \"never\"" ) + set( 
SIXTRL_OPENCL_PRINT_BUILD_REPORT 0 ) + endif() + endif() + # --------------------------------------------------------------------------- list( APPEND SIXTRACKLIB_SUPPORTED_MODULES "OPENCL" ) From 72510519fd6c670a9e73b1f8f1df2fd8df4ebd31 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 2 Dec 2020 14:09:59 +0100 Subject: [PATCH 43/77] sixtracklib/opencl: adds handling for printing build report for OpenCL programs --- sixtracklib/opencl/opencl.hpp.template | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sixtracklib/opencl/opencl.hpp.template b/sixtracklib/opencl/opencl.hpp.template index 6569057ec..6c31adfea 100644 --- a/sixtracklib/opencl/opencl.hpp.template +++ b/sixtracklib/opencl/opencl.hpp.template @@ -32,6 +32,10 @@ #define SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG @SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG@ #endif /* !defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) */ +#if !defined( SIXTRL_OPENCL_PRINT_BUILD_REPORT ) + #define SIXTRL_OPENCL_PRINT_BUILD_REPORT @SIXTRL_OPENCL_PRINT_BUILD_REPORT@ +#endif /* !defined( SIXTRL_OPENCL_PRINT_BUILD_REPORT ) */ + @SIXTRL_OPENCL_ENABLE_EXCEPTION_STR@ #if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) From 7471877f23678421bcb7a4c73be9e3062360ff3d Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 2 Dec 2020 14:11:08 +0100 Subject: [PATCH 44/77] sixtracklib/opencl: adds helper functions to handle the build report --- sixtracklib/opencl/internal/base_context.cpp | 185 +++++++++++++------ sixtracklib/opencl/internal/base_context.h | 10 + 2 files changed, 137 insertions(+), 58 deletions(-) diff --git a/sixtracklib/opencl/internal/base_context.cpp b/sixtracklib/opencl/internal/base_context.cpp index ba4cbe579..6a254a825 100644 --- a/sixtracklib/opencl/internal/base_context.cpp +++ b/sixtracklib/opencl/internal/base_context.cpp @@ -3086,75 +3086,162 @@ namespace SIXTRL_CXX_NAMESPACE auto& build_device = this->m_available_devices.at( this->m_selected_node_index ); - cl_int ret = CL_SUCCESS; cl_build_status build_status = 
CL_BUILD_NONE; - #if defined( SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS ) && \ - SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS == 1 + #if defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) + int const exception_flag = ( int )SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG; + #else + int const exception_flag = int{ 0 }; + #endif /* defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) */ + ( void )exception_flag; + + #if defined( SIXTRL_OPENCL_PRINT_BUILD_REPORT ) + int const print_report = ( int )SIXTRL_OPENCL_PRINT_BUILD_REPORT; + #else + int const print_report = int{ 0 }; + #endif /* defined( SIXTRL_OPENCL_PRINT_BUILD_REPORT ) */ + + #if defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) && \ + SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG == 1 try { #endif /* OpenCL 1.x C++ Host Exceptions enabled */ - ret = cl_program.build( program_data.m_compile_options.c_str() ); + cl_int ret = cl_program.build( program_data.m_compile_options.c_str() ); + SIXTRL_ASSERT( ret == CL_SUCCESS ); + ( void )ret; + build_status = cl_program.getBuildInfo< CL_PROGRAM_BUILD_STATUS >( build_device ); - #if defined( SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS ) && \ - SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS == 1 + #if defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) && \ + SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG == 1 } catch( cl::Error& e ) { - if( ( this->debugMode() ) && - ( e.err() == CL_BUILD_PROGRAM_FAILURE ) ) + std::string build_log = ""; + build_log.clear(); + + if( print_report == 2 ) + { + build_log = this->get_program_build_log( + build_device, cl_program ); + + if( build_log.empty() ) + { + build_log = "no build log available"; + } + } + else if( print_report == 1 ) + { + if( e.err() == CL_BUILD_PROGRAM_FAILURE ) + { + build_log = this->get_program_build_log( + build_device, cl_program ); + } + } + + if( !build_log.empty() ) { std::string name = build_device.getInfo< CL_DEVICE_NAME >(); - std::string buildlog = cl_program.getBuildInfo< - CL_PROGRAM_BUILD_LOG >( build_device ); + std::cerr << "\r\n" + << "Build log 
for device " + << this->selectedNodeIdStr() << " [" + << name << " ]\r\n"; + + if( !program_data.m_file_path.empty() ) + { + std::cerr << "Program : " + << program_data.m_file_path << "\r\n"; + } - std::cerr << "Build log for " << name << ":" << std::endl - << buildlog << std::endl; + std::cerr << "Build Log: " << build_log + << std::endl; + std::cerr.flush(); } throw e; } #endif /* OpenCL 1.x C++ Host Exceptions enabled */ - if( ( build_status != CL_BUILD_NONE ) || ( ret == CL_SUCCESS ) ) + this->update_program_data_with_build_error( + build_status, build_device, cl_program, program_data ); + + success = program_data.m_compiled; + + if( ( ( print_report == 1 ) && ( !success ) ) || + ( print_report == 2 ) ) { - program_data.m_compile_report = - cl_program.getBuildInfo< CL_PROGRAM_BUILD_LOG >( build_device ); + std::cerr << "program_name : " + << program_data.m_file_path << "\r\n" + << "compiled : " << std::boolalpha + << program_data.m_compiled << std::noboolalpha << "\r\n" + << "compile options : " + << program_data.m_compile_options << "\r\n" + << "compile report : " << "\r\n" + << program_data.m_compile_report << std::endl; + } + } - if( !program_data.m_compile_report.empty() ) - { - program_data.m_compile_report.erase( - std::find_if( - program_data.m_compile_report.rbegin(), - program_data.m_compile_report.rend(), - []( int ch ){ return !std::isspace( ch ); } ).base(), - program_data.m_compile_report.end() ); - } + return success; + } - if( !program_data.m_compile_report.empty() ) - { - program_data.m_compile_report.erase( - program_data.m_compile_report.begin(), - std::find_if( - program_data.m_compile_report.begin(), - program_data.m_compile_report.end(), - []( int ch ){ return !std::isspace( ch ); } ) ); - } + std::string ctx_t::get_program_build_log( + cl::Device& SIXTRL_RESTRICT_REF build_device, + cl::Program& SIXTRL_RESTRICT_REF program ) const + { + std::string build_log = program.getBuildInfo< + CL_PROGRAM_BUILD_LOG >( build_device ); - if( ( 
!program_data.m_compile_report.empty() ) && - ( program_data.m_compile_report.size() == size_type{ 1 } ) && - ( program_data.m_compile_report[ 0 ] == '\0' ) ) - { - program_data.m_compile_report.clear(); - } + if( !build_log.empty() ) + { + build_log.erase( std::find_if( + build_log.rbegin(), build_log.rend(), + []( int ch ){ return !std::isspace( ch ); } ).base(), + build_log.end() ); + } + + if( !build_log.empty() ) + { + build_log.erase( build_log.begin(), + std::find_if( build_log.begin(), build_log.end(), + []( int ch ){ return !std::isspace( ch ); } ) ); + } + + if( ( !build_log.empty() ) && + ( build_log.size() == size_type{ 1 } ) && + ( build_log[ 0 ] == '\0' ) ) + { + build_log.clear(); + } + + return build_log; + } + + void ctx_t::update_program_data_with_build_error( + cl_build_status build_status, + cl::Device& SIXTRL_RESTRICT_REF build_device, + cl::Program& SIXTRL_RESTRICT_REF cl_program, + ctx_t::program_data_t& SIXTRL_RESTRICT_REF program_data ) { + + if( build_status != CL_BUILD_NONE ) + { + program_data.m_compile_report = this->get_program_build_log( + build_device, cl_program ); + + #if defined( SIXTRL_OPENCL_PRINT_BUILD_REPORT ) && \ + ( SIXTRL_OPENCL_PRINT_BUILD_REPORT == 2 ) /* "always" */ + + if( program_data.m_compile_report.empty() ) + { + program_data.m_compile_report = "no build log available"; } + #endif + if( build_status == CL_BUILD_SUCCESS ) { - success = program_data.m_compiled = true; + program_data.m_compiled = true; } else if( build_status == CL_BUILD_ERROR ) { @@ -3170,25 +3257,7 @@ namespace SIXTRL_CXX_NAMESPACE program_data.m_compiled = false; program_data.m_compile_report.clear(); } - - if( ( this->debugMode() ) && - ( ( !program_data.m_compile_report.empty() ) || - ( !program_data.m_compiled ) ) ) - { - std::cerr << "program_name : " - << program_data.m_file_path << "\r\n" - << "compiled : " - << std::boolalpha << program_data.m_compiled - << std::noboolalpha << "\r\n" - << "compile options : " - << 
program_data.m_compile_options << "\r\n" - << "compile report : " << "\r\n" - << program_data.m_compile_report - << std::endl; - } } - - return success; } } #endif /* C++ */ diff --git a/sixtracklib/opencl/internal/base_context.h b/sixtracklib/opencl/internal/base_context.h index f667c1fd8..d5e34261e 100644 --- a/sixtracklib/opencl/internal/base_context.h +++ b/sixtracklib/opencl/internal/base_context.h @@ -1177,6 +1177,16 @@ namespace SIXTRL_CXX_NAMESPACE private: + std::string get_program_build_log( + cl::Device& SIXTRL_RESTRICT_REF build_device, + cl::Program& SIXTRL_RESTRICT_REF program ) const; + + void update_program_data_with_build_error( + cl_build_status build_status, + cl::Device& SIXTRL_RESTRICT_REF build_device, + cl::Program& SIXTRL_RESTRICT_REF cl_program, + program_data_t& SIXTRL_RESTRICT_REF program_data ); + void doParseConfigStringBaseImpl( const char *const SIXTRL_RESTRICT config_str ); From 35abfd099adcfe1cae98531d89c95cffd7d8775f Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 2 Dec 2020 14:11:47 +0100 Subject: [PATCH 45/77] sixtracklib/opencl: fixes exception handling issue due to changed macro name --- sixtracklib/opencl/internal/base_context.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sixtracklib/opencl/internal/base_context.cpp b/sixtracklib/opencl/internal/base_context.cpp index 6a254a825..f4fe99a10 100644 --- a/sixtracklib/opencl/internal/base_context.cpp +++ b/sixtracklib/opencl/internal/base_context.cpp @@ -2513,16 +2513,16 @@ namespace SIXTRL_CXX_NAMESPACE std::string const platform_name = platform.getInfo< CL_PLATFORM_NAME >(); - #if defined( SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS ) && \ - SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS == 1 + #if defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) && \ + SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG == 1 try { #endif /* OpenCL 1.x C++ Host Exceptions enabled */ platform.getDevices( CL_DEVICE_TYPE_ALL, &temp_devices ); - #if defined( 
SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS ) && \ - SIXTRL_OPENCL_CXX_ENABLES_HOST_EXCEPTIONS == 1 + #if defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) && \ + SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG == 1 } catch( cl::Error const& e ) { From 2e531c77e81edb075f3b0140e5ea600e315786b4 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 2 Dec 2020 14:15:46 +0100 Subject: [PATCH 46/77] sixtracklib/opencl: adds OpenCL build report handling to install target --- sixtracklib/opencl/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/sixtracklib/opencl/CMakeLists.txt b/sixtracklib/opencl/CMakeLists.txt index a9d086f9e..8903be4f2 100644 --- a/sixtracklib/opencl/CMakeLists.txt +++ b/sixtracklib/opencl/CMakeLists.txt @@ -35,6 +35,7 @@ set( CL_H_INSTALL_INFO set( SIXTRL_OPENCL_C99_HEADER_FILE \"${SIXTRL_OPENCL_C99_HEADER_FILE}\" ) set( SIXTRL_OPENCL_C99_INCLUDE_DIR \"${SIXTRL_OPENCL_C99_INCLUDE_DIR}\" ) set( SIXTRL_OPENCL_C99_HEADER_FILE_VERSION ${SIXTRL_OPENCL_C99_HEADER_FILE_VERSION} ) + set( SIXTRL_OPENCL_PRINT_BUILD_REPORT ${SIXTRL_OPENCL_PRINT_BUILD_REPORT} ) set( SIXTRL_OPENCL_ENABLE_EXCEPTION_STR ${SIXTRL_OPENCL_ENABLE_EXCEPTION_STR} ) set( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG From 98b79a9697c175e722ad814bd77943d07ad8e2ba Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Wed, 2 Dec 2020 15:39:05 +0100 Subject: [PATCH 47/77] sixtracklib/opencl: adds output for the Compile Options --- sixtracklib/opencl/internal/base_context.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/sixtracklib/opencl/internal/base_context.cpp b/sixtracklib/opencl/internal/base_context.cpp index f4fe99a10..e70e90928 100644 --- a/sixtracklib/opencl/internal/base_context.cpp +++ b/sixtracklib/opencl/internal/base_context.cpp @@ -3088,13 +3088,6 @@ namespace SIXTRL_CXX_NAMESPACE cl_build_status build_status = CL_BUILD_NONE; - #if defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) - int const exception_flag = ( int
)SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG; - #else - int const exception_flag = int{ 0 }; - #endif /* defined( SIXTRL_OPENCL_ENABLES_EXCEPTION_FLAG ) */ - ( void )exception_flag; - #if defined( SIXTRL_OPENCL_PRINT_BUILD_REPORT ) int const print_report = ( int )SIXTRL_OPENCL_PRINT_BUILD_REPORT; #else @@ -3146,7 +3139,7 @@ namespace SIXTRL_CXX_NAMESPACE std::string name = build_device.getInfo< CL_DEVICE_NAME >(); std::cerr << "\r\n" << "Build log for device " - << this->selectedNodeIdStr() << " [" + << this->selectedNodeIdStr() << " [ " << name << " ]\r\n"; if( !program_data.m_file_path.empty() ) @@ -3155,7 +3148,9 @@ namespace SIXTRL_CXX_NAMESPACE << program_data.m_file_path << "\r\n"; } - std::cerr << "Build Log: " << build_log + std::cerr << "Program Options : " + << program_data.m_compile_options << "\r\n" + << "Build Log: " << build_log << std::endl; std::cerr.flush(); } From b946a6ba1c63632a36dd35249b17fef5b94b85d6 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 13:44:49 +0200 Subject: [PATCH 48/77] common: adds new mathematical constants --- sixtracklib/common/internal/math_constants.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sixtracklib/common/internal/math_constants.h b/sixtracklib/common/internal/math_constants.h index ca6e49d42..1496ed8ae 100644 --- a/sixtracklib/common/internal/math_constants.h +++ b/sixtracklib/common/internal/math_constants.h @@ -26,6 +26,26 @@ 3.1415926535897932384626433832795028841971693993751L #endif /* !defined( SIXTRL_MATH_CONST_PI ) */ +#if !defined( SIXTRL_MATH_CONST_PI_SQU ) + #define SIXTRL_MATH_CONST_PI_SQU \ + 9.869604401089358618834490999876151135313699407240790626413349376371L +#endif /* !defined( SIXTRL_MATH_CONST_PI_SQU ) */ + +#if !defined( SIXTRL_MATH_CONST_SQRT_PI ) + #define SIXTRL_MATH_CONST_SQRT_PI \ + 1.77245385090551602729816748334114518279754945612238712821380779L +#endif /* !defined( SIXTRL_MATH_CONST_SQRT_PI ) */ + +#if !defined( SIXTRL_MATH_CONST_SQRT_TWO ) + 
#define SIXTRL_MATH_CONST_SQRT_TWO \ + 1.414213562373095048801688724209698078569671875376948073176679738L +#endif /* !defined( SIXTRL_MATH_CONST_SQRT_TWO ) */ + +#if !defined( SIXTRL_MATH_CONST_TWO_OVER_SQRT_PI ) + #define SIXTRL_MATH_CONST_TWO_OVER_SQRT_PI \ + 1.128379167095512573896158903121545171688101258657997713688171443L +#endif /* SIXTRL_MATH_CONST_TWO_OVER_SQRT_PI */ + #if !defined( SIXTRL_MATH_CONST_DEG2RAD ) #define SIXTRL_MATH_CONST_DEG2RAD \ 0.0174532925199432957692369076848861271344287188854172546L From 44449177ec9b3b48598cb72ede9ab88a3491369a Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 13:46:41 +0200 Subject: [PATCH 49/77] common: increases number of sig. digits for pi, removes duplicate entry for sqrt(pi) --- sixtracklib/common/internal/math_constants.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sixtracklib/common/internal/math_constants.h b/sixtracklib/common/internal/math_constants.h index 1496ed8ae..76c08c39b 100644 --- a/sixtracklib/common/internal/math_constants.h +++ b/sixtracklib/common/internal/math_constants.h @@ -23,7 +23,7 @@ #if !defined( SIXTRL_MATH_CONST_PI ) #define SIXTRL_MATH_CONST_PI \ - 3.1415926535897932384626433832795028841971693993751L + 3.141592653589793238462643383279502884197169399375105820974944592L #endif /* !defined( SIXTRL_MATH_CONST_PI ) */ #if !defined( SIXTRL_MATH_CONST_PI_SQU ) @@ -56,10 +56,7 @@ 57.29577951308232087679815481410517033240547246656432154916L #endif /* !defined( SIXTRL_MATH_CONST_RAD2DEG ) */ -#if !defined( SIXTRL_MATH_CONST_SQRT_PI ) - #define SIXTRL_MATH_CONST_SQRT_PI \ - 1.77245385090551602729816748334114518279754945612238712821381L -#endif /* !defined( SIXTRL_MATH_CONST_SQRT_PI ) */ + #if defined( __cplusplus ) #if !defined( SIXTRL_NO_INCLUDES ) From f2855cb4e80980e1da69e6bff28d6536345a49a2 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 13:50:26 +0200 Subject: [PATCH 50/77] common: completes API for new and updated 
mathematical constants --- sixtracklib/common/internal/math_constants.h | 116 +++++++++++++++++-- 1 file changed, 104 insertions(+), 12 deletions(-) diff --git a/sixtracklib/common/internal/math_constants.h b/sixtracklib/common/internal/math_constants.h index 76c08c39b..6d78442f5 100644 --- a/sixtracklib/common/internal/math_constants.h +++ b/sixtracklib/common/internal/math_constants.h @@ -76,18 +76,18 @@ namespace SIXTRL_CXX_NAMESPACE return static_cast< R >( SIXTRL_MATH_CONST_PI ); } - SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R get_deg2rad() SIXTRL_NOEXCEPT_COND( + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R get_sqrt_two() SIXTRL_NOEXCEPT_COND( std::is_nothrow_copy_constructible< R >::value && std::is_nothrow_move_constructible< R >::value ) { - return static_cast< R >( SIXTRL_MATH_CONST_DEG2RAD ); + return static_cast< R >( SIXTRL_MATH_CONST_SQRT_TWO ); } - SIXTRL_STATIC SIXTRL_INLINE R get_rad2deg() SIXTRL_NOEXCEPT_COND( + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R get_pi_squ() SIXTRL_NOEXCEPT_COND( std::is_nothrow_copy_constructible< R >::value && std::is_nothrow_move_constructible< R >::value ) { - return static_cast< R >( SIXTRL_MATH_CONST_RAD2DEG ); + return static_cast< R >( SIXTRL_MATH_CONST_PI_SQU ); } SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R get_sqrt_pi() SIXTRL_NOEXCEPT_COND( @@ -96,6 +96,29 @@ namespace SIXTRL_CXX_NAMESPACE { return static_cast< R >( SIXTRL_MATH_CONST_SQRT_PI ); } + + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R get_two_over_sqrt_pi() SIXTRL_NOEXCEPT_COND( + std::is_nothrow_copy_constructible< R >::value && + std::is_nothrow_move_constructible< R >::value ) + { + return static_cast< R >( SIXTRL_MATH_CONST_TWO_OVER_SQRT_PI ); + } + + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R get_deg2rad() SIXTRL_NOEXCEPT_COND( + std::is_nothrow_copy_constructible< R >::value && + std::is_nothrow_move_constructible< R >::value ) + { + return static_cast< R >( SIXTRL_MATH_CONST_DEG2RAD ); + } + + SIXTRL_STATIC SIXTRL_INLINE R get_rad2deg() SIXTRL_NOEXCEPT_COND( + 
std::is_nothrow_copy_constructible< R >::value && + std::is_nothrow_move_constructible< R >::value ) + { + return static_cast< R >( SIXTRL_MATH_CONST_RAD2DEG ); + } + + }; /* --------------------------------------------------------------------- */ @@ -106,7 +129,25 @@ namespace SIXTRL_CXX_NAMESPACE std::is_nothrow_copy_constructible< R >::value && std::is_nothrow_move_constructible< R >::value ) { - return MathConstHelper< R >::get_pi(); + return SIXTRL_CXX_NAMESPACE::MathConstHelper< R >::get_pi(); + } + + template< class R > + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R MathConst_pi_squ() + SIXTRL_NOEXCEPT_COND( + std::is_nothrow_copy_constructible< R >::value && + std::is_nothrow_move_constructible< R >::value ) + { + return SIXTRL_CXX_NAMESPACE::MathConstHelper< R >::get_pi_squ(); + } + + template< class R > + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R MathConst_sqrt_two() + SIXTRL_NOEXCEPT_COND( + std::is_nothrow_copy_constructible< R >::value && + std::is_nothrow_move_constructible< R >::value ) + { + return SIXTRL_CXX_NAMESPACE::MathConstHelper< R >::get_sqrt_two(); } template< class R > @@ -115,7 +156,7 @@ namespace SIXTRL_CXX_NAMESPACE std::is_nothrow_copy_constructible< R >::value && std::is_nothrow_move_constructible< R >::value ) { - return MathConstHelper< R >::get_deg2rad(); + return SIXTRL_CXX_NAMESPACE::MathConstHelper< R >::get_deg2rad(); } template< class R > @@ -124,7 +165,17 @@ namespace SIXTRL_CXX_NAMESPACE std::is_nothrow_copy_constructible< R >::value && std::is_nothrow_move_constructible< R >::value ) { - return MathConstHelper< R >::get_rad2deg(); + return SIXTRL_CXX_NAMESPACE::MathConstHelper< R >::get_rad2deg(); + } + + template< class R > + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R MathConst_two_over_sqrt_pi() + SIXTRL_NOEXCEPT_COND( + std::is_nothrow_copy_constructible< R >::value && + std::is_nothrow_move_constructible< R >::value ) + { + return SIXTRL_CXX_NAMESPACE::MathConstHelper< + R >::get_two_over_sqrt_pi(); } template< class R > @@ 
-133,7 +184,7 @@ namespace SIXTRL_CXX_NAMESPACE std::is_nothrow_copy_constructible< R >::value && std::is_nothrow_move_constructible< R >::value ) { - return MathConstHelper< R >::get_sqrt_pi(); + return SIXTRL_CXX_NAMESPACE::MathConstHelper< R >::get_sqrt_pi(); } } @@ -143,6 +194,12 @@ SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R NS(MathConst_pi)() return SIXTRL_CXX_NAMESPACE::MathConst_pi< R >(); } +template< class R > +SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R NS(MathConst_pi_squ)() +{ + return SIXTRL_CXX_NAMESPACE::MathConst_pi_squ< R >(); +} + template< class R > SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R NS(MathConst_deg2rad)() { @@ -161,6 +218,18 @@ SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R NS(MathConst_sqrt_pi)() return SIXTRL_CXX_NAMESPACE::MathConst_sqrt_pi< R >(); } +template< class R > +SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R NS(MathConst_sqrt_two)() +{ + return SIXTRL_CXX_NAMESPACE::MathConst_sqrt_two< R >(); +} + +template< class R > +SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN R NS(MathConst_two_over_sqrt_pi)() +{ + return SIXTRL_CXX_NAMESPACE::MathConst_two_over_sqrt_pi< R >(); +} + #endif /* C++ */ #if defined( __cplusplus ) && !defined( _GPUCODE ) @@ -169,6 +238,8 @@ extern "C" { SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(MathConst_pi)( void ) SIXTRL_NOEXCEPT; +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(MathConst_pi_squ)( void ) SIXTRL_NOEXCEPT; + SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(MathConst_deg2rad)( void ) SIXTRL_NOEXCEPT; @@ -178,6 +249,12 @@ SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(MathConst_sqrt_pi)( void ) SIXTRL_NOEXCEPT; +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T + NS(MathConst_two_over_sqrt_pi)( void ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T + NS(MathConst_sqrt_two)( void ) SIXTRL_NOEXCEPT; + #if defined( __cplusplus ) && !defined( _GPUCODE ) } #endif /* C++, Host */ @@ -190,26 +267,41 @@ SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T extern "C" { #endif /* defined( __cplusplus ) && !defined( _GPUCODE ) */ 
-SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_pi)( void ) SIXTRL_NOEXCEPT +SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_pi)() SIXTRL_NOEXCEPT { return ( SIXTRL_REAL_T )SIXTRL_MATH_CONST_PI; } -SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_deg2rad)( void ) SIXTRL_NOEXCEPT +SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_pi_squ)() SIXTRL_NOEXCEPT +{ + return ( SIXTRL_REAL_T )SIXTRL_MATH_CONST_PI_SQU; +} + +SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_deg2rad)() SIXTRL_NOEXCEPT { return ( SIXTRL_REAL_T )SIXTRL_MATH_CONST_DEG2RAD; } -SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_rad2deg)( void ) SIXTRL_NOEXCEPT +SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_rad2deg)() SIXTRL_NOEXCEPT { return ( SIXTRL_REAL_T )SIXTRL_MATH_CONST_RAD2DEG; } -SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_sqrt_pi)( void ) SIXTRL_NOEXCEPT +SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_sqrt_two)() SIXTRL_NOEXCEPT +{ + return ( SIXTRL_REAL_T )SIXTRL_MATH_CONST_SQRT_TWO; +} + +SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_sqrt_pi)() SIXTRL_NOEXCEPT { return ( SIXTRL_REAL_T )SIXTRL_MATH_CONST_SQRT_PI; } +SIXTRL_INLINE SIXTRL_REAL_T NS(MathConst_two_over_sqrt_pi)() SIXTRL_NOEXCEPT +{ + return ( SIXTRL_REAL_T )SIXTRL_MATH_CONST_TWO_OVER_SQRT_PI; +} + #if defined( __cplusplus ) && !defined( _GPUCODE ) } #endif /* defined( __cplusplus ) && !defined( _GPUCODE ) */ From 8c52151d5aed23aa76f8f40efff0a815b2f6ed07 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 13:52:20 +0200 Subject: [PATCH 51/77] common: fixes issues with the factorial methods - Fixes copy & paste error in 10th and 11th factorial switch-case statement - Removes recursion which does not work ootb on OpenCL 1.2 --- sixtracklib/common/internal/math_factorial.h | 251 +++++++++++-------- 1 file changed, 145 insertions(+), 106 deletions(-) diff --git a/sixtracklib/common/internal/math_factorial.h b/sixtracklib/common/internal/math_factorial.h index e494f909e..6b539ae7c 100644 --- a/sixtracklib/common/internal/math_factorial.h +++ 
b/sixtracklib/common/internal/math_factorial.h @@ -10,6 +10,7 @@ #if !defined( SIXTRL_NO_INCLUDES ) #include "sixtracklib/common/definitions.h" #include "sixtracklib/common/internal/type_store_traits.hpp" + #include "sixtracklib/common/internal/math_functions.h" #endif /* !defined( SIXTRL_NO_INCLUDES ) */ #if defined( __cplusplus ) @@ -23,47 +24,56 @@ namespace SIXTRL_CXX_NAMESPACE template< typename I, typename T = SIXTRL_REAL_T > SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN typename std::enable_if< std::is_integral< I >::value, T >::type - Math_factorial( - typename TypeMethodParamTraits< I >::const_argument_type n ) + Math_factorial( typename TypeMethodParamTraits< I >::const_argument_type n ) { + namespace st = SIXTRL_CXX_NAMESPACE; + + /* generated by the following python code: + + from scipy.special import factorial + import decimal + from decimal import Decimal + for ii in range( 0, 21 ): + print( f"case I{{ {ii:2d} }}: {{ result = static_cast< T >( " + + f"{Decimal(factorial(ii,exact=True)):20.1f} );" + + f" break; }}" ) + */ + T result = T{ 1 }; switch( n ) { - case I{ 0 }: { result = static_cast< T >( 1 ); break; } - case I{ 1 }: { result = static_cast< T >( 1 ); break; } - case I{ 2 }: { result = static_cast< T >( 2 ); break; } - case I{ 3 }: { result = static_cast< T >( 6 ); break; } - case I{ 4 }: { result = static_cast< T >( 24 ); break; } - case I{ 5 }: { result = static_cast< T >( 120 ); break; } - case I{ 6 }: { result = static_cast< T >( 720 ); break; } - case I{ 7 }: { result = static_cast< T >( 5040 ); break; } - case I{ 8 }: { result = static_cast< T >( 40320 ); break; } - case I{ 9 }: { result = static_cast< T >( 362880 ); break; } - case I{ 10 }: { result = static_cast< T >( 3628800 ); break; } - case I{ 11 }: { result = static_cast< T >( 3628800 ); break; } - case I{ 12 }: { result = static_cast< T >( 479001600 ); break; } - case I{ 13 }: { result = static_cast< T >( 6227020800 ); break; } - case I{ 14 }: { result = static_cast< T >( 87178291200 ); 
break; } - case I{ 15 }: { result = static_cast< T >( 1307674368000 ); break; } - case I{ 16 }: { result = static_cast< T >( 20922789888000 ); break; } - case I{ 17 }: { result = static_cast< T >( 355687428096000 ); break; } - case I{ 18 }: { result = static_cast< T >( 6402373705728000 ); break; } - case I{ 19 }: { result = static_cast< T >( 121645100408832000 ); break; } - case I{ 20 }: { result = static_cast< T >( 2432902008176640000 ); break; } + case I{ 0 }: { result = static_cast< T >( 1.0 ); break; } + case I{ 1 }: { result = static_cast< T >( 1.0 ); break; } + case I{ 2 }: { result = static_cast< T >( 2.0 ); break; } + case I{ 3 }: { result = static_cast< T >( 6.0 ); break; } + case I{ 4 }: { result = static_cast< T >( 24.0 ); break; } + case I{ 5 }: { result = static_cast< T >( 120.0 ); break; } + case I{ 6 }: { result = static_cast< T >( 720.0 ); break; } + case I{ 7 }: { result = static_cast< T >( 5040.0 ); break; } + case I{ 8 }: { result = static_cast< T >( 40320.0 ); break; } + case I{ 9 }: { result = static_cast< T >( 362880.0 ); break; } + case I{ 10 }: { result = static_cast< T >( 3628800.0 ); break; } + case I{ 11 }: { result = static_cast< T >( 39916800.0 ); break; } + case I{ 12 }: { result = static_cast< T >( 479001600.0 ); break; } + case I{ 13 }: { result = static_cast< T >( 6227020800.0 ); break; } + case I{ 14 }: { result = static_cast< T >( 87178291200.0 ); break; } + case I{ 15 }: { result = static_cast< T >( 1307674368000.0 ); break; } + case I{ 16 }: { result = static_cast< T >( 20922789888000.0 ); break; } + case I{ 17 }: { result = static_cast< T >( 355687428096000.0 ); break; } + case I{ 18 }: { result = static_cast< T >( 6402373705728000.0 ); break; } + case I{ 19 }: { result = static_cast< T >( 121645100408832000.0 ); break; } default: { - I const nd = n / I{ 20 }; - I const remainder = n % I{ 20 }; - - result = static_cast< T >( nd ) * - static_cast< T >( 2432902008176640000 ); + T ii = T{ 21 }; + T const nn = static_cast< T >( n 
); - if( remainder != I{ 0 } ) + result = static_cast< T >( 2432902008176640000.0 ); + while( ii <= nn ) { - result += SIXTRL_CXX_NAMESPACE::Math_factorial< - I, T >( remainder ); + result *= ii; + ii += T{ 1 }; } } }; @@ -79,29 +89,41 @@ namespace SIXTRL_CXX_NAMESPACE { T result = T{ 1.0 }; + /* generated by the following python code: + + from scipy.special import factorial + import decimal + from decimal import Decimal + for ii in range( 0, 21 ): + print( f"case I{{ {ii:2d} }}: {{ result = T{{ " + + f"{Decimal(1)/Decimal(factorial(ii,exact=True)):28.27g} }};" + + f"break; }}" ) + + */ + switch( n ) { - case I{ 0 }: { result = T{ 1 }; break; } - case I{ 1 }: { result = T{ 1 }; break; } - case I{ 2 }: { result = T{ 0.5 }; break; } - case I{ 3 }: { result = T{ 0.166666666666666657 }; break; } - case I{ 4 }: { result = T{ 0.0416666666666666644 }; break; } - case I{ 5 }: { result = T{ 0.00833333333333333322 }; break; } - case I{ 6 }: { result = T{ 0.00138888888888888894 }; break; } - case I{ 7 }: { result = T{ 0.000198412698412698413 }; break; } - case I{ 8 }: { result = T{ 2.48015873015873016e-05 }; break; } - case I{ 9 }: { result = T{ 2.75573192239858925e-06 }; break; } - case I{ 10 }: { result = T{ 2.75573192239858883e-07 }; break; } - case I{ 11 }: { result = T{ 2.50521083854417202e-08 }; break; } - case I{ 12 }: { result = T{ 2.50521083854417202e-08 }; break; } - case I{ 13 }: { result = T{ 1.60590438368216133e-10 }; break; } - case I{ 14 }: { result = T{ 1.14707455977297245e-11 }; break; } - case I{ 15 }: { result = T{ 7.64716373181981641e-13 }; break; } - case I{ 16 }: { result = T{ 4.77947733238738525e-14 }; break; } - case I{ 17 }: { result = T{ 2.8114572543455206e-15 }; break; } - case I{ 18 }: { result = T{ 1.56192069685862253e-16 }; break; } - case I{ 19 }: { result = T{ 8.2206352466243295e-18 }; break; } - case I{ 20 }: { result = T{ 4.11031762331216484e-19 }; break; } + case I{ 0 }: { result = static_cast< T >( 1. 
); break; } + case I{ 1 }: { result = static_cast< T >( 1. ); break; } + case I{ 2 }: { result = static_cast< T >( 0.5 ); break; } + case I{ 3 }: { result = static_cast< T >( 0.1666666666666666666666666667 ); break; } + case I{ 4 }: { result = static_cast< T >( 0.04166666666666666666666666667 ); break; } + case I{ 5 }: { result = static_cast< T >( 0.008333333333333333333333333333 ); break; } + case I{ 6 }: { result = static_cast< T >( 0.001388888888888888888888888889 ); break; } + case I{ 7 }: { result = static_cast< T >( 0.0001984126984126984126984126984 ); break; } + case I{ 8 }: { result = static_cast< T >( 0.00002480158730158730158730158730 ); break; } + case I{ 9 }: { result = static_cast< T >( 0.000002755731922398589065255731922 ); break; } + case I{ 10 }: { result = static_cast< T >( 2.755731922398589065255731922E-7 ); break; } + case I{ 11 }: { result = static_cast< T >( 2.505210838544171877505210839E-8 ); break; } + case I{ 12 }: { result = static_cast< T >( 2.087675698786809897921009032E-9 ); break; } + case I{ 13 }: { result = static_cast< T >( 1.605904383682161459939237717E-10 ); break; } + case I{ 14 }: { result = static_cast< T >( 1.147074559772972471385169798E-11 ); break; } + case I{ 15 }: { result = static_cast< T >( 7.647163731819816475901131986E-13 ); break; } + case I{ 16 }: { result = static_cast< T >( 4.779477332387385297438207491E-14 ); break; } + case I{ 17 }: { result = static_cast< T >( 2.811457254345520763198945583E-15 ); break; } + case I{ 18 }: { result = static_cast< T >( 1.561920696858622646221636435E-16 ); break; } + case I{ 19 }: { result = static_cast< T >( 8.220635246624329716955981237E-18 ); break; } + case I{ 20 }: { result = static_cast< T >( 4.110317623312164858477990618E-19 ); break; } default: { @@ -114,7 +136,7 @@ namespace SIXTRL_CXX_NAMESPACE } } -template< typename I, typename T = SIXTRL_REAL_T > +template< typename I = SIXTRL_UINT64_T, typename T = SIXTRL_REAL_T > SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN typename 
std::enable_if< std::is_integral< I >::value, T >::type NS(Math_factorial)( typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< @@ -123,7 +145,7 @@ NS(Math_factorial)( typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< return SIXTRL_CXX_NAMESPACE::Math_factorial< I, T >( n ); } -template< typename I, typename T = SIXTRL_REAL_T > +template< typename I = SIXTRL_UINT64_T, typename T = SIXTRL_REAL_T > SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN typename std::enable_if< std::is_integral< I >::value, T >::type NS(Math_inv_factorial)( typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< @@ -138,12 +160,16 @@ NS(Math_inv_factorial)( typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< extern "C" { #endif /* C++, Host */ +#if !defined( __cplusplus ) + SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(Math_factorial)( SIXTRL_UINT64_T const n ) SIXTRL_NOEXCEPT; SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(Math_inv_factorial)( SIXTRL_UINT64_T const n ) SIXTRL_NOEXCEPT; +#endif /* C++ */ + #if defined( __cplusplus ) && !defined( _GPUCODE ) } #endif /* C++, Host */ @@ -152,49 +178,51 @@ SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(Math_inv_factorial)( extern "C" { #endif /* C++, Host */ +#if !defined( __cplusplus ) + SIXTRL_INLINE SIXTRL_REAL_T NS(Math_factorial)( SIXTRL_UINT64_T const n ) SIXTRL_NOEXCEPT { - typedef SIXTRL_REAL_T real_t; - typedef SIXTRL_UINT64_T uint_t; + typedef SIXTRL_REAL_T real_type; + typedef SIXTRL_UINT64_T uint_type; - real_t result = ( real_t )1; + real_type result = ( real_type )1; switch( n ) { - case ( uint_t )0: { result = ( real_t )1; break; } - case ( uint_t )1: { result = ( real_t )1; break; } - case ( uint_t )2: { result = ( real_t )2; break; } - case ( uint_t )3: { result = ( real_t )6; break; } - case ( uint_t )4: { result = ( real_t )24; break; } - case ( uint_t )5: { result = ( real_t )120; break; } - case ( uint_t )6: { result = ( real_t )720; break; } - case ( uint_t )7: { result = ( real_t )5040; break; } - case ( uint_t )8: { result = ( real_t )40320; 
break; } - case ( uint_t )9: { result = ( real_t )362880; break; } - case ( uint_t )10: { result = ( real_t )3628800; break; } - case ( uint_t )11: { result = ( real_t )3628800; break; } - case ( uint_t )12: { result = ( real_t )479001600; break; } - case ( uint_t )13: { result = ( real_t )6227020800; break; } - case ( uint_t )14: { result = ( real_t )87178291200; break; } - case ( uint_t )15: { result = ( real_t )1307674368000; break; } - case ( uint_t )16: { result = ( real_t )20922789888000; break; } - case ( uint_t )17: { result = ( real_t )355687428096000; break; } - case ( uint_t )18: { result = ( real_t )6402373705728000; break; } - case ( uint_t )19: { result = ( real_t )121645100408832000; break; } - case ( uint_t )20: { result = ( real_t )2432902008176640000; break; } + case ( uint_type )0: { result = ( real_type ) 1.0; break; } + case ( uint_type )1: { result = ( real_type ) 1.0; break; } + case ( uint_type )2: { result = ( real_type ) 2.0; break; } + case ( uint_type )3: { result = ( real_type ) 6.0; break; } + case ( uint_type )4: { result = ( real_type ) 24.0; break; } + case ( uint_type )5: { result = ( real_type ) 120.0; break; } + case ( uint_type )6: { result = ( real_type ) 720.0; break; } + case ( uint_type )7: { result = ( real_type ) 5040.0; break; } + case ( uint_type )8: { result = ( real_type ) 40320.0; break; } + case ( uint_type )9: { result = ( real_type ) 362880.0; break; } + case ( uint_type )10: { result = ( real_type ) 3628800.0; break; } + case ( uint_type )11: { result = ( real_type ) 39916800.0; break; } + case ( uint_type )12: { result = ( real_type ) 479001600.0; break; } + case ( uint_type )13: { result = ( real_type ) 6227020800.0; break; } + case ( uint_type )14: { result = ( real_type ) 87178291200.0; break; } + case ( uint_type )15: { result = ( real_type ) 1307674368000.0; break; } + case ( uint_type )16: { result = ( real_type ) 20922789888000.0; break; } + case ( uint_type )17: { result = ( real_type ) 355687428096000.0; 
break; } + case ( uint_type )18: { result = ( real_type ) 6402373705728000.0; break; } + case ( uint_type )19: { result = ( real_type ) 121645100408832000.0; break; } default: { - uint_t const nd = n / ( uint_t )20; - uint_t const remainder = n % ( uint_t )20; + real_type ii = ( real_type )21; + real_type const nn = ( real_type )n; - result = ( ( real_t )nd ) * ( real_t )2432902008176640000; - - if( remainder != ( uint_t )0 ) + result = ( real_type )2432902008176640000.0; + while( ii <= nn ) { - result += NS(Math_factorial)( remainder ); + result *= ii; + ii += ( real_type )1.0; } + } }; @@ -204,34 +232,44 @@ SIXTRL_INLINE SIXTRL_REAL_T NS(Math_factorial)( SIXTRL_INLINE SIXTRL_REAL_T NS(Math_inv_factorial)( SIXTRL_UINT64_T const n ) SIXTRL_NOEXCEPT { - typedef SIXTRL_REAL_T real_t; + typedef SIXTRL_REAL_T real_t; typedef SIXTRL_UINT64_T uint_t; + /* Generated by python code: + from scipy.special import factorial + import decimal + from decimal import Decimal + for ii in range( 0, 21 ): + print( f"case ( uint_t ){ii}: {{ result = ( real_t )" + + f"{Decimal(1)/Decimal(factorial(ii,exact=True)):g}; " + + f"break; }}" ) + */ real_t result = ( real_t )1.0; switch( n ) { - case ( uint_t )0: { result = ( real_t )1.0; break; } - case ( uint_t )1: { result = ( real_t )1.0; break; } - case ( uint_t )2: { result = ( real_t )0.5; break; } - case ( uint_t )3: { result = ( real_t )0.166666666666666657; break; } - case ( uint_t )4: { result = ( real_t )0.0416666666666666644; break; } - case ( uint_t )5: { result = ( real_t )0.00833333333333333322; break; } - case ( uint_t )6: { result = ( real_t )0.00138888888888888894; break; } - case ( uint_t )7: { result = ( real_t )0.000198412698412698413; break; } - case ( uint_t )8: { result = ( real_t )2.48015873015873016e-05; break; } - case ( uint_t )9: { result = ( real_t )2.75573192239858925e-06; break; } - case ( uint_t )10: { result = ( real_t )2.75573192239858883e-07; break; } - case ( uint_t )11: { result = ( real_t 
)2.50521083854417202e-08; break; } - case ( uint_t )12: { result = ( real_t )2.50521083854417202e-08; break; } - case ( uint_t )13: { result = ( real_t )1.60590438368216133e-10; break; } - case ( uint_t )14: { result = ( real_t )1.14707455977297245e-11; break; } - case ( uint_t )15: { result = ( real_t )7.64716373181981641e-13; break; } - case ( uint_t )16: { result = ( real_t )4.77947733238738525e-14; break; } - case ( uint_t )17: { result = ( real_t )2.8114572543455206e-15; break; } - case ( uint_t )18: { result = ( real_t )1.56192069685862253e-16; break; } - case ( uint_t )19: { result = ( real_t )8.2206352466243295e-18; break; } - case ( uint_t )20: { result = ( real_t )4.11031762331216484e-19; break; } + case ( uint_t )0: { result = ( real_t )1; break; } + case ( uint_t )1: { result = ( real_t )1; break; } + case ( uint_t )2: { result = ( real_t )0.5; break; } + case ( uint_t )3: { result = ( real_t )0.1666666666666666666666666667; break; } + case ( uint_t )4: { result = ( real_t )0.04166666666666666666666666667; break; } + case ( uint_t )5: { result = ( real_t )0.008333333333333333333333333333; break; } + case ( uint_t )6: { result = ( real_t )0.001388888888888888888888888889; break; } + case ( uint_t )7: { result = ( real_t )0.0001984126984126984126984126984; break; } + case ( uint_t )8: { result = ( real_t )0.00002480158730158730158730158730; break; } + case ( uint_t )9: { result = ( real_t )0.000002755731922398589065255731922; break; } + case ( uint_t )10: { result = ( real_t )2.755731922398589065255731922e-7; break; } + case ( uint_t )11: { result = ( real_t )2.505210838544171877505210839e-8; break; } + case ( uint_t )12: { result = ( real_t )2.087675698786809897921009032e-9; break; } + case ( uint_t )13: { result = ( real_t )1.605904383682161459939237717e-10; break; } + case ( uint_t )14: { result = ( real_t )1.147074559772972471385169798e-11; break; } + case ( uint_t )15: { result = ( real_t )7.647163731819816475901131986e-13; break; } + case ( uint_t 
)16: { result = ( real_t )4.779477332387385297438207491e-14; break; } + case ( uint_t )17: { result = ( real_t )2.811457254345520763198945583e-15; break; } + case ( uint_t )18: { result = ( real_t )1.561920696858622646221636435e-16; break; } + case ( uint_t )19: { result = ( real_t )8.220635246624329716955981237e-18; break; } + case ( uint_t )20: { result = ( real_t )4.110317623312164858477990618e-19; break; } + default: { @@ -241,6 +279,7 @@ SIXTRL_INLINE SIXTRL_REAL_T NS(Math_inv_factorial)( return result; } +#endif /* C++ */ #if defined( __cplusplus ) && !defined( _GPUCODE ) } From 6978fb827720ab6284090202bfb3ac2671d35ccb Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 13:55:46 +0200 Subject: [PATCH 52/77] common: adds sincos method and provides fallback if not available --- sixtracklib/common/internal/math_functions.h | 80 ++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/sixtracklib/common/internal/math_functions.h b/sixtracklib/common/internal/math_functions.h index 4fe91b8e9..93ece2111 100644 --- a/sixtracklib/common/internal/math_functions.h +++ b/sixtracklib/common/internal/math_functions.h @@ -142,6 +142,41 @@ namespace SIXTRL_CXX_NAMESPACE /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + template< typename T > + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN + void sincos( typename TypeMethodParamTraits< T >::const_argument_type arg, + SIXTRL_RESULT_PTR_DEC typename TypeMethodParamTraits< T >::pointer sin_res, + SIXTRL_RESULT_PTR_DEC typename TypeMethodParamTraits< T >::pointer cos_res + ) SIXTRL_NOEXCEPT + { + SIXTRL_ASSERT( sin_res != nullptr ); + SIXTRL_ASSERT( cos_res != nullptr ); + #if ( !defined( SIXTRL_NO_SINCOS ) ) && \ + ( defined( __OPENCL_C_VERSION__ ) ) + + *sin_res = ::sincos( arg, cos_res ); + + #elif ( !defined( XSUITE_NO_SINCOS ) ) && \ + ( ( defined( __CUDA_ARCH__ ) ) || \ + ( defined( __GNUC__ ) && !defined( __clang__ ) && \ + !defined( __STRICT_ANSI__ ) && !defined( 
__INTEL_COMPILER ) && \ + defined( __NO_MATH_ERRNO__ ) ) ) + + ::sincos( arg, sin_res, cos_res ); + + #else + + using std::sin; + using std::cos; + + *sin_res = sin( arg ); + *cos_res = cos( arg ); + + #endif + } + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + template< typename T > SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN typename TypeMethodParamTraits< T >::value_type tan( typename @@ -476,6 +511,20 @@ NS(acos)( typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +template< typename T > +SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN void NS(sincos)( + typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< + T >::const_argument_type arg, + SIXTRL_RESULT_PTR_DEC typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< + T >::pointer SIXTRL_RESTRICT sin_res, + SIXTRL_RESULT_PTR_DEC typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< + T >::pointer SIXTRL_RESTRICT cos_res ) SIXTRL_NOEXCEPT +{ + SIXTRL_CXX_NAMESPACE::sincos( arg, sin_res, cos_res ); +} + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + template< typename T > SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< T >::value_type @@ -685,6 +734,11 @@ NS(asin)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT; SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(cos)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT; +SIXTRL_STATIC SIXTRL_FN void NS(sincos)( SIXTRL_REAL_T const arg, + SIXTRL_RESULT_PTR_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT sin_res, + SIXTRL_RESULT_PTR_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT cos_res +) SIXTRL_NOEXCEPT; + SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(acos)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT; @@ -771,6 +825,32 @@ SIXTRL_INLINE SIXTRL_REAL_T NS(cos)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT return cos( arg ); } +SIXTRL_INLINE void NS(sincos)( SIXTRL_REAL_T const arg, + SIXTRL_RESULT_PTR_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT sin_res, + 
SIXTRL_RESULT_PTR_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT cos_res +) SIXTRL_NOEXCEPT +{ + SIXTRL_ASSERT( sin_res != SIXTRL_NULLPTR ); + SIXTRL_ASSERT( cos_res != SIXTRL_NULLPTR ); + + #if ( !defined( SIXTRL_NO_SINCOS ) ) && \ + ( defined( __OPENCL_C_VERSION__ ) ) + *sin_res = sincos( arg, cos_res ); + + #elif ( !defined( XSUITE_NO_SINCOS ) ) && \ + ( ( defined( __CUDA_ARCH__ ) ) || \ + ( defined( __GNUC__ ) && !defined( __clang__ ) && \ + !defined( __STRICT_ANSI__ ) && !defined( __INTEL_COMPILER ) && \ + defined( __NO_MATH_ERRNO__ ) ) ) + sincos( arg, sin_res, cos_res ); + + #else + *sin_res = sin( arg ); + *cos_res = cos( arg ); + + #endif +} + SIXTRL_INLINE SIXTRL_REAL_T NS(acos)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT { #if defined( __cplusplus ) && !defined( _GPUCODE ) /* ADL */ From 9999c81d8186209d7d38e88199c4b2cc6e5fdd40 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 13:57:15 +0200 Subject: [PATCH 53/77] common: updates NS(pow_int_exp) - removes recursive limitations -> this fixes issues on OpenCL - provide a "binary" like implementation with minimal temporary / local storage requirements optimised for GPUs --- sixtracklib/common/internal/math_functions.h | 295 ++++++++----------- 1 file changed, 126 insertions(+), 169 deletions(-) diff --git a/sixtracklib/common/internal/math_functions.h b/sixtracklib/common/internal/math_functions.h index 93ece2111..aefd07aea 100644 --- a/sixtracklib/common/internal/math_functions.h +++ b/sixtracklib/common/internal/math_functions.h @@ -295,106 +295,78 @@ namespace SIXTRL_CXX_NAMESPACE std::is_integral< I >(), typename TypeMethodParamTraits< T >::value_type >::type pow_int_exp( - typename TypeMethodParamTraits< T >::const_argument_type base, + typename TypeMethodParamTraits< T >::const_argument_type x, typename TypeMethodParamTraits< I >::const_argument_type n ) SIXTRL_NOEXCEPT { - #if defined( _GPUCODE ) && defined( __OPENCL_VERSION__ ) - return pown( base, n ); - #elif ( __cplusplus >= 201103L ) - 
#if !defined( _GPUCODE ) /* ADL! */ - using std::pow; - #endif /* ADL / Host */ - return pow( base, n ); - #else - #if !defined( _GPUCODE ) /* ADL! */ - using std::abs; - #endif /* ADL / Host */ - namespace st = SIXTRL_CXX_NAMESPACE; - typedef typename st::TypeMethodParamTraits< I >::value_type int_t; - typedef typename st::TypeMethodParamTraits< T >::value_type real_t; + typedef typename TypeMethodParamTraits< T >::value_type real_type; - real_t result; - int_t const pos_exp = abs( n ); + real_type x_n = x; - SIXTRL_ASSERT( ( st::Type_comp_all_more< T >( - st::abs< T >( base ), real_t{ 0 } ) ) || ( n > int_t{ 0 } ) ); + unsigned int const n_div_16 = n >> 4u; + unsigned int const n_mod_16 = n - ( n_div_16 << 4u ); - switch( pos_exp ) + switch( n_mod_16 ) { - case 0: - { - result = real_t{ 1 }; - break; - } - - case 1: - { - result = base; - break; - } - - case 2: - { - result = base * base; - break; - } - - case 3: - { - result = base * base * base; - break; - } - - case 4: - { - real_t const base_squ = base * base; - result = base_squ * base_squ; - break; - } + case 0u: { x_n = real_type{ 1.0 }; break; } + case 1u: { break; } + case 2u: { x_n *= x; break; } + case 3u: { x_n *= x * x; break; } + case 4u: { x_n *= x; x_n *= x_n; break; } + case 5u: { x_n *= x; x_n *= x_n * x; break; } + case 6u: { x_n *= x * x; x_n *= x_n; break; } + case 7u: { x_n *= x * x; x_n *= x_n * x; break; } + case 8u: { x_n *= x; x_n *= x_n; x_n *= x_n; break; } + case 9u: { x_n *= x * x; x_n *= x_n * x_n; break; } + case 10u: { x_n *= x * x; x_n *= x_n * x_n * x; break; } + case 11u: { x_n *= x; x_n *= x_n * x; x_n *= x_n * x; break; } + case 12u: { x_n *= x * x; x_n *= x_n; x_n *= x_n; break; } + case 13u: { x_n *= x * x; x_n *= x_n; x_n *= x_n * x; break; } + case 14u: { x_n *= x * x; x_n *= x_n * x; x_n *= x_n; break; } + case 15u: { x_n *= x; x_n *= x_n * x; x_n *= x_n * x_n; break; } + default: { x_n = real_type{ 0.0 }; } + }; - case 5: - { - real_t const base_squ = base * base; - 
result = base_squ * base_squ * base; - break; - } + if( n_div_16 > 0u ){ x *= x; x *= x; x *= x; x *= x; } - case 6: - { - real_t const base_cub = base * base * base; - result = base_cub * base_cub; - break; - } + switch( n_div_16 ) + { + case 0u: { x_n = ( n_mod_16 != 0u ) ? x_n : real_type{ 1.0 }; break; } + case 1u: { x_n *= x; break; } + case 2u: { x *= x; x_n *= x; break; } + case 3u: { x_n *= x * x * x; break; } + case 4u: { x *= x; x *= x; x_n *= x; break; } + case 5u: { x_n *= x; x *= x; x *= x; x_n *= x; break; } + case 6u: { x *= x * x; x *= x; x_n *= x; break; } + case 7u: { x_n *= x; x *= x * x; x *= x; x_n *= x; break; } + case 8u: { x *= x; x *= x; x*= x; x_n *= x; break; } + case 9u: { x *= x * x; x *= x * x; x_n *= x; break; } + case 10u: { x_n *= x; x *= x * x; x *= x * x; x_n *= x; break; } + case 11u: { x_n *= x * x; x *= x * x; x *= x * x; x_n *= x; break; } + case 12u: { x *= x; x *= x; x_n *= x; x *= x; x_n *= x; break; } + case 13u: { x_n *= x; x *= x; x *= x; x_n *= x; x *= x; + x_n *= x; break; } + + case 14u: { x_n *= x * x; x *= x; x *= x; x_n *= x; x *= x; + x_n *= x; break; } + + case 15u: { x *= x * x; x_n *= x * x; x *= x * x; x_n *= x; break; } - case 7: + default: { - real_t const base_cub = base * base * base; - result = base_cub * base_cub * base; - break; - } + unsigned int ii = 0u; + unsigned int nn = n_div_16 % 16u; - case 8: - { - real_t const base_squ = base * base; - real_t const base_quad = base_squ * base_squ; - result = base_quad * base_quad; - break; - } + for( ; ii < nn ; ++ii ) x_n *= x; - default: - { - real_t const base_pow_8 = - st::pow_int_exp< T, I >( base, int_t{ 8 } ); + x *= x; x *= x; x *= x; x *= x; + nn = ( n_div_16 - nn ) >> 4u; - result = st::pow_int_exp< T, I >( base_pow_8, pos_exp >> 3 ); - result *= st::pow_int_exp< T, I >( base_pow_8, - pos_exp - ( ( pos_exp >> 3 ) << 3 ) ); + for( ii = 0u ; ii < nn ; ++ii ) x_n *= x; } - }; + } - return ( n >= int_t{ 0 } ) ? 
result : real_t{ 1 } / result; - #endif + return x_n; } template< typename T > @@ -774,8 +746,8 @@ NS(pow_positive_base)( SIXTRL_REAL_T const base, SIXTRL_REAL_T const n ) SIXTRL_NOEXCEPT; SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T -NS(pow_int_exp)( SIXTRL_REAL_T const base, - SIXTRL_INT64_T const n ) SIXTRL_NOEXCEPT; +NS(pow_int_exp)( SIXTRL_REAL_T base, + SIXTRL_UINT64_T const n ) SIXTRL_NOEXCEPT; SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(min)( SIXTRL_REAL_T const lhs, SIXTRL_REAL_T const rhs ) SIXTRL_NOEXCEPT; @@ -962,99 +934,84 @@ SIXTRL_INLINE SIXTRL_REAL_T NS(pow_positive_base)( #endif } -SIXTRL_INLINE SIXTRL_REAL_T NS(pow_int_exp)( SIXTRL_REAL_T const base, - SIXTRL_INT64_T const n ) SIXTRL_NOEXCEPT +SIXTRL_INLINE SIXTRL_REAL_T NS(pow_int_exp)( SIXTRL_REAL_T x, + SIXTRL_UINT64_T const n ) SIXTRL_NOEXCEPT { - #if defined( _GPUCODE ) && defined( __OPENCL_VERSION__ ) - return pown( base, n ); - #elif defined( __cplusplus ) && ( __cplusplus >= 201103L ) - #if !defined( _GPUCODE ) /* ADL */ - using std::pow; - #endif /* ADL / Host */ - return pow( base, n ); - #else - #if !defined( _GPUCODE ) && defined( __cplusplus ) /* ADL */ - using std::llabs; - #endif /* ADL / Host */ - typedef SIXTRL_REAL_T real_t; - typedef SIXTRL_INT64_T int_t; + #if defined( __OPENCL_C_VERSION__ ) + return pown( x, n ); + #else /* !defined( __OPENCL_C_VERSION__ ) */ - real_t result; - int_t const pos_exp = llabs( n ); + typedef SIXTRL_REAL_T real_type; - switch( pos_exp ) - { - case 0: - { - result = ( real_t )1; - break; - } + real_type x_n = x; - case 1: - { - result = base; - break; - } + unsigned int const n_div_16 = n >> 4u; + unsigned int const n_mod_16 = n - ( n_div_16 << 4u ); - case 2: - { - result = base * base; - break; - } + SIXTRL_ASSERT( n >= 0 ); - case 3: - { - result = base * base * base; - break; - } - - case 4: - { - real_t const base_squ = base * base; - result = base_squ * base_squ; - break; - } - - case 5: - { - real_t const base_squ = base * base; - result = base_squ * 
base_squ * base; - break; - } - - case 6: - { - real_t const base_cub = base * base * base; - result = base_cub * base_cub; - break; - } + switch( n_mod_16 ) + { + case 0u: { x_n = ( real_type )1.0; break; } + case 1u: { break; } + case 2u: { x_n *= x; break; } + case 3u: { x_n *= x * x; break; } + case 4u: { x_n *= x; x_n *= x_n; break; } + case 5u: { x_n *= x; x_n *= x_n * x; break; } + case 6u: { x_n *= x * x; x_n *= x_n; break; } + case 7u: { x_n *= x * x; x_n *= x_n * x; break; } + case 8u: { x_n *= x; x_n *= x_n; x_n *= x_n; break; } + case 9u: { x_n *= x * x; x_n *= x_n * x_n; break; } + case 10u: { x_n *= x * x; x_n *= x_n * x_n * x; break; } + case 11u: { x_n *= x; x_n *= x_n * x; x_n *= x_n * x; break; } + case 12u: { x_n *= x * x; x_n *= x_n; x_n *= x_n; break; } + case 13u: { x_n *= x * x; x_n *= x_n; x_n *= x_n * x; break; } + case 14u: { x_n *= x * x; x_n *= x_n * x; x_n *= x_n; break; } + case 15u: { x_n *= x; x_n *= x_n * x; x_n *= x_n * x_n; break; } + default: { x_n = ( real_type )0.0; } + }; + + if( n_div_16 > 0u ){ x *= x; x *= x; x *= x; x *= x; } + + switch( n_div_16 ) + { + case 0u: { x_n = ( n_mod_16 != 0u ) ? 
x_n : ( real_type )1.0; break; } + case 1u: { x_n *= x; break; } + case 2u: { x *= x; x_n *= x; break; } + case 3u: { x_n *= x * x * x; break; } + case 4u: { x *= x; x *= x; x_n *= x; break; } + case 5u: { x_n *= x; x *= x; x *= x; x_n *= x; break; } + case 6u: { x *= x * x; x *= x; x_n *= x; break; } + case 7u: { x_n *= x; x *= x * x; x *= x; x_n *= x; break; } + case 8u: { x *= x; x *= x; x*= x; x_n *= x; break; } + case 9u: { x *= x * x; x *= x * x; x_n *= x; break; } + case 10u: { x_n *= x; x *= x * x; x *= x * x; x_n *= x; break; } + case 11u: { x_n *= x * x; x *= x * x; x *= x * x; x_n *= x; break; } + case 12u: { x *= x; x *= x; x_n *= x; x *= x; x_n *= x; break; } + case 13u: { x_n *= x; x *= x; x *= x; x_n *= x; x *= x; + x_n *= x; break; } + + case 14u: { x_n *= x * x; x *= x; x *= x; x_n *= x; x *= x; + x_n *= x; break; } + + case 15u: { x *= x * x; x_n *= x * x; x *= x * x; x_n *= x; break; } + + default: + { + unsigned int ii = 0u; + unsigned int nn = n_div_16 % 16u; - case 7: - { - real_t const base_cub = base * base * base; - result = base_cub * base_cub * base; - break; - } + for( ; ii < nn ; ++ii ) x_n *= x; - case 8: - { - real_t const base_squ = base * base; - real_t const base_quad = base_squ * base_squ; - result = base_quad * base_quad; - break; - } + x *= x; x *= x; x *= x; x *= x; + nn = ( n_div_16 - nn ) >> 4u; - default: - { - real_t const base_pow_8 = NS(pow_int_exp)( base, ( int_t )8 ); - result = NS(pow_int_exp)( base_pow_8, pos_exp >> 3 ); - result *= NS(pow_int_exp)( base_pow_8, - pos_exp - ( ( pos_exp >> 3 ) << 3 ) ); - } - }; + for( ii = 0u ; ii < nn ; ++ii ) x_n *= x; + } + }; - return ( n >= ( int_t )0 ) ? 
result : ( real_t )1 / result; - #endif + return x_n; + #endif /* defined( __OPENCL_C_VERSION__ ) */ } SIXTRL_INLINE SIXTRL_REAL_T NS(min)( From 6716fb9cd32e6d9b6700adce4ff02bd7b4a4f87b Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:00:07 +0200 Subject: [PATCH 54/77] common: adds NS(round), NS(floor), and NS(ceil) to the mathematical methods --- sixtracklib/common/internal/math_functions.h | 138 +++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/sixtracklib/common/internal/math_functions.h b/sixtracklib/common/internal/math_functions.h index aefd07aea..5ae0e6cf3 100644 --- a/sixtracklib/common/internal/math_functions.h +++ b/sixtracklib/common/internal/math_functions.h @@ -417,6 +417,69 @@ namespace SIXTRL_CXX_NAMESPACE #endif /* ADL / Host */ return min( lhs, rhs ); } + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + + template< typename T > + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN + typename std::enable_if< SIXTRL_CXX_NAMESPACE::Type_is_scalar< T >(), + typename TypeMethodParamTraits< T >::value_type >::type + round( typename TypeMethodParamTraits< T >::const_argument_type arg + ) SIXTRL_NOEXCEPT + { + #if !defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) + typedef typename TypeMethodParamTraits< T >::value_type value_type; + typedef SIXTRL_INT64_T int_type; + #endif /* !defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) */ + + #if !defined( _GPUCODE ) /* ADL! 
*/ + using std::round; + #endif /* ADL / Host */ + + #if defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) + return round( arg ); + #else + return static_cast< value_type >( static_cast< int_type >( + value_type{ 0.5 } + arg ) ); + #endif /* defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) */ + } + + template< typename T > + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN + typename std::enable_if< SIXTRL_CXX_NAMESPACE::Type_is_scalar< T >(), + typename TypeMethodParamTraits< T >::value_type >::type + floor( typename TypeMethodParamTraits< T >::const_argument_type arg + ) SIXTRL_NOEXCEPT + { + #if !defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) + typedef typename TypeMethodParamTraits< T >::value_type value_type; + typedef SIXTRL_INT64_T int_type; + #endif /* !defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) */ + + #if !defined( _GPUCODE ) /* ADL! */ + using std::floor; + #endif /* ADL / Host */ + + #if defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) + return floor( arg ); /* safe path: the floor brought into scope above, not round */ + #else + return static_cast< value_type >( static_cast< int_type >( arg ) ); /* NOTE(review): integer cast truncates toward zero, so this differs from floor() for negative non-integral arg -- confirm callers only pass arg >= 0 */ + #endif /* defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) */ + } + + template< typename T > + SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN + typename std::enable_if< SIXTRL_CXX_NAMESPACE::Type_is_scalar< T >(), + typename TypeMethodParamTraits< T >::value_type >::type + ceil( typename TypeMethodParamTraits< T >::const_argument_type arg + ) SIXTRL_NOEXCEPT + { + #if !defined( _GPUCODE ) /* ADL! 
*/ + using std::ceil; + #endif /* ADL / Host */ + + return ceil( arg ); + } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ @@ -691,6 +754,35 @@ typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< T >::value_type NS(min)( return SIXTRL_CXX_NAMESPACE::min( lhs, rhs ); } +/* ------------------------------------------------------------------------ */ + +template< typename T > +SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN +typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< T >::value_type +NS(round)( typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< + T >::const_argument_type arg ) SIXTRL_NOEXCEPT +{ + return SIXTRL_CXX_NAMESPACE::round< T >( arg ); +} + +template< typename T > +SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN +typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< T >::value_type +NS(floor)( typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< + T >::const_argument_type arg ) SIXTRL_NOEXCEPT +{ + return SIXTRL_CXX_NAMESPACE::floor< T >( arg ); +} + +template< typename T > +SIXTRL_STATIC SIXTRL_INLINE SIXTRL_FN +typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< T >::value_type +NS(ceil)( typename SIXTRL_CXX_NAMESPACE::TypeMethodParamTraits< + T >::const_argument_type arg ) SIXTRL_NOEXCEPT +{ + return SIXTRL_CXX_NAMESPACE::ceil< T >( arg ); +} + #endif /* defined( __cplusplus ) */ #if defined( __cplusplus ) && !defined( _GPUCODE ) @@ -755,6 +847,15 @@ NS(min)( SIXTRL_REAL_T const lhs, SIXTRL_REAL_T const rhs ) SIXTRL_NOEXCEPT; SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(max)( SIXTRL_REAL_T const lhs, SIXTRL_REAL_T const rhs ) SIXTRL_NOEXCEPT; +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T +NS(floor)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T +NS(round)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T +NS(ceil)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT; + #if defined( __cplusplus ) && !defined( _GPUCODE ) } #endif /* defined( __cplusplus ) && !defined( 
_GPUCODE ) */ @@ -1040,6 +1141,43 @@ SIXTRL_INLINE SIXTRL_REAL_T NS(max)( #endif /* _GPUCODE */ } +SIXTRL_INLINE SIXTRL_REAL_T NS(floor)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT +{ + #if !defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) + return ( SIXTRL_REAL_T )( SIXTRL_INT64_T )( arg ); + #else /* defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) */ + #if defined( __cplusplus ) && !defined( _GPUCODE ) + using std::floor; + #endif /* defined( __cplusplus ) && !defined( _GPUCODE ) */ + return floor( arg ); + #endif /* defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) */ +} + + +SIXTRL_INLINE SIXTRL_REAL_T NS(round)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT +{ + #if !defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) + return ( SIXTRL_REAL_T )( SIXTRL_INT64_T )( + ( SIXTRL_REAL_T )0.5 + arg ); + + #else /* defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) */ + #if defined( __cplusplus ) && !defined( _GPUCODE ) + using std::round; + #endif /* defined( __cplusplus ) && !defined( _GPUCODE ) */ + + return round( arg ); + #endif /* defined( SIXTRL_REAL_USE_SAFE_ROUNDING ) */ +} + +SIXTRL_INLINE SIXTRL_REAL_T NS(ceil)( SIXTRL_REAL_T const arg ) SIXTRL_NOEXCEPT +{ + #if defined( __cplusplus ) && !defined( _GPUCODE ) + using std::ceil; + #endif /* defined( __cplusplus ) && !defined( _GPUCODE ) */ + + return ceil( arg ); +} + #if defined( __cplusplus ) && !defined( _GPUCODE ) } #endif /* defined( __cplusplus ) && !defined( _GPUCODE ) */ From 98db7d8c4de6d4f99c8c479f96dc6d21cb38694b Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:00:50 +0200 Subject: [PATCH 55/77] common: adds decorator for (private memory) results --- sixtracklib/common/definitions.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sixtracklib/common/definitions.h b/sixtracklib/common/definitions.h index a55a2f1ae..ef4f1a260 100644 --- a/sixtracklib/common/definitions.h +++ b/sixtracklib/common/definitions.h @@ -189,6 +189,12 @@ #define SIXTRL_FN #endif /* SIXTRL_FN */ + /* 
---------------------------------------------------------------- */ + + #if !defined( SIXTRL_RESULT_PTR_DEC ) + #define SIXTRL_RESULT_PTR_DEC + #endif /* !defined( SIXTRL_RESULT_PTR_DEC ) */ + #elif defined( __CUDACC__ ) /* ---------------------------------------------------------------- */ /* assert: */ @@ -316,6 +322,12 @@ #define SIXTRL_FN __host__ __device__ #endif /* SIXTRL_FN */ + /* ---------------------------------------------------------------- */ + + #if !defined( SIXTRL_RESULT_PTR_DEC ) + #define SIXTRL_RESULT_PTR_DEC + #endif /* !defined( SIXTRL_RESULT_PTR_DEC ) */ + #endif /* defined( __OPENCL_C_VERSION__ ) || defined( __CUDACC__ ) */ #else /* !defined( _GPUCODE ) */ @@ -543,6 +555,10 @@ #endif /* defined( __CUDACC__ ) */ + #if !defined( SIXTRL_RESULT_PTR_DEC ) + #define SIXTRL_RESULT_PTR_DEC + #endif /* !defined( SIXTRL_RESULT_PTR_DEC ) */ + #endif /* defined( _GPUCODE ) */ /* ------------------------------------------------------------------------- */ From c09f3ed5a597f4aaa164f51a1a7464e2f2bbc4c8 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:04:10 +0200 Subject: [PATCH 56/77] common: updates faddeeva implementations --- sixtracklib/common/be_beamfields/faddeeva.h | 1883 +++++++++++++++++ .../common/be_beamfields/faddeeva_cern.h | 128 -- 2 files changed, 1883 insertions(+), 128 deletions(-) create mode 100644 sixtracklib/common/be_beamfields/faddeeva.h delete mode 100644 sixtracklib/common/be_beamfields/faddeeva_cern.h diff --git a/sixtracklib/common/be_beamfields/faddeeva.h b/sixtracklib/common/be_beamfields/faddeeva.h new file mode 100644 index 000000000..7e65ffc9e --- /dev/null +++ b/sixtracklib/common/be_beamfields/faddeeva.h @@ -0,0 +1,1883 @@ +#ifndef SIXTACKLIB_COMMON_BE_BEAMFIELDS_FADDEEVA_HEADER_H__ +#define SIXTACKLIB_COMMON_BE_BEAMFIELDS_FADDEEVA_HEADER_H__ + +#if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) && !defined( __cplusplus ) + #include +#endif /* !defined( SIXTRL_NO_SYSTEM_INCLUDES ) && !defined( __cplusplus ) 
*/ + +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/be_beamfields/definitions.h" + #include "sixtracklib/common/internal/math_constants.h" + #include "sixtracklib/common/internal/math_functions.h" + #include "sixtracklib/opencl/helpers.h" + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + #include "sixtracklib/common/be_beamfields/dawson_approx.h" + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #include "sixtracklib/common/be_beamfields/dawson_coeff.h" + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if defined( __cplusplus ) && !defined( _GPUCODE ) +extern "C" { +#endif /* !defined( __cplusplus ) && !defined( _GPUCODE ) */ + +SIXTRL_STATIC SIXTRL_FN void NS(cerrf_cernlib_c_baseline_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN void NS(cerrf_cernlib_c_upstream_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN void NS(cerrf_cernlib_c_optimised_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + 
SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN void NS(cerrf_alg680_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(cerrf_abq2011_a_m_coeff)( + int const m ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN void NS(cerrf_abq2011_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + 
SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN void NS(cerrf_abq2011_q1_coeff)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag, + SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT a_m, + SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT b_m + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN void NS(cerrf_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + 
SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN void NS(cerrf)( + SIXTRL_REAL_T in_real, SIXTRL_REAL_T in_imag, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT; + + +#if !defined( _GPUCODE ) +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_cernlib_c_baseline_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_cernlib_c_upstream_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void 
NS(cerrf_cernlib_c_optimised_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_alg680_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN SIXTRL_REAL_T +NS(cerrf_abq2011_a_m_coeff_ext)( int const m ) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_abq2011_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_abq2011_q1_coeff_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag, + SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT a_m, + SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT b_m +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_ext)( + SIXTRL_REAL_T in_real, SIXTRL_REAL_T in_imag, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; +#endif /* !defined( _GPUCODE ) */ + +/* ************************************************************************* */ + +SIXTRL_INLINE void NS(cerrf_cernlib_c_baseline_q1)( + 
SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + /* This function calculates the SIXTRL_REAL_T precision complex error fnct. + based on the algorithm of the FORTRAN function written at CERN by K. Koelbig + Program C335, 1970. See also M. Bassetti and G.A. Erskine, "Closed + expression for the electric field of a two-dimensional Gaussian charge + density", CERN-ISR-TH/80-06; */ + + int n, nc, nu; + real_type const a_constant = ( real_type )1.12837916709551; + real_type const xLim = ( real_type )5.33; + real_type const yLim = ( real_type )4.29; + real_type h, q, Saux, Sx, Sy, Tn, Tx, Ty, Wx, Wy, xh, xl, yh; + real_type Rx[ 33 ]; + real_type Ry[ 33 ]; + + if( ( y < yLim ) && ( x < xLim ) ) + { + q = ( ( real_type )1.0 - y / yLim ) * + NS(sqrt)( ( real_type )1.0 - ( x / xLim ) * ( x / xLim ) ); + h = 1.0 / (3.2 * q); + nc = 7 + (int) (23.0 * q); + xl = NS(pow)(h, ( real_type )( 1 - nc ) ); + xh = y + 0.5 / h; + yh = x; + nu = 10 + (int) (21.0 * q); + Rx[nu] = 0.; + Ry[nu] = 0.; + for (n = nu; n > 0; n--){ + Tx = xh + n * Rx[n]; + Ty = yh - n * Ry[n]; + Tn = Tx*Tx + Ty*Ty; + Rx[n-1] = 0.5 * Tx / Tn; + Ry[n-1] = 0.5 * Ty / Tn; + } + /* .... */ + Sx = 0.; + Sy = 0.; + for (n = nc; n>0; n--){ + Saux = Sx + xl; + Sx = Rx[n-1] * Saux - Ry[n-1] * Sy; + Sy = Rx[n-1] * Sy + Ry[n-1] * Saux; + xl = h * xl; + }; + Wx = a_constant * Sx; + Wy = a_constant * Sy; + } + else{ + xh = y; + yh = x; + Rx[0] = 0.; + Ry[0] = 0.; + for (n = 9; n>0; n--){ + Tx = xh + n * Rx[0]; + Ty = yh - n * Ry[0]; + Tn = Tx * Tx + Ty * Ty; + Rx[0] = 0.5 * Tx / Tn; + Ry[0] = 0.5 * Ty / Tn; + }; + Wx = a_constant * Rx[0]; + Wy = a_constant * Ry[0]; + } + if (y == 0.) 
{Wx = NS(exp)(-x * x);} + + *out_real = Wx; + *out_imag = Wy; +} + +SIXTRL_INLINE void NS(cerrf_cernlib_c_upstream_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + /* This function calculates the SIXTRL_REAL_T precision complex error fnct. + based on the algorithm of the FORTRAN function written at CERN by K. Koelbig + Program C335, 1970. See also M. Bassetti and G.A. Erskine, "Closed + expression for the electric field of a two-dimensional Gaussian charge + density", CERN-ISR-TH/80-06; */ + + int n = ( int )0u; + int N = ( int )0u; + int nu = ( int )SIXTRL_CERRF_CERNLIB_UPSTREAM_K; + + real_type h = ( real_type )0.0; + real_type two_h_n = ( real_type )0.0; + real_type inv_two_h = ( real_type )1.0; + real_type y_plus_h = y; + + real_type Saux, Sx, Sy, Tn, Tx, Ty, Wx, Wy; + real_type Rx[ SIXTRL_CERRF_CERNLIB_UPSTREAM_NMAX ]; + real_type Ry[ SIXTRL_CERRF_CERNLIB_UPSTREAM_NMAX ]; + + if( ( y < ( real_type )SIXTRL_CERRF_CERNLIB_UPSTREAM_Y0 ) && + ( x < ( real_type )SIXTRL_CERRF_CERNLIB_UPSTREAM_X0 ) ) + { + N = ( int )SIXTRL_CERRF_CERNLIB_UPSTREAM_N; + nu = ( int )SIXTRL_CERRF_CERNLIB_UPSTREAM_NU; + h = ( real_type )SIXTRL_CERRF_CERNLIB_UPSTREAM_H_0; + + two_h_n = ( real_type )2. * h; + y_plus_h += h; + inv_two_h = ( real_type )1. 
/ two_h_n; + two_h_n = NS(pow)( two_h_n, N - 1 ); + + Rx[ nu ] = ( real_type )0.; + Ry[ nu ] = ( real_type )0.; + + for( n = nu ; n > 0 ; --n ) + { + Tx = y_plus_h + n * Rx[ n ]; + Ty = x - n * Ry[ n ]; + Tn = ( Tx * Tx ) + ( Ty * Ty ); + Rx[ n - 1 ] = ( real_type )0.5 * Tx / Tn; + Ry[ n - 1 ] = ( real_type )0.5 * Ty / Tn; + } + + Sx = Sy = ( real_type )0.0; + + for( n = N; n > 0 ; --n ) + { + Saux = Sx + two_h_n; + two_h_n *= inv_two_h; + Sx = Rx[ n - 1 ] * Saux - Ry[ n - 1 ] * Sy; + Sy = Rx[ n - 1 ] * Sy + Ry[ n - 1 ] * Saux; + } + + Wx = NS(MathConst_two_over_sqrt_pi)() * Sx; + Wy = NS(MathConst_two_over_sqrt_pi)() * Sy; + } + else + { + Rx[ 0 ] = Ry[ 0 ] = ( real_type )0.0; + + for( n = nu ; n > 0 ; --n ) /* start at nu (still SIXTRL_CERRF_CERNLIB_UPSTREAM_K on this path); with n left at 0 the loop never ran */ + { + Tx = y_plus_h + ( n * Rx[ 0 ] ); + Ty = x - ( n * Ry[ 0 ] ); + Tn = ( Tx * Tx ) + ( Ty * Ty ); + Rx[ 0 ] = ( real_type )0.5 * Tx / Tn; + Ry[ 0 ] = ( real_type )0.5 * Ty / Tn; + } + + Wx = NS(MathConst_two_over_sqrt_pi)() * Rx[0]; + Wy = NS(MathConst_two_over_sqrt_pi)() * Ry[0]; + } + + if( y < ( real_type )SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_Y ) + Wx = NS(exp)( -x * x ); + + *out_real = Wx; + *out_imag = Wy; +} + +/** \fn void cerrf_cernlib_c_optimised_q1( double const, double const, double*, double* ) + * \brief calculates the Faddeeva function w(z) for z = x + i * y in Q1 + * + * \param[in] x real component of argument z + * \param[in] y imaginary component of argument z + * \param[out] out_x pointer to real component of result + * \param[out] out_y pointer to imaginary component of result + * + * \warning This function assumes that x and y are > 0 i.e., that z is + * from the first quadrant Q1 of the complex plane. Use cerrf if + * you need a more general function + * + * \note Based upon the algorithm developed by W. Gautschi 1970, + * "Efficient Computation of the Complex Error Function", + * SIAM Journal on Numerical Analysis, Vol. 7, Issue 1. 
1970, + * pages 187-198, https://epubs.siam.org/doi/10.1137/0707012 + */ + +SIXTRL_INLINE void NS(cerrf_cernlib_c_optimised_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_y + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + /* This implementation corresponds closely to the previously used + * "CERNLib C" version, translated from the FORTRAN function written at + * CERN by K. Koelbig, Program C335, 1970. The main difference to + * Gautschi's formulation is a split in the main loop and the introduction + * of arrays to store the intermediate results as a consequence of this. + * The version implemented here should perform roughly equally well or even + * slightly better on modern out-of-order super-scalar CPUs but has + * drastically improved performance on GPUs and GPU-like systems. + * + * See also M. Bassetti and G.A. 
Erskine, + * "Closed expression for the electric field of a two-dimensional Gaussian + * charge density", CERN-ISR-TH/80-06; */ + + real_type inv_h2 = ( real_type )1.0; + real_type y_plus_h = y; + real_type temp, Rx, Ry, Sx, Sy, Wx, Wy, h2_n, nn; + + int nu = ( int )SIXTRL_CERRF_CERNLIB_K; + int N = 0; + int n = 0; + + bool use_taylor_sum = ( + ( y < ( real_type )SIXTRL_CERRF_CERNLIB_X0 ) && + ( x < ( real_type )SIXTRL_CERRF_CERNLIB_Y0 ) ); + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + bool const use_dawson_approx = ( + ( x >= ( real_type )SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X ) && + ( x <= ( real_type )SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X ) && + ( y <= ( real_type )SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MAX_Y ) ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + #if defined( _GPUCODE ) && defined( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N ) + + #if ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 2 ) || \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 3 ) + unsigned int nu_minus_n; + SIXTRL_SHARED_DEC unsigned int nu_minus_n_w[ SIXTRL_WORKGROUP_SIZE ]; + #endif /* ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N >= 2 ) */ + + #if ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 3 ) + unsigned int N_max; + SIXTRL_SHARED_DEC unsigned int n_w[ SIXTRL_WORKGROUP_SIZE ]; + #endif /* ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 3 ) */ + + #if ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 4 ) + uint2 local_nu_n; + SIXTRL_SHARED_DEC uint2 nu_n[ SIXTRL_WORKGROUP_SIZE ]; + #endif /* SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 3 ) */ + + #endif + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + if( use_dawson_approx ) + { + use_taylor_sum = false; + nu = 0; + } + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + Rx = Ry = Sx = Sy = h2_n = ( real_type )0.0; + + /* R_0 ... rectangle with width SIXTRL_CERRF_CERNLIB_X0 and + * height SIXTRL_CERRF_CERNLIB_Y0. 
Inside R_0, w(z) is calculated using + * a truncated Taylor expansion. Outside, a Gauss--Hermite + * quadrature in the guise of a continuos fraction is used */ + + if( use_taylor_sum ) + { + #if !defined( SIXTRL_CERRF_CERNLIB_NO_GZ_WEIGHT_FN ) || \ + ( SIXTRL_CERRF_CERNLIB_NO_GZ_WEIGHT_FN == 0 ) + /* calculate g(z) = sqrt( 1 - (x/x0)^2 ) * ( 1 - y/y0 ) */ + temp = x * ( real_type )SIXTRL_CERRF_CERNLIB_INV_X0; + temp = ( ( real_type )1. + temp ) * ( ( real_type )1. - temp ); + temp = sqrt( temp ); + + temp *= ( ( real_type )1. - y * ( + real_type )SIXTRL_CERRF_CERNLIB_INV_Y0 ); + /*now: temp = g(z) */ + + #elif ( SIXTRL_CERRF_CERNLIB_NO_GZ_WEIGHT_FN == 1 ) && \ + defined( SIXTRL_CERRF_CERNLIB_GZ_WEIGHT_VALUE ) + temp = ( real_type )SIXTRL_CERRF_CERNLIB_GZ_WEIGHT_VALUE; + + #else /* !defined( FADDEEVA_NO_GZ_WEIGHT_FN ) */ + temp = ( real_type )1.; + + #endif /* defined( FADDEEVA_NO_GZ_WEIGHT_FN ) */ + + h2_n = ( real_type )SIXTRL_CERRF_CERNLIB_H_0 * temp; + y_plus_h += h2_n; + h2_n *= ( real_type )2.; + inv_h2 = ( real_type )1. / h2_n; + + N = ( int )SIXTRL_CERRF_CERNLIB_N_0 + + ( int )( ( double )SIXTRL_CERRF_CERNLIB_N_1 * temp ); + + #if !defined( _GPUCODE ) || \ + !defined( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N ) || \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 0 ) || \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 1 ) || \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 2 ) + + h2_n = NS(pow_int_exp)( h2_n, N - 1 ); + use_taylor_sum = ( h2_n > ( real_type )SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N ); + + #endif /* ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N <= 2 ) */ + + #if !defined( _GPUCODE ) || \ + !defined( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N ) || \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N != 1 ) + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + nu = ( int )SIXTRL_CERRF_CERNLIB_NU_0 + + ( int )( ( double )SIXTRL_CERRF_CERNLIB_NU_1 * temp ); + + #else + nu = ( y > ( real_type )SIXTRL_CERRF_CERNLIB_MIN_Y ) + ? 
( int )SIXTRL_CERRF_CERNLIB_NU_0 + + ( int )( ( double )SIXTRL_CERRF_CERNLIB_NU_1 * temp ) + : 0; + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX ) */ + + #elif ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 1 ) + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + nu = N + ( int )SIXTRL_CERRF_CERNLIB_K; + #else + nu = ( y > ( real_type )SIXTRL_CERRF_CERNLIB_MIN_Y ) + ? N + ( int )SIXTRL_CERRF_CERNLIB_K : 0; + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX ) */ + + #endif /* SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N */ + } + + #if defined( _GPUCODE ) && \ + defined( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N ) && \ + ( ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 2 ) || \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 3 ) ) + + SIXTRL_ASSERT( nu >= N ); + SIXTRL_SHARED_BUILD_ARRAY( unsigned int, nu_minus_n_w, nu - N ); + SIXTRL_SHARED_FIND_MAX_PER_W( unsigned int, nu_minus_n_w, + ( unsigned int )SIXTRL_W_SIZE, nu_minus_n ); + + #if ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 3 ) + SIXTRL_SHARED_BUILD_ARRAY( unsigned int, n_w, N ); + SIXTRL_SHARED_FIND_MAX_PER_W( unsigned int, n_w, + ( unsigned int )SIXTRL_W_SIZE, N_max ); + + N = ( use_taylor_sum ) ? ( int )N_max : ( int )0; + + #endif /* ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 3 ) */ + nu = nu_minus_n + N; + + #endif /* ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N <= 3 ) */ + + #if defined( _GPUCODE ) && \ + defined( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N ) && \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 4 ) + SIXTRL_ASSERT( nu >= N ); + local_nu_n.x = nu - N; + local_nu_n.y = N; + + SIXTRL_SHARED_BUILD_ARRAY( uint2, nu_n, local_nu_n ); + SIXTRL_SHARED_FIND_MAX_PER_W( uint2, nu_n, + ( unsigned int )SIXTRL_W_SIZE, local_nu_n ); + + N = ( use_taylor_sum ) ? 
( int )local_nu_n.y : ( int )0; + nu = ( int )local_nu_n.x + N; + + #endif /* ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 4 ) */ + + /* If h(z) is so close to 0 that it is practically 0, there is no + * point in doing the extra work for the Taylor series -> in that + * very unlikely case, use the continuos fraction & verify result! */ + + #if defined( _GPUCODE ) && \ + defined( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N ) && \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 3 ) || \ + ( SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N == 4 ) + + if( use_taylor_sum ) h2_n = NS(pow_int_exp)( h2_n, N - 1 ); + use_taylor_sum = ( h2_n > ( real_type )SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N ); + + #endif /* SIXTRL_CERRF_CERNLIB_FIND_MAX_NU_N */ + + #if !defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) || \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX != 1 ) + if( y <= ( real_type )SIXTRL_CERRF_CERNLIB_MIN_Y ) + Rx = exp( -x * x ) / NS(MathConst_two_over_sqrt_pi)(); + + #else /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + if( !use_dawson_approx ) + { + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + n = nu; + nn = ( real_type )n; + + /* z outside of R_0: continued fraction / Gauss - Hermite quadrature + * z inside of R_0: first iterations of recursion until n == N */ + for( ; n > N ; --n, nn -= ( real_type )1.0 ) + { + Wx = y_plus_h + nn * Rx; + Wy = x - nn * Ry; + temp = ( Wx * Wx ) + ( Wy * Wy ); + Rx = ( real_type )0.5 * Wx; + Ry = ( real_type )0.5 * Wy; + temp = ( real_type )1.0 / temp; + Rx *= temp; + Ry *= temp; + } + + /* loop rejects everything if z is not in R_0 because then n == 0 already; + * otherwise, N iterations until taylor expansion is summed up */ + for( ; n > 0 ; --n, nn -= ( real_type )1.0 ) + { + Wx = y_plus_h + nn * Rx; + Wy = x - nn * Ry; + temp = ( Wx * Wx ) + ( Wy * Wy ); + Rx = ( real_type )0.5 * Wx; + Ry = ( real_type )0.5 * Wy; + temp = ( real_type )1.0 / temp; + Rx *= temp; + Ry *= temp; + + Wx = h2_n + Sx; + h2_n *= inv_h2; + Sx = Rx * Wx - Ry * Sy; + Sy = Ry * Wx + Rx * Sy; + } + + if( 
use_taylor_sum ) + { + Wx = NS(MathConst_two_over_sqrt_pi)() * Sx; + Wy = NS(MathConst_two_over_sqrt_pi)() * Sy; + } + else + { + Wx = NS(MathConst_two_over_sqrt_pi)() * Rx; + Wy = NS(MathConst_two_over_sqrt_pi)() * Ry; + } + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + } + else + { + SIXTRL_CERRF_RESULT_DEC temp_wz_re; + SIXTRL_CERRF_RESULT_DEC temp_wz_im; + + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, + xi, Fz_xi, Fz_nt ); + #elif defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, + xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #else + NS(dawson_cerrf)( x, y, &temp_wz_re, &temp_wz_im ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ + + Wx = temp_wz_re; + Wy = temp_wz_im; + } + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + *out_x = Wx; + *out_y = Wy; +} + +SIXTRL_INLINE void NS(cerrf_alg680_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_y + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + real_type wz_re = ( real_type )0.0; + 
real_type wz_im = ( real_type )0.0; + + real_type const xs = x * ( real_type )SIXTRL_CERRF_ALG680_INV_X0; + real_type const ys = y * ( real_type )SIXTRL_CERRF_ALG680_INV_Y0; + real_type q_rho_squ = ( xs * xs ) + ( ys * ys ); + + bool use_power_series = ( q_rho_squ < ( + real_type )SIXTRL_CERRF_ALG680_QRHO_SQU_POWER_SERIES_LIMIT ); + + real_type x_quad, y_quad, exp_minus_x_quad, factor_cos, factor_sin; + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + bool const use_dawson_approx = ( + ( x >= ( real_type )SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MIN_X ) && + ( x <= ( real_type )SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MAX_X ) && + ( y <= ( real_type )SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MAX_Y ) ); + + use_power_series &= !use_dawson_approx; + #endif /* ( CERRF_USE_DAWSON_FUNCTION == 1 ) */ + + SIXTRL_ASSERT( x <= ( real_type )SIXTRL_CERRF_ALG680_REAL_MAX_X ); + SIXTRL_ASSERT( y <= ( real_type )SIXTRL_CERRF_ALG680_REAL_MAX_Y ); + SIXTRL_ASSERT( out_x != SIXTRL_NULLPTR ); + SIXTRL_ASSERT( out_y != SIXTRL_NULLPTR ); + + factor_sin = factor_cos = exp_minus_x_quad = ( real_type )1.0; + x_quad = y_quad = ( real_type )0.0; + + if( use_power_series ) + { + x_quad = ( x - y ) * ( x + y ); + y_quad = ( real_type )2.0 * x * y; + exp_minus_x_quad = NS(exp)( -x_quad ); + NS(sincos)( y_quad, &factor_sin, &factor_cos ); + } + + factor_cos *= exp_minus_x_quad; + factor_sin *= exp_minus_x_quad; + + if( use_power_series ) + { + real_type temp = ( real_type )0.0; + + real_type const q_rho = NS(sqrt)( q_rho_squ ) * ( + ( real_type )SIXTRL_CERRF_ALG680_QRHO_C0 - + ( real_type )SIXTRL_CERRF_ALG680_QRHO_C1 * ys ); + + int const N = ( int )NS(round)( + ( real_type )SIXTRL_CERRF_ALG680_N_R0 + + ( real_type )SIXTRL_CERRF_ALG680_N_R1 * q_rho ); + + int ii = N; + + real_type kk = ( real_type )N; + real_type jj = ( real_type )2.0 * kk + ( real_type )1.0; + real_type uu = ( real_type )0.0; + real_type vv = ( real_type )0.0; + + wz_re = ( real_type )1.0 
/ jj; + + for( ; ii > 0 ; --ii, kk -= ( real_type )1.0 ) + { + real_type const c1 = ( wz_re * x_quad ) - ( wz_im * y_quad ); + real_type const c2 = ( wz_re * y_quad ) + ( wz_im * x_quad ); + real_type const inv_kk = ( real_type )1.0 / kk; + + jj -= ( real_type )2.0; + temp = c1 * inv_kk; + wz_im = c2 * inv_kk; + wz_re = temp + ( real_type )1.0 / jj; + } + + uu = ( real_type )1.0; + uu -= NS(MathConst_two_over_sqrt_pi)() * ( wz_re * y + wz_im * x ); + vv = NS(MathConst_two_over_sqrt_pi)() * ( wz_re * x - wz_im * y ); + + wz_re = +uu * factor_cos + vv * factor_sin; + wz_im = -uu * factor_sin + vv * factor_cos; + } + #if defined( CERRF_USE_DAWSON_FUNCTION ) && ( CERRF_USE_DAWSON_FUNCTION == 1 ) + else if( !use_dawson_approx ) + #else /* !CERRF_USE_DAWSON_FUNCTION */ + else + #endif /* CERRF_USE_DAWSON_FUNCTION */ + { + bool use_cont_fraction = ( ( real_type )q_rho_squ >= ( + real_type )SIXTRL_CERRF_ALG680_QRHO_SQU_CONT_FRAC_LIMIT ); + + real_type const q_rho = ( use_cont_fraction ) + ? NS(sqrt)( q_rho_squ ) + : ( ( real_type )1.0 - ys ) * NS(sqrt)( ( real_type )1.0 - q_rho_squ ); + + real_type h = ( real_type )0.0; + real_type two_h_n = ( real_type )0.0; + real_type inv_two_h = ( real_type )1.0; + + real_type rx = ( real_type )0.0; + real_type ry = ( real_type )0.0; + real_type sx = ( real_type )0.0; + real_type sy = ( real_type )0.0; + real_type nn_plus_1; + + int n; + int N = 0; + int nu = 0; + + if( !use_cont_fraction ) + { + h = ( real_type )SIXTRL_CERRF_ALG680_H1 * q_rho; + two_h_n = ( real_type )2.0 * h; + + if( two_h_n > ( real_type )SIXTRL_CERRF_ALG680_MIN_TWO_H_VALUE ) + { + inv_two_h = ( real_type )1.0 / two_h_n; + } + + N = ( int )NS(round)( + ( real_type )SIXTRL_CERRF_ALG680_N_S0 + + ( real_type )SIXTRL_CERRF_ALG680_N_S1 * q_rho ); + + nu = ( int )NS(round)( + ( real_type )SIXTRL_CERRF_ALG680_NU_S0 + + ( real_type )SIXTRL_CERRF_ALG680_NU_S1 * q_rho ); + + two_h_n = NS(pow_int_exp)( two_h_n, N ); + + if( two_h_n > ( real_type 
)SIXTRL_CERRF_ALG680_MIN_POW_2H_N ) + { + use_cont_fraction = false; + two_h_n = ( real_type )0.0; + inv_two_h = ( real_type )1.0; + N = 0; + } + } + else + { + nu = ( int )NS(round)( + ( real_type )SIXTRL_CERRF_ALG680_K0_CONT_FRACTION + + ( real_type )SIXTRL_CERRF_ALG680_K1_CONT_FRACTION / + ( ( real_type )SIXTRL_CERRF_ALG680_K2_CONT_FRACTION + + ( real_type )SIXTRL_CERRF_ALG680_K3_CONT_FRACTION * q_rho ) ); + } + + n = nu + 1; + nn_plus_1 = ( real_type )n; + --n; + + for( ; n > N ; --n, nn_plus_1 -= ( real_type )1.0 ) + { + real_type const tx = y + h + nn_plus_1 * rx; + real_type const ty = x - nn_plus_1 * ry; + real_type temp = ( tx * tx ) + ( ty * ty ); + rx = ( real_type )0.5 * tx; + ry = ( real_type )0.5 * ty; + temp = ( real_type )1.0 / temp; + rx *= temp; + ry *= temp; + } + + for( ; n >= 0 ; --n, nn_plus_1 -= ( real_type )1.0 ) + { + real_type const ty = x - nn_plus_1 * ry; + real_type tx = y + h + nn_plus_1 * rx; + real_type temp = ( tx * tx ) + ( ty * ty ); + rx = ( real_type )0.5 * tx; + ry = ( real_type )0.5 * ty; + temp = ( real_type )1.0 / temp; + rx *= temp; + ry *= temp; + + tx = two_h_n + sx; + two_h_n *= inv_two_h; + sx = ( rx * tx ) - ( ry * sy ); + sy = ( ry * tx ) + ( rx * sy ); + } + + if( !use_cont_fraction ) + { + wz_re = NS(MathConst_two_over_sqrt_pi)() * rx; + wz_im = NS(MathConst_two_over_sqrt_pi)() * ry; + } + else + { + wz_re = NS(MathConst_two_over_sqrt_pi)() * sx; + wz_im = NS(MathConst_two_over_sqrt_pi)() * sy; + } + } + #if defined( CERRF_USE_DAWSON_FUNCTION ) && \ + ( CERRF_USE_DAWSON_FUNCTION == 1 ) + else + { + SIXTRL_CERRF_RESULT_DEC real_type temp_wz_re; + SIXTRL_CERRF_RESULT_DEC real_type temp_wz_im; + + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, + xi, Fz_xi, Fz_nt ); + #elif defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, + 
xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF == 0 ) */ + NS(dawson_cerrf)( x, y, &temp_wz_re, &temp_wz_im ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ + + wz_re = temp_wz_re; + wz_im = temp_wz_im; + } + #else /* ( CERRF_USE_DAWSON_FUNCTION != 1 ) */ + + if( y < ( real_type )SIXTRL_CERRF_ALG680_MIN_Y ) + { + real_type const x_squ = x * x; + wz_re = ( x_squ < ( real_type )SIXTRL_ALG680_MAX_REAL_MAX_EXP ) + ? NS(exp)( -x_squ ) : ( real_type )0.0; + } + #endif /* ( CERRF_USE_DAWSON_FUNCTION != 1 ) */ + + *out_x = wz_re; + *out_y = wz_im; +} + + +SIXTRL_INLINE SIXTRL_REAL_T NS(cerrf_abq2011_a_m_coeff)( + int const m ) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + real_type a_m = ( real_type )0.0; + + SIXTRL_ASSERT( m < ( int )SIXTRL_CERRF_ABQ2011_N_FOURIER ); + + #if defined( SIXTRL_CERRF_ABQ2011_N_FOURIER ) && \ + ( SIXTRL_CERRF_ABQ2011_N_FOURIER == 24 ) && \ + defined( SIXTRL_CERRF_ABQ2011_TM ) && ( SIXTRL_CERRF_ABQ2011_TM == 12 ) + + switch( m ) + { + case 0: { a_m = ( real_type )0.295408975150919337883027913890; break; } /* a_00 */ + case 1: { a_m = ( real_type )0.275840233292177084395258287749; break; } /* a_01 */ + case 2: { a_m = ( real_type )0.224573955224615866231619198223; break; } /* a_02 */ + case 3: { a_m = ( real_type )0.159414938273911722757388079389; break; } /* a_03 */ + case 4: { a_m = ( real_type )0.0986657664154541891084237249422; break; } /* a_04 */ + case 5: { a_m = ( real_type )0.0532441407876394120414705837561; break; } /* a_05 */ + case 6: { a_m = ( real_type )0.0250521500053936483557475638078; break; } /* a_06 */ + case 7: { a_m = ( real_type )0.0102774656705395362477551802420; break; } /* a_07 */ + case 8: { a_m = ( real_type )0.00367616433284484706364335443079; break; } /* a_08 */ + case 9: { a_m = ( real_type )0.00114649364124223317199757239908; break; } /* a_09 */ + case 10: { a_m = ( real_type )0.000311757015046197600406683642851; break; } /* a_10 */ + case 11: { a_m = ( real_type 
)0.0000739143342960301487751427184143; break; } /* a_11 */ + case 12: { a_m = ( real_type )0.0000152794934280083634658979605774; break; } /* a_12 */ + case 13: { a_m = ( real_type )0.00000275395660822107093261423133381; break; } /* a_13 */ + case 14: { a_m = ( real_type )4.32785878190124505246159684324E-7; break; } /* a_14 */ + case 15: { a_m = ( real_type )5.93003040874588104132914772669E-8; break; } /* a_15 */ + case 16: { a_m = ( real_type )7.08449030774820424708618991843E-9; break; } /* a_16 */ + case 17: { a_m = ( real_type )7.37952063581678039121116498488E-10; break; } /* a_17 */ + case 18: { a_m = ( real_type )6.70217160600200763046136003593E-11; break; } /* a_18 */ + case 19: { a_m = ( real_type )5.30726516347079017807414252726E-12; break; } /* a_19 */ + case 20: { a_m = ( real_type )3.66432411346763916925386157070E-13; break; } /* a_20 */ + case 21: { a_m = ( real_type )2.20589494494103134281934595834E-14; break; } /* a_21 */ + case 22: { a_m = ( real_type )1.15782686262855878932236226031E-15; break; } /* a_22 */ + case 23: { a_m = ( real_type )5.29871142946730483659082681849E-17; break; } /* a_23 */ + default: { a_m = ( real_type )0.0; } + }; + + #endif /* N_FOURIER == 24 && TM = 12 */ + + return a_m; +} + +#if !defined( SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M ) + #define SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( \ + T, m, a_m, exp_cos_tm_x, exp_sin_tm_x, two_over_sqrt_pi, \ + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ) \ + \ + c4 = ( -exp_cos_tm_x ) - ( T )1.;\ + c3 = ( T )( ( m ) * ( m ) ) * ( T )( two_over_sqrt_pi ) - ( c1 ); \ + temp = ( c2_squ ) + c3 * c3; \ + sn_re = ( c3 * c4 ) + ( ( c2 ) * ( exp_sin_tm_x ) );\ + sn_im = ( ( c2 ) * c4 ) - ( c3 * ( exp_sin_tm_x ) );\ + sn_re *= a_m; \ + sn_im *= a_m; \ + temp = ( T )1.0 / temp; \ + sum_re += sn_re * temp; \ + sum_im += sn_im * temp +#endif + +#if !defined( SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M ) + #define SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( \ + T, m, a_m, 
exp_cos_tm_x, exp_sin_tm_x, two_over_sqrt_pi, \ + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ) \ + \ + c4 = ( exp_cos_tm_x ) - ( T )1.;\ + c3 = ( T )( ( m ) * ( m ) ) * ( T )( two_over_sqrt_pi ) - ( c1 ); \ + temp = ( c2_squ ) + c3 * c3; \ + sn_re = ( c3 * c4 ) - ( ( c2 ) * ( exp_sin_tm_x ) );\ + sn_im = ( ( c2 ) * c4 ) + ( c3 * ( exp_sin_tm_x ) );\ + sn_re *= a_m; \ + sn_im *= a_m; \ + temp = ( T )1.0 / temp; \ + sum_re += sn_re * temp; \ + sum_im += sn_im * temp +#endif + +SIXTRL_INLINE void NS(cerrf_abq2011_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_y + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + real_type temp = ( real_type )0.0; + real_type const x_squ = x * x; + real_type const y_squ = y * y; + bool use_fourier_sum = true; + + real_type wz_re = ( real_type )0.0; + real_type wz_im = ( real_type )0.0; + + #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) + bool const use_continued_fraction = ( ( x_squ + y_squ ) >= ( + real_type )SIXTRL_CERRF_ABQ2011_CONT_FRACTION_LIMIT_SQU ); + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) 
&& \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + bool const use_dawson_approx = ( + ( x >= ( real_type )SIXTRL_CERRF_ABQ2011_DAWSON_X_MIN ) && + ( x <= ( real_type )SIXTRL_CERRF_ABQ2011_DAWSON_X_MAX ) && + ( y <= ( real_type )SIXTRL_CERRF_ABQ2011_DAWSON_Y_MAX ) ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + use_fourier_sum &= !use_dawson_approx; + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) + use_fourier_sum &= !use_continued_fraction; + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ + + SIXTRL_ASSERT( out_x != SIXTRL_NULLPTR ); + SIXTRL_ASSERT( out_y != SIXTRL_NULLPTR ); + + if( use_fourier_sum ) + { + SIXTRL_RESULT_PTR_DEC real_type exp_cos_tm_x, exp_sin_tm_x; + + real_type sum_re = ( real_type )0.0; + real_type sum_im = ( real_type )0.0; + + real_type sn_re = ( real_type )0.0; + real_type sn_im = ( real_type )0.0; + + real_type c3 = ( real_type )0.0; + real_type c4 = ( real_type )1.0; + + real_type const c1 = ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU * + ( x + y ) * ( x - y ); + + real_type const c2 = ( real_type )2.0 * + ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU * x * y; + + real_type const c2_squ = c2 * c2; + real_type a_m; + + #if !defined( SIXTRL_CERRF_ABQ2011_FORCE_UNROLLED_LOOP ) || \ + ( SIXTRL_CERRF_ABQ2011_FORCE_UNROLLED_LOOP != 1 ) + int m = 1; + #endif /* ( SIXTRL_CERRF_ABQ2011_FORCE_UNROLLED_LOOP != 1 ) */ + + temp = ( real_type )SIXTRL_CERRF_ABQ2011_TM * x; + NS(sincos)( temp, &exp_sin_tm_x, &exp_cos_tm_x ); + + temp = NS(exp)( -( ( real_type )SIXTRL_CERRF_ABQ2011_TM * y ) ); + exp_sin_tm_x *= temp; + exp_cos_tm_x *= temp; + + c4 -= exp_cos_tm_x; + + /* Contribution for m = 0 */ + temp = x_squ + y_squ; + temp *= ( real_type )SIXTRL_CERRF_ABQ2011_TM; + wz_re = ( y * c4 ) + ( x * exp_sin_tm_x ); + wz_im = ( x * 
c4 ) - ( y * exp_sin_tm_x ); + temp = ( real_type )1. / temp; + wz_re *= temp; + wz_im *= temp; + + #if defined( SIXTRL_CERRF_ABQ2011_FORCE_UNROLLED_LOOP ) && \ + ( SIXTRL_CERRF_ABQ2011_FORCE_UNROLLED_LOOP == 1 ) + + /* Manually unrolled loop */ + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 1 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A01 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A01; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 1, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 1 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 2 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A02 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A02; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 2, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 2 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 3 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A03 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A03; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 3, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 3 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 4 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A04 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A04; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 4, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 4 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 5 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A05 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A05; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 5, a_m, + exp_cos_tm_x, exp_sin_tm_x, 
NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 5 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 6 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A06 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A06; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 6, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 6 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 7 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A07 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A07; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 7, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 7 */ + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 8 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A08 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A08; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 8, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 8 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 9 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A09 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A09; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 9, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 9 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 10 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A10 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A10; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 10, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im 
); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 10 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 11 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A11 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A11; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 11, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 11 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 12 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A12 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A12; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 12, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 12 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 13 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A13 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A13; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 13, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 13 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 14 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A14 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A14; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 14, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 14 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 15 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A15 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A15; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 15, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 15 */ + + #if ( 
SIXTRL_CERRF_ABQ2011_N_FOURIER > 16 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A16 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A16; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 16, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 16 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 17 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A17 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A17; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 17, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 17 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 18 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A18 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A18; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 18, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 18 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 19 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A19 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A19; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 19, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 19 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 20 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A20 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A20; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 20, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 20 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 21 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A21 ) 
+ a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A21; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 21, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 221 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 22 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A22 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A22; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 22, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 22 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 23 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A23 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A23; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_ODD_M( real_type, 23, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 23 */ + + #if ( SIXTRL_CERRF_ABQ2011_N_FOURIER > 24 ) && \ + defined( SIXTRL_CERRF_ABQ2011_A24 ) + a_m = ( real_type )SIXTRL_CERRF_ABQ2011_A24; + SIXTRACKLIB_CERRF_ABQ2011_FOURIER_SUM_EVEN_M( real_type, 24, a_m, + exp_cos_tm_x, exp_sin_tm_x, NS(MathConst_two_over_sqrt_pi(), + c1, c2, c2_squ, c3, c4, temp, sn_re, sn_im, sum_re, sum_im ); + #endif /* SIXTRL_CERRF_ABQ2011_N_FOURIER > 24 */ + + #else /* ( SIXTRL_CERRF_ABQ2011_FORCE_UNROLLED_LOOP != 1 ) */ + + for( ; m < ( int )SIXTRL_CERRF_ABQ2011_N_FOURIER ; ++m ) + { + exp_cos_tm_x = -exp_cos_tm_x; + exp_sin_tm_x = -exp_sin_tm_x; + c4 = exp_cos_tm_x - ( real_type )1.; + c3 = ( real_type )( m * m ) * NS(MathConst_two_over_sqrt_pi)() - c1; + + temp = c2_squ + c3 * c3; + sn_re = ( c3 * c4 ) - ( c2 * exp_sin_tm_x ); + sn_im = ( c2 * c4 ) + ( c3 * exp_sin_tm_x ); + a_m = NS(cerrf_abq2011_a_m_coeff)( m ); + sn_re *= a_m; + sn_im *= a_m; + temp = ( real_type )1.0 / temp; + sum_re += sn_re * 
temp; + sum_im += sn_im * temp; + } + + #endif /* ( SIXTRL_CERRF_ABQ2011_FORCE_UNROLLED_LOOP ) */ + /* normalize the sum + apply common pre-factor i * z */ + + temp = ( x * sum_im ) + ( y * sum_re ); + sum_im = ( x * sum_re ) - ( y * sum_im ); + + wz_re -= temp * ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI; + wz_im += sum_im * ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI; + } + + #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) + else if( use_continued_fraction ) + { + real_type rx = ( real_type )0.0; + real_type ry = ( real_type )0.0; + real_type nn = ( real_type )CERRF_CONTINUOUS_FRACTION_K; + + for( ; nn > ( real_type )0. ; nn -= ( real_type )1. ) + { + wz_re = in_y + nn * rx; + wz_im = in_x - nn * ry; + temp = ( wz_re * wz_re + wz_im * wz_im ); + + rx = ( real_type )0.5 * wz_re; + ry = ( real_type )0.5 * wz_im; + temp = ( real_type )1.0 / temp; + + rx *= temp; + ry *= temp; + } + + wz_re = NS(MathConst_two_over_sqrt_pi)() * rx; + wz_im = NS(MathConst_two_over_sqrt_pi)() * ry; + } + + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + else if( use_dawson_approx ) + { + SIXTRL_CERRF_RESULT_DEC temp_re, temp_im; + + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, + xi, Fz_xi, Fz_nt ); + #elif defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, + xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) */ + NS(dawson_cerrf)( x, y, &temp_wz_re, &temp_wz_im ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ + + wz_re = temp_re; + wz_im = temp_im; + } + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + *out_x = wz_re; + *out_y = wz_im; +} + + +SIXTRL_INLINE void 
NS(cerrf_abq2011_q1_coeff)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_y, + SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT a_m, + SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT b_n + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + real_type temp = ( real_type )0.0; + real_type const x_squ = x * x; + real_type const y_squ = y * y; + bool use_fourier_sum = true; + + real_type wz_re = ( real_type )0.0; + real_type wz_im = ( real_type )0.0; + + #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) + bool const use_continued_fraction = ( ( x_squ + y_squ ) >= ( + real_type )SIXTRL_CERRF_ABQ2011_CONT_FRACTION_LIMIT_SQU ); + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + bool const use_dawson_approx = ( + ( x >= ( real_type )SIXTRL_CERRF_ABQ2011_DAWSON_X_MIN ) && + ( x <= ( real_type )SIXTRL_CERRF_ABQ2011_DAWSON_X_MAX ) && + ( y <= ( real_type )SIXTRL_CERRF_ABQ2011_DAWSON_Y_MAX ) ); + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) 
*/ + + #if defined( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX == 1 ) + int N_POLE = ( int )-1; + bool use_pole_taylor_approx = ( + #if !defined( SIXTRL_CERRF_ABQ2011_N_TAYLOR ) || \ + ( SIXTRL_CERRF_ABQ2011_N_TAYLOR < 1 ) + ( false ) && + #endif /* ( !SIXTRL_CERRF_ABQ2011_N_TAYLOR ) */ + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + ( !use_dawson_approx ) && + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) + ( !use_continued_fraction ) && + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ + + ( b_n != SIXTRL_NULLPTR ) && + ( y < ( ( real_type )SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST ) ) && + ( x < ( ( real_type )SIXTRL_CERRF_ABQ2011_N_FOURIER * + ( real_type )SIXTRL_CERRF_ABQ2011_PI_OVER_TM + + ( real_type )SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST ) ) ); + + if( use_pole_taylor_approx ) + { + real_type d_pole_squ = y_squ; + N_POLE = ( int )NS(round)( x * ( real_type )CERRF_TM_OVER_PI ); + temp = x - ( ( real_type )SIXTRL_CERRF_ABQ2011_PI_OVER_TM * + ( real_type )N_POLE ); + + d_pole_squ += temp * temp; + if( d_pole_squ >= ( real_type )SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST_SQU ) + { + use_pole_taylor_approx = false; + N_POLE = -1; + } + + SIXTRL_ASSERT( ( N_POLE == ( int )-1 ) || ( N_POLE >= ( int )0u ) ); + SIXTRL_ASSERT( N_POLE <= ( int )SIXTRL_CERRF_ABQ2011_N_FOURIER ); + } + #else /* ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX != 1 ) */ + ( void )b_n; + + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX ) */ + + #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) + use_fourier_sum &= !use_continued_fraction; + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + 
use_fourier_sum &= !use_dawson_approx; + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + #if defined( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX == 1 ) + use_fourier_sum &= !use_pole_taylor_approx; + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX == 1 ) */ + + SIXTRL_ASSERT( out_x != SIXTRL_NULLPTR ); + SIXTRL_ASSERT( out_y != SIXTRL_NULLPTR ); + + if( use_fourier_sum ) + { + SIXTRL_RESULT_PTR_DEC real_type exp_cos_tm_x, exp_sin_tm_x; + + real_type sum_re = ( real_type )0.0; + real_type sum_im = ( real_type )0.0; + + real_type sn_re = ( real_type )0.0; + real_type sn_im = ( real_type )0.0; + + real_type c3 = ( real_type )0.0; + real_type c4 = ( real_type )1.0; + + real_type const c1 = ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU * + ( x + y ) * ( x - y ); + + real_type const c2 = ( real_type )2.0 * + ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU * x * y; + + real_type const c2_squ = c2 * c2; + real_type a_m_value; + int m = ( int )1u; + + temp = ( real_type )SIXTRL_CERRF_ABQ2011_TM * x; + NS(sincos)( temp, &exp_sin_tm_x, &exp_cos_tm_x ); + + temp = NS(exp)( -( ( real_type )SIXTRL_CERRF_ABQ2011_TM * y ) ); + exp_sin_tm_x *= temp; + exp_cos_tm_x *= temp; + + c4 -= exp_cos_tm_x; + + /* Contribution for m = 0 */ + temp = x_squ + y_squ; + temp *= ( real_type )SIXTRL_CERRF_ABQ2011_TM; + wz_re = ( y * c4 ) + ( x * exp_sin_tm_x ); + wz_im = ( x * c4 ) - ( y * exp_sin_tm_x ); + temp = ( real_type )1. 
/ temp; + wz_re *= temp; + wz_im *= temp; + + for( ; m < ( int )SIXTRL_CERRF_ABQ2011_N_FOURIER ; ++m ) + { + exp_cos_tm_x = -exp_cos_tm_x; + exp_sin_tm_x = -exp_sin_tm_x; + c4 = exp_cos_tm_x - ( real_type )1.; + c3 = ( real_type )( m * m ) * NS(MathConst_two_over_sqrt_pi)() - c1; + + temp = c2_squ + c3 * c3; + sn_re = ( c3 * c4 ) - ( c2 * exp_sin_tm_x ); + sn_im = ( c2 * c4 ) + ( c3 * exp_sin_tm_x ); + a_m_value = a_m[ m ]; + sn_re *= a_m_value; + sn_im *= a_m_value; + temp = ( real_type )1.0 / temp; + sum_re += sn_re * temp; + sum_im += sn_im * temp; + } + + /* normalize the sum + apply common pre-factor i * z */ + + temp = ( x * sum_im ) + ( y * sum_re ); + sum_im = ( x * sum_re ) - ( y * sum_im ); + + wz_re -= temp * ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI; + wz_im += sum_im * ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI; + } + #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) + else if( use_continued_fraction ) + { + real_type rx = ( real_type )0.0; + real_type ry = ( real_type )0.0; + real_type nn = ( real_type )CERRF_CONTINUOUS_FRACTION_K; + + for( ; nn > ( real_type )0. ; nn -= ( real_type )1. 
) + { + wz_re = y + nn * rx; /* arguments are x, y; this function declares no in_x / in_y */ + wz_im = x - nn * ry; + temp = ( wz_re * wz_re + wz_im * wz_im ); + + rx = ( real_type )0.5 * wz_re; + ry = ( real_type )0.5 * wz_im; + temp = ( real_type )1.0 / temp; + + rx *= temp; + ry *= temp; + } + + wz_re = NS(MathConst_two_over_sqrt_pi)() * rx; + wz_im = NS(MathConst_two_over_sqrt_pi)() * ry; + } + + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + else if( use_dawson_approx ) + { + SIXTRL_CERRF_RESULT_DEC real_type temp_re, temp_im; /* receive the Dawson result before it is copied to wz_re / wz_im */ + + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(dawson_cerrf_coeff)( x, y, &temp_re, &temp_im, + xi, Fz_xi, Fz_nt ); + #elif defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + NS(dawson_cerrf_coeff)( x, y, &temp_re, &temp_im, + xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #else + NS(dawson_cerrf)( x, y, &temp_re, &temp_im ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ + + wz_re = temp_re; + wz_im = temp_im; + } + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + #if defined( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX == 1 ) + else if( use_pole_taylor_approx ) + { + real_type dz_nn_im = y; + real_type const dz_re = x - ( ( + real_type )SIXTRL_CERRF_ABQ2011_PI_OVER_TM * ( real_type )N_POLE ); + real_type dz_nn_re = dz_re; + real_type b_n_value; + + int ii = 2; + int jj = 2 * N_POLE * ( int )SIXTRL_CERRF_ABQ2011_N_TAYLOR; /* b_n is read as consecutive ( re, im ) pairs starting at this offset */ + + SIXTRL_ASSERT( N_POLE >= 0 ); + SIXTRL_ASSERT( N_POLE <= ( int )SIXTRL_CERRF_ABQ2011_N_FOURIER ); + + /* wz = Re(b0) + i * Im(b0) */ + wz_re = b_n[ jj++ ]; + wz_im = b_n[ jj++ ]; + + /* wz += b1 * ( dz_re + i * y ) */ + b_n_value = b_n[ jj++ ]; + wz_re += b_n_value * dz_nn_re; + wz_im += b_n_value * dz_nn_im; + + b_n_value = b_n[ jj++ ]; + wz_re -= b_n_value * dz_nn_im; + wz_im += b_n_value * dz_nn_re; + + for( ; ii
< ( int )SIXTRL_CERRF_ABQ2011_N_TAYLOR ; ++ii ) + { + temp = dz_nn_re * dz_re - dz_nn_im * y; /* ( dz_nn_re + i * dz_nn_im ) *= ( dz_re + i * y ) */ + dz_nn_im *= dz_re; + dz_nn_im += dz_nn_re * y; + dz_nn_re = temp; + + b_n_value = b_n[ jj++ ]; + wz_re += b_n_value * dz_nn_re; + wz_im += b_n_value * dz_nn_im; + + b_n_value = b_n[ jj++ ]; + wz_re -= b_n_value * dz_nn_im; + wz_im += b_n_value * dz_nn_re; + } + } + + #endif /* ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX == 1 ) */ + + *out_x = wz_re; + *out_y = wz_im; +} + + +SIXTRL_INLINE void NS(cerrf_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_y + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT +{ + #if ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_BASELINE ) || \ + ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + ( void )xi; + ( void )Fz_xi; + ( void )Fz_nt; + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + ( void )Fz_kk_xi; + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + #endif /* no Dawson's
approximation! */ + + #if ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_BASELINE ) + NS(cerrf_cernlib_c_baseline_q1)( x, y, out_x, out_y ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) + NS(cerrf_cernlib_c_upstream_q1)( x, y, out_x, out_y ); + #elif SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) /* exactly one call per configuration: Fz_kk_xi only exists for COEFF > 1 */ + NS(cerrf_alg680_q1)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt ); + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_alg680_q1)( x, y, out_x, out_y, + xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_COEFF */ + NS(cerrf_alg680_q1)( x, y, out_x, out_y ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + + #elif SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ABQ2011 + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_abq2011_q1)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt ); + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_abq2011_q1)( x, y, out_x, out_y, + xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_COEFF */ + NS(cerrf_abq2011_q1)( x, y, out_x, out_y ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + + #else /* ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_OPTIMISED ) */ + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_cernlib_c_optimised_q1)( + x, y, out_x, out_y, xi, Fz_xi, Fz_nt ); + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_cernlib_c_optimised_q1)( x, y, out_x, out_y, +
xi, Fz_xi, Fz_nt, Fz_kk_xi ); + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_COEFF */ + NS(cerrf_cernlib_c_optimised_q1)( x, y, out_x, out_y ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + + #endif /* SIXTRL_CERRF_METHOD */ +} + + +/** \fn void cerrf( double x, double y, double* out_x, double* out_y ) + * \brief calculates the Faddeeva function w(z) for general z = x + i * y + * + * Calls the correct cerrf_*_q1 function according to SIXTRL_CERRF_METHOD + * internally for |x| and |y| on quadrant Q1 and transforms the result to + * Q2, Q3, and Q4 before returning them via out_x and out_y. + * + * \param[in] x real component of argument z + * \param[in] y imaginary component of argument z + * \param[out] out_x pointer to real component of result + * \param[out] out_y pointer to imanginary component of result + * + */ + +SIXTRL_INLINE void NS(cerrf)( + SIXTRL_REAL_T x, SIXTRL_REAL_T y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_y + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + real_type const sign_x = ( real_type )( ( x >= ( real_type )0. ) - + ( x < ( real_type )0. 
) ); + + real_type const sign_y = ( real_type )( ( y >= ( real_type )0. ) - + ( y < ( real_type )0. ) ); + + SIXTRL_CERRF_RESULT_DEC real_type Wx; + SIXTRL_CERRF_RESULT_DEC real_type Wy; + + x *= sign_x; /* x, y now hold |x|, |y|, i.e. the argument mapped into quadrant Q1 */ + y *= sign_y; + + #if ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_BASELINE ) || \ + ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + ( void )xi; + ( void )Fz_xi; + ( void )Fz_nt; + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + ( void )Fz_kk_xi; + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + #endif /* no Dawson's approximation! */ + + #if ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_BASELINE ) + NS(cerrf_cernlib_c_baseline_q1)( x, y, &Wx, &Wy ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) + NS(cerrf_cernlib_c_upstream_q1)( x, y, &Wx, &Wy ); + #elif SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) /* exactly one call per configuration: Fz_kk_xi only exists for COEFF > 1 */ + NS(cerrf_alg680_q1)( x, y, &Wx, &Wy, xi, Fz_xi, Fz_nt ); + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_alg680_q1)( x, y, &Wx, &Wy, xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_COEFF */ + NS(cerrf_alg680_q1)( x, y, &Wx, &Wy ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + + #elif SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ABQ2011 + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_abq2011_q1)( x, y, &Wx, &Wy, xi, Fz_xi, Fz_nt ); + #else
/* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_abq2011_q1)( x, y, &Wx, &Wy, xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_COEFF */ + NS(cerrf_abq2011_q1)( x, y, &Wx, &Wy ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + + #else /* ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_OPTIMISED ) */ + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_cernlib_c_optimised_q1)( + x, y, &Wx, &Wy, xi, Fz_xi, Fz_nt ); + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_cernlib_c_optimised_q1)( x, y, &Wx, &Wy, + xi, Fz_xi, Fz_nt, Fz_kk_xi ); + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_COEFF */ + NS(cerrf_cernlib_c_optimised_q1)( x, y, &Wx, &Wy ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + + #endif /* SIXTRL_CERRF_METHOD */ + + if( sign_y < ( real_type )0.0 ) /* Quadrants Q3 and Q4 */ + { + real_type const exp_arg = ( y - x ) * ( y + x ); + real_type const trig_arg = ( real_type )2. * x * y; + real_type const exp_factor = ( real_type )2.
* exp( exp_arg ); + + SIXTRL_RESULT_PTR_DEC real_type sin_arg; + SIXTRL_RESULT_PTR_DEC real_type cos_arg; + + NS(sincos)( trig_arg, &sin_arg, &cos_arg ); + Wx = exp_factor * cos_arg - Wx; + Wy = exp_factor * sin_arg + Wy; + } + + *out_x = Wx; + *out_y = sign_x * Wy; /* Takes care of Quadrants Q2 and Q3 */ +} + +#if defined( __cplusplus ) && !defined( _GPUCODE ) +} +#endif /* !defined( __cplusplus ) && !defined( _GPUCODE ) */ +#endif /* SIXTACKLIB_COMMON_BE_BEAMFIELDS_FADDEEVA_HEADER_H__ */ diff --git a/sixtracklib/common/be_beamfields/faddeeva_cern.h b/sixtracklib/common/be_beamfields/faddeeva_cern.h deleted file mode 100644 index 60b3ba60a..000000000 --- a/sixtracklib/common/be_beamfields/faddeeva_cern.h +++ /dev/null @@ -1,128 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// -// FILE NAME -// ErrorFunctions.c -// -// 02/19/2015, 08/18/2015 -// -// AUTHORS -// Hannes Bartosik, Adrian Oeftiger -// -// DESCRIPTION -// Error functions -// -///////////////////////////////////////////////////////////////////////////// - -#ifndef SIXTACKLIB_COMMON_BE_BEAMFIELDS_FADDEEVA_CERN_HEADER_H__ -#define SIXTACKLIB_COMMON_BE_BEAMFIELDS_FADDEEVA_CERN_HEADER_H__ - -#if !defined( SIXTRL_NO_INCLUDES ) - #include "sixtracklib/common/definitions.h" -#endif /* !defined( SIXTRL_NO_INCLUDES ) */ - -#if defined( __cplusplus ) && !defined( _GPUCODE ) -extern "C" { -#endif /* !defined( __cplusplus ) && !defined( _GPUCODE ) */ - -SIXTRL_FN SIXTRL_STATIC void cerrf( - SIXTRL_REAL_T in_real, SIXTRL_REAL_T in_imag, - SIXTRL_ARGPTR_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, - SIXTRL_ARGPTR_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag ); - -#if defined( __cplusplus ) && !defined( _GPUCODE ) -} -#endif /* !defined( __cplusplus ) && !defined( _GPUCODE ) */ - -#if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) - #include -#endif - -#if defined( __cplusplus ) && !defined( _GPUCODE ) -extern "C" { -#endif /* !defined( __cplusplus ) && !defined( _GPUCODE ) */ - -/* 
From: be_beamfields/faddeeva_cern.h */ -SIXTRL_INLINE void cerrf( SIXTRL_REAL_T in_real, SIXTRL_REAL_T in_imag, - SIXTRL_ARGPTR_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, - SIXTRL_ARGPTR_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag ) -{ - /* This function calculates the SIXTRL_REAL_T precision complex error fnct. - based on the algorithm of the FORTRAN function written at CERN by K. Koelbig - Program C335, 1970. See also M. Bassetti and G.A. Erskine, "Closed - expression for the electric field of a two-dimensional Gaussian charge - density", CERN-ISR-TH/80-06; */ - - int n, nc, nu; - SIXTRL_REAL_T a_constant = 1.12837916709551; - SIXTRL_REAL_T xLim = 5.33; - SIXTRL_REAL_T yLim = 4.29; - SIXTRL_REAL_T h, q, Saux, Sx, Sy, Tn, Tx, Ty, Wx, Wy, xh, xl, x, yh, y; - SIXTRL_REAL_T Rx [33]; - SIXTRL_REAL_T Ry [33]; - - x = fabs(in_real); - y = fabs(in_imag); - - if (y < yLim && x < xLim){ - q = (1.0 - y / yLim) * sqrt(1.0 - (x / xLim) * (x / xLim)); - h = 1.0 / (3.2 * q); - nc = 7 + (int) (23.0 * q); - xl = pow(h, (SIXTRL_REAL_T) (1 - nc)); - xh = y + 0.5 / h; - yh = x; - nu = 10 + (int) (21.0 * q); - Rx[nu] = 0.; - Ry[nu] = 0.; - for (n = nu; n > 0; n--){ - Tx = xh + n * Rx[n]; - Ty = yh - n * Ry[n]; - Tn = Tx*Tx + Ty*Ty; - Rx[n-1] = 0.5 * Tx / Tn; - Ry[n-1] = 0.5 * Ty / Tn; - } - /* .... */ - Sx = 0.; - Sy = 0.; - for (n = nc; n>0; n--){ - Saux = Sx + xl; - Sx = Rx[n-1] * Saux - Ry[n-1] * Sy; - Sy = Rx[n-1] * Sy + Ry[n-1] * Saux; - xl = h * xl; - }; - Wx = a_constant * Sx; - Wy = a_constant * Sy; - } - else{ - xh = y; - yh = x; - Rx[0] = 0.; - Ry[0] = 0.; - for (n = 9; n>0; n--){ - Tx = xh + n * Rx[0]; - Ty = yh - n * Ry[0]; - Tn = Tx * Tx + Ty * Ty; - Rx[0] = 0.5 * Tx / Tn; - Ry[0] = 0.5 * Ty / Tn; - }; - Wx = a_constant * Rx[0]; - Wy = a_constant * Ry[0]; - } - if (y == 0.) {Wx = exp(-x * x);} - if (in_imag < 0.){ - Wx = 2.0 * exp(y * y - x * x) * cos(2.0 * x * y) - Wx; - Wy = - 2.0 * exp(y * y - x * x) * sin(2.0 * x * y) - Wy; - if (in_real > 0.) 
{Wy = -Wy;} - } - else if (in_real < 0.) {Wy = -Wy;} - - *out_real = Wx; - *out_imag = Wy; -} - -#if defined( __cplusplus ) && !defined( _GPUCODE ) -} -#endif /* !defined( __cplusplus ) && !defined( _GPUCODE ) */ - -#endif /* SIXTACKLIB_COMMON_BE_BEAMFIELDS_FADDEEVA_CERN_HEADER_H__ */ - -/* end: sixtracklib/common/be_beamfields/faddeeva_cern.h */ From 43cc32168dd904cbb3d14058d50e71a04ab3c228 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:07:32 +0200 Subject: [PATCH 57/77] common: updates calls to the faddeeva function to work with new conventions --- sixtracklib/common/be_beamfields/gauss_fields.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sixtracklib/common/be_beamfields/gauss_fields.h b/sixtracklib/common/be_beamfields/gauss_fields.h index f4bf5340b..3417cec10 100644 --- a/sixtracklib/common/be_beamfields/gauss_fields.h +++ b/sixtracklib/common/be_beamfields/gauss_fields.h @@ -46,7 +46,7 @@ SIXTRL_FN SIXTRL_STATIC void NS(get_Ex_Ey_Gx_Gy_gauss)( #if !defined( SIXTRL_NO_INCLUDES ) #include "sixtracklib/common/constants.h" - #include "sixtracklib/common/be_beamfields/faddeeva_cern.h" + #include "sixtracklib/common/be_beamfields/faddeeva.h" #endif #if !defined( _GPUCODE ) && defined( __cplusplus ) @@ -89,7 +89,10 @@ SIXTRL_INLINE void NS(get_transv_field_gauss_ellip)( SIXTRL_REAL_T S, factBE, Ex, Ey; SIXTRL_REAL_T etaBE_re, etaBE_im, zetaBE_re, zetaBE_im; - SIXTRL_REAL_T w_etaBE_re, w_etaBE_im, w_zetaBE_re, w_zetaBE_im; + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T w_etaBE_re; + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T w_etaBE_im; + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T w_zetaBE_re; + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T w_zetaBE_im; SIXTRL_REAL_T expBE; if (sigmax>sigmay){ @@ -103,9 +106,9 @@ SIXTRL_INLINE void NS(get_transv_field_gauss_ellip)( zetaBE_im = aby; //w_zetaBE_re, w_zetaBE_im = wfun(zetaBE_re/S, zetaBE_im/S) - cerrf(zetaBE_re/S, zetaBE_im/S , &(w_zetaBE_re), &(w_zetaBE_im)); + 
NS(cerrf)(zetaBE_re/S, zetaBE_im/S , &(w_zetaBE_re), &(w_zetaBE_im)); //w_etaBE_re, w_etaBE_im = wfun(etaBE_re/S, etaBE_im/S) - cerrf(etaBE_re/S, etaBE_im/S , &(w_etaBE_re), &(w_etaBE_im)); + NS(cerrf)(etaBE_re/S, etaBE_im/S , &(w_etaBE_re), &(w_etaBE_im)); expBE = exp(-abx*abx/(2*sigmax*sigmax)-aby*aby/(2*sigmay*sigmay)); @@ -123,9 +126,9 @@ SIXTRL_INLINE void NS(get_transv_field_gauss_ellip)( zetaBE_im = abx; //w_zetaBE_re, w_zetaBE_im = wfun(zetaBE_re/S, zetaBE_im/S) - cerrf(zetaBE_re/S, zetaBE_im/S , &(w_zetaBE_re), &(w_zetaBE_im)); + NS(cerrf)(zetaBE_re/S, zetaBE_im/S , &(w_zetaBE_re), &(w_zetaBE_im)); //w_etaBE_re, w_etaBE_im = wfun(etaBE_re/S, etaBE_im/S) - cerrf(etaBE_re/S, etaBE_im/S , &(w_etaBE_re), &(w_etaBE_im)); + NS(cerrf)(etaBE_re/S, etaBE_im/S , &(w_etaBE_re), &(w_etaBE_im)); expBE = exp(-aby*aby/(2*sigmay*sigmay)-abx*abx/(2*sigmax*sigmax)); From 9ef45dcd1570838798fc10684626fd959d4c3034 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:08:02 +0200 Subject: [PATCH 58/77] common: adds symbols in shared library for faddeeva related methods --- sixtracklib/common/be_beamfields/faddeeva.c | 229 ++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 sixtracklib/common/be_beamfields/faddeeva.c diff --git a/sixtracklib/common/be_beamfields/faddeeva.c b/sixtracklib/common/be_beamfields/faddeeva.c new file mode 100644 index 000000000..14f61aa65 --- /dev/null +++ b/sixtracklib/common/be_beamfields/faddeeva.c @@ -0,0 +1,229 @@ +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/be_beamfields/faddeeva.h" + + #if ( defined( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX == 1 ) ) || \ + ( defined( SIXTRL_CERRF_ABQ2011_USE_COEFF ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_COEFF == 1 ) ) + #include "sixtracklib/common/be_beamfields/abq2011_coeff.h" + #endif /* SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX || + SIXTRL_CERRF_ABQ2011_USE_COEFF */ + + #if defined( 
SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + #include "sixtracklib/common/be_beamfields/dawson_approx.h" + + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #include "sixtracklib/common/be_beamfields/dawson_coeff.h" + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) + +void NS(cerrf_cernlib_c_baseline_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + NS(cerrf_cernlib_c_baseline_q1)( x, y, out_real, out_imag ); +} + +void NS(cerrf_cernlib_c_upstream_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + NS(cerrf_cernlib_c_upstream_q1)( x, y, out_real, out_imag ); /* forward to the inline implementation, not to this wrapper itself */ +} + +void NS(cerrf_cernlib_c_optimised_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_cernlib_c_optimised_q1)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ] ); + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_cernlib_c_optimised_q1)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ], +
&NS(CERRF_DAWSON_FZ_KK_XI)[ 0 ] ); + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_APPROX || SIXTRL_CERRF_USE_DAWSON_COEFF == 0 */ + NS(cerrf_cernlib_c_optimised_q1)( x, y, out_real, out_imag ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +} + +void NS(cerrf_alg680_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_alg680_q1)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ] ); + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_alg680_q1)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ], + &NS(CERRF_DAWSON_FZ_KK_XI)[ 0 ] ); + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_APPROX || SIXTRL_CERRF_USE_DAWSON_COEFF == 0 */ + NS(cerrf_alg680_q1)( x, y, out_real, out_imag ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +} + +SIXTRL_REAL_T NS(cerrf_abq2011_a_m_coeff_ext)( int const m ) SIXTRL_NOEXCEPT { + return NS(cerrf_abq2011_a_m_coeff)( m ); } + +void NS(cerrf_abq2011_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF 
>= 1 ) + + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_abq2011_q1)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ] ); + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_abq2011_q1)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ], + &NS(CERRF_DAWSON_FZ_KK_XI)[ 0 ] ); + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_APPROX || SIXTRL_CERRF_USE_DAWSON_COEFF == 0 */ + NS(cerrf_abq2011_q1)( x, y, out_real, out_imag ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +} + +void NS(cerrf_abq2011_q1_coeff_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag, + SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT a_m, + SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT b_n +) SIXTRL_NOEXCEPT +{ + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_abq2011_q1_coeff)( x, y, out_real, out_imag, a_m, + b_n, &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ] ); + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_abq2011_q1_coeff)( x, y, out_real, out_imag, a_m, + b_n, &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ], + &NS(CERRF_DAWSON_FZ_KK_XI)[ 0 ] ); + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_APPROX || SIXTRL_CERRF_USE_DAWSON_COEFF == 0 */ + NS(cerrf_abq2011_q1_coeff)( x, y, out_real, out_imag, a_m, b_n ); + #endif 
/* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +} + +void NS(cerrf_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + #if ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_BASELINE ) + NS(cerrf_cernlib_c_baseline_q1_ext)( x, y, out_real, out_imag ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) + NS(cerrf_cernlib_c_upstream_q1_ext)( x, y, out_real, out_imag ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 ) + NS(cerrf_alg680_q1_ext)( x, y, out_real, out_imag ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ABQ2011 ) + NS(cerrf_abq2011_q1_ext)( x, y, out_real, out_imag ); + #else /* SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_OPTIMISED */ + NS(cerrf_cernlib_c_optimised_q1_ext)( x, y, out_real, out_imag ); + #endif /* SIXTRL_CERRF_METHOD */ +} + +void NS(cerrf_ext)( + SIXTRL_REAL_T x, SIXTRL_REAL_T y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_y +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + real_type const sign_x = ( real_type )( ( x >= ( real_type )0. ) - + ( x < ( real_type )0. ) ); + + real_type const sign_y = ( real_type )( ( y >= ( real_type )0. ) - + ( y < ( real_type )0. 
) ); + + SIXTRL_CERRF_RESULT_DEC real_type Wx; + SIXTRL_CERRF_RESULT_DEC real_type Wy; + + x *= sign_x; + y *= sign_y; + + #if ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_BASELINE ) + NS(cerrf_cernlib_c_baseline_q1_ext)( x, y, &Wx, &Wy ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) + NS(cerrf_cernlib_c_upstream_q1_ext)( x, y, &Wx, &Wy ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 ) + NS(cerrf_alg680_q1_ext)( x, y, &Wx, &Wy ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ABQ2011 ) + NS(cerrf_abq2011_q1_ext)( x, y, &Wx, &Wy ); + #else /* SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_OPTIMISED */ + NS(cerrf_cernlib_c_optimised_q1_ext)( x, y, &Wx, &Wy ); + #endif /* SIXTRL_CERRF_METHOD */ + + if( sign_y < ( real_type )0.0 ) /* Quadrants Q3 and Q4 */ + { + real_type const exp_arg = ( y - x ) * ( y + x ); + real_type const trig_arg = ( real_type )2. * x * y; + real_type const exp_factor = ( real_type )2. * exp( exp_arg ); + + SIXTRL_RESULT_PTR_DEC real_type sin_arg; + SIXTRL_RESULT_PTR_DEC real_type cos_arg; + + NS(sincos)( trig_arg, &sin_arg, &cos_arg ); + Wx = exp_factor * cos_arg - Wx; + Wy = exp_factor * sin_arg + Wy; + } + + *out_x = Wx; + *out_y = sign_x * Wy; /* Takes care of Quadrants Q2 and Q3 */ +} + +#endif /* !defined( _GPUCODE ) */ From eb409cb389f54e0de42e15911d50037159b2174e Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:08:49 +0200 Subject: [PATCH 59/77] common: adds all parameters controlling faddeeva as preprocessor macros --- .../common/be_beamfields/definitions.h | 558 ++++++++++++++++++ 1 file changed, 558 insertions(+) create mode 100644 sixtracklib/common/be_beamfields/definitions.h diff --git a/sixtracklib/common/be_beamfields/definitions.h b/sixtracklib/common/be_beamfields/definitions.h new file mode 100644 index 000000000..ac8e996c9 --- /dev/null +++ b/sixtracklib/common/be_beamfields/definitions.h @@ -0,0 +1,558 @@ +#ifndef SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DEFINITIONS_H__ +#define 
SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DEFINITIONS_H__ + +#if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) + #include + #include +#endif /* !defined( SIXTRL_NO_SYSTEM_INCLUDES ) */ + +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/definitions.h" + #include "sixtracklib/common/internal/particles_defines.h" + #include "sixtracklib/common/buffer/buffer_type.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if defined( __cplusplus ) && !defined( _GPUCODE ) && !defined( __CUDA_ARCH__ ) +extern "C" { +#endif /* C++, Host */ + +#if !defined( SIXTRL_REAL_TYPE_EPS ) + #define SIXTRL_REAL_TYPE_EPS 2.22044604925031e-16 +#endif /* !defined( SIXTRL_REAL_TYPE_EPS ) */ + +#if !defined( SIXTRL_REAL_TYPE_MIN ) + #define SIXTRL_REAL_TYPE_MIN 2.22507385850720e-308 +#endif /* !defined( SIXTRL_REAL_TYPE_MIN ) */ + +#if !defined( SIXTRL_REAL_TYPE_MAX ) + #define SIXTRL_REAL_TYPE_MAX 1.79769313486232e+308 +#endif /* !defined( SIXTRL_REAL_TYPE_MAX ) */ + +#if !defined( SIXTRL_REAL_XY_ABS_OVERFLOW_LIMIT ) + #define SIXTRL_REAL_XY_ABS_OVERFLOW_LIMIT 1.6814159916986475e-8 +#endif /* !defined( SIXTRL_REAL_XY_ABS_OVERFLOW_LIMIT ) */ + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + +#if !defined( SIXTRL_CERRF_RESULT_DEC ) + #define SIXTRL_CERRF_RESULT_DEC_UNDEF + #if defined( SIXTRL_ARGPTR_DEC ) + #define SIXTRL_CERRF_RESULT_DEC SIXTRL_ARGPTR_DEC + #else /* defined( SIXTRL_ARGPTR_DEC ) */ + #define SIXTRL_CERRF_RESULT_DEC + #endif /* defined( SIXTRL_ARGPTR_DEC ) */ +#endif /* !defined( SIXTRL_CERRF_RESULT_DEC ) */ + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + +#define SIXTRL_CERRF_CERNLIB_OPTIMISED 0 +#define SIXTRL_CERRF_CERNLIB_BASELINE 1 +#define SIXTRL_CERRF_CERNLIB_UPSTREAM 2 +#define SIXTRL_CERRF_ALG680 3 +#define SIXTRL_CERRF_ABQ2011 4 + +/* ------------------------------------------------------------------------- */ + +#if !defined( SIXTRL_CERRF_METHOD ) + #define SIXTRL_CERRF_METHOD 0 +#endif /* 
!defined( SIXTRL_CERRF_METHOD ) */ + +/* ************************************************************************* */ +/* Cernlib upstream: */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_X0 ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_X0 8.3 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_X0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_Y0 ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_Y0 7.4 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_Y0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_NMAX ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_NMAX 37 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_NMAX ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_Y ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_Y 1e-20 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_Y ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_POW_2H_N ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_POW_2H_N 2.22044604925031e-16 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_POW_2H_N ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_H_0 ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_H_0 1.6 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_H_0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_NU ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_NU 36 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_NU ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_N ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_N 34 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_N ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_K ) + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_K 9 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_K ) */ + +/* ************************************************************************* */ +/* Cernlib baseline and optimised: */ +/* possible command line flags: + * SIXTRL_CERRF_CERNLIB_NO_GZ_WEIGHT_FN -> don't calculate g(z), use g(z) = 1 + */ + +#if !defined( SIXTRL_CERRF_CERNLIB_X0 ) + #define SIXTRL_CERRF_CERNLIB_X0 5.33 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_X0 ) */ + +#if
!defined( SIXTRL_CERRF_CERNLIB_Y0 ) + #define SIXTRL_CERRF_CERNLIB_Y0 4.29 +#endif /* !defined( CERRF_Y0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_INV_X0 ) + #define SIXTRL_CERRF_CERNLIB_INV_X0 \ + 0.1876172607879924928084412428495993283834529037070377056722994255411 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_INV_X0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_INV_Y0 ) + #define SIXTRL_CERRF_CERNLIB_INV_Y0 \ + 0.2331002331002330983027077233874512201442963277845553760221051735203 +#endif /* !defined( CERRF_INV_Y0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_MIN_Y ) + #define SIXTRL_CERRF_CERNLIB_MIN_Y 1e-20 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_MIN_Y ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N ) + #define SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N 2.22044604925031e-16 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_H_0 ) + #define SIXTRL_CERRF_CERNLIB_H_0 1.6 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_H_0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_NU_0 ) + #define SIXTRL_CERRF_CERNLIB_NU_0 10 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_NU_0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_NU_1 ) + #define SIXTRL_CERRF_CERNLIB_NU_1 21 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_NU_1 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_N_0 ) + #define SIXTRL_CERRF_CERNLIB_N_0 7 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_N_0 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_N_1 ) + #define SIXTRL_CERRF_CERNLIB_N_1 23 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_N_1 ) */ + +#if !defined( SIXTRL_CERRF_CERNLIB_K ) + #define SIXTRL_CERRF_CERNLIB_K 9 +#endif /* !defined( SIXTRL_CERRF_CERNLIB_K ) */ + +#if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + + #if !defined( SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MAX_Y ) + #define SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MAX_Y 0.5 + #endif /* !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_Y ) */ + + #if !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X ) + #define 
SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X 0.0 + #endif /* !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X ) */ + + #if !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X ) + #define SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X 9.0 + #endif /* !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X ) */ +#endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + +/* ************************************************************************* */ +/* ACM Algorithm 680: */ + +#if !defined( SIXTRL_CERRF_ALG680_X0 ) + #define SIXTRL_CERRF_ALG680_X0 6.3 +#endif /* !defined( SIXTRL_CERRF_ALG680_X0 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_Y0 ) + #define SIXTRL_CERRF_ALG680_Y0 4.4 +#endif /* !defined( SIXTRL_CERRF_ALG680_Y0 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_INV_X0 ) + #define SIXTRL_CERRF_ALG680_INV_X0 \ + 0.1587301587301587346343079853873784700534576710619202837164744777375 +#endif /* !defined( SIXTRL_CERRF_ALG680_INV_X0 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_INV_Y0 ) + #define SIXTRL_CERRF_ALG680_INV_Y0 \ + 0.227272727272727254376478931815595854870297787025775669315971535784 +#endif /* !defined( SIXTRL_CERRF_ALG680_INV_Y0 ) */ + +#if !defined( SIXTRL_ALG680_MAX_REAL_MAX_EXP ) + /* ln( SIXTRL_REAL_TYPE_MAX ) - ln( 2.0 ) */ + #define SIXTRL_ALG680_MAX_REAL_MAX_EXP 709.089565712824 +#endif /* !defined( SIXTRL_ALG680_MAX_REAL_MAX_EXP ) */ + +#if !defined( SIXTRL_CERRF_ALG680_REAL_MAX_X ) + /* sqrt( SIXTRL_REAL_TYPE_MAX / SIXTRL_CERRF_ALG680_X0 ) = 5.34179854210932e+153 */ + /* chosen 5e153 to be on the safe side */ + #define SIXTRL_CERRF_ALG680_REAL_MAX_X 5.0e153 +#endif /* !defined( SIXTRL_CERRF_ALG680_REAL_MAX_X ) */ + +#if !defined( SIXTRL_CERRF_ALG680_REAL_MAX_Y ) + /* sqrt( SIXTRL_REAL_TYPE_MAX / SIXTRL_CERRF_ALG680_Y0 ) = 6.39192163249533e+153 */ + /* chosen 6e153 to be on the safe side */ + #define SIXTRL_CERRF_ALG680_REAL_MAX_Y 6.0e153 +#endif /* !defined( SIXTRL_CERRF_ALG680_REAL_MAX_Y ) */ + +#if !defined(
SIXTRL_CERRF_ALG680_QRHO_SQU_POWER_SERIES_LIMIT ) + #define SIXTRL_CERRF_ALG680_QRHO_SQU_POWER_SERIES_LIMIT 0.0852640 +#endif /* !defined( SIXTRL_CERRF_ALG680_QRHO_SQU_POWER_SERIES_LIMIT ) */ + +#if !defined( SIXTRL_CERRF_ALG680_QRHO_SQU_CONT_FRAC_LIMIT ) + #define SIXTRL_CERRF_ALG680_QRHO_SQU_CONT_FRAC_LIMIT 1.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_QRHO_SQU_CONT_FRAC_LIMIT ) */ + +#if !defined( SIXTRL_CERRF_ALG680_QRHO_C0 ) + #define SIXTRL_CERRF_ALG680_QRHO_C0 1.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_QRHO_C0 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_QRHO_C1 ) + #define SIXTRL_CERRF_ALG680_QRHO_C1 0.85 +#endif /* !defined( SIXTRL_CERRF_ALG680_QRHO_C1 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_N_R0 ) + #define SIXTRL_CERRF_ALG680_N_R0 6.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_N_R0 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_N_R1 ) + #define SIXTRL_CERRF_ALG680_N_R1 72.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_N_R1 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_H1 ) + #define SIXTRL_CERRF_ALG680_H1 1.88 +#endif /* !defined( SIXTRL_CERRF_ALG680_H1 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_NU_S0 ) + #define SIXTRL_CERRF_ALG680_NU_S0 16.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_NU_S0 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_NU_S1 ) + #define SIXTRL_CERRF_ALG680_NU_S1 26.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_NU_S1 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_N_S0 ) + #define SIXTRL_CERRF_ALG680_N_S0 7.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_N_S0 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_N_S1 ) + #define SIXTRL_CERRF_ALG680_N_S1 34.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_N_S1 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_K0_CONT_FRACTION ) + #define SIXTRL_CERRF_ALG680_K0_CONT_FRACTION 3.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_K0_CONT_FRACTION ) */ + +#if !defined( SIXTRL_CERRF_ALG680_K1_CONT_FRACTION ) + #define SIXTRL_CERRF_ALG680_K1_CONT_FRACTION 1442.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_K1_CONT_FRACTION ) */ + +#if !defined( 
SIXTRL_CERRF_ALG680_K2_CONT_FRACTION ) + #define SIXTRL_CERRF_ALG680_K2_CONT_FRACTION 77 +#endif /* !defined( SIXTRL_CERRF_ALG680_K2_CONT_FRACTION ) */ + +#if !defined( SIXTRL_CERRF_ALG680_K3_CONT_FRACTION ) + #define SIXTRL_CERRF_ALG680_K3_CONT_FRACTION 26 +#endif /* !defined( SIXTRL_CERRF_ALG680_K3_CONT_FRACTION ) */ + +#if !defined( SIXTRL_CERRF_ALG680_K_Q0 ) + #define SIXTRL_CERRF_ALG680_K_Q0 16.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_K_Q0 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_K_Q1 ) + #define SIXTRL_CERRF_ALG680_K_Q1 26.0 +#endif /* !defined( SIXTRL_CERRF_ALG680_K_Q1 ) */ + +#if !defined( SIXTRL_CERRF_ALG680_MIN_TWO_H_VALUE ) + #define SIXTRL_CERRF_ALG680_MIN_TWO_H_VALUE 2.22044604925031e-16 +#endif /* !defined( SIXTRL_CERRF_ALG680_MIN_TWO_H_VALUE ) */ + +#if !defined( SIXTRL_CERRF_ALG680_MIN_POW_2H_N ) + #define SIXTRL_CERRF_ALG680_MIN_POW_2H_N 2.22044604925031e-16 +#endif /* !defined( SIXTRL_CERRF_ALG680_MIN_POW_2H_N ) */ + +#if !defined( SIXTRL_CERRF_ALG680_MIN_Y ) + #define SIXTRL_CERRF_ALG680_MIN_Y 1e-20 +#endif /* !defined( SIXTRL_CERRF_ALG680_MIN_Y ) */ + +#if !defined( SIXTRL_CERRF_ALG680_TWO_H_LIMIT ) + #define SIXTRL_CERRF_ALG680_TWO_H_LIMIT 0.25 +#endif /* SIXTRL_CERRF_ALG680_TWO_H_LIMIT */ + +#if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + + #if !defined( SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MAX_Y ) + #define SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MAX_Y 0.5 + #endif /* !defined( SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MAX_Y ) */ + + #if !defined( SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MIN_X ) + #define SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MIN_X 1.83 + #endif /* !defined( SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MIN_X ) */ + + #if !defined( SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MAX_X ) + #define SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MAX_X 10.0 + #endif /* !defined( SIXTRL_CERRF_ALG680_USE_DAWSON_APPROX_MAX_X ) */ +#endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + +/*
************************************************************************* */ +/* Abrarov & Quine 2011 / 2012: */ + +#if !defined( SIXTRL_CERRF_ABQ2011_N_FOURIER ) + #define SIXTRL_CERRF_ABQ2011_N_FOURIER 24 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_N_FOURIER ) */ + +#if !defined( SIXTRL_CERRF_ABQ2011_TM ) + #define SIXTRL_CERRF_ABQ2011_TM 12 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_TM ) */ + +#if !defined( SIXTRL_CERRF_ABQ2011_TM_SQU ) + #define SIXTRL_CERRF_ABQ2011_TM_SQU 144.0 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_TM_SQU ) */ + +/* tm * tm / sqrt( pi ) */ +#if !defined( SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI ) + #define SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI \ + 81.24330003087690532052344102475125236154329062337583538554834392607 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI ) */ + + +#if !defined( SIXTRL_CERRF_ABQ2011_CONT_FRACTION_K ) + #define SIXTRL_CERRF_ABQ2011_CONT_FRACTION_K 9 +#endif /* SIXTRL_CERRF_ABQ2011_CONT_FRACTION_K */ + + +#if !defined( SIXTRL_CERRF_ABQ2011_N_TAYLOR ) + #define SIXTRL_CERRF_ABQ2011_N_TAYLOR 6 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_N_TAYLOR ) */ + +/* SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF = N_FOURIER * N_TAYLOR * 2 */ +#if !defined( SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF ) + #define SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF 288 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF ) */ + +#if !defined( SIXTRL_CERRF_ABQ2011_PI_OVER_TM ) + #define SIXTRL_CERRF_ABQ2011_PI_OVER_TM \ + 0.261799387799149436538553615273291907016430783281258818414578716 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_PI_OVER_TM ) */ + +#if !defined( SIXTRL_CERRF_ABQ2011_TM_OVER_PI ) + #define SIXTRL_CERRF_ABQ2011_TM_OVER_PI \ + 3.819718634205488058453210320940344688827031497770954769944016257 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_TM_OVER_PI ) */ + +#if !defined( SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST ) + #define SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST 3e-3 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST ) */ + +#if !defined( 
SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST_SQU ) + #define SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST_SQU 9e-6 +#endif /* !defined( SIXTRL_CERRF_ABQ2011_MIN_POLE_DIST_SQU ) */ + +#if !defined( SIXTRL_CERRF_ABQ2011_CONT_FRACTION_LIMIT_SQU ) + #define SIXTRL_CERRF_ABQ2011_CONT_FRACTION_LIMIT_SQU 225.0 +#endif /* SIXTRL_CERRF_ABQ2011_CONT_FRACTION_LIMIT_SQU */ + +#if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + + #if !defined( SIXTRL_CERRF_ABQ2011_DAWSON_X_MIN ) + #define SIXTRL_CERRF_ABQ2011_DAWSON_X_MIN 0.1 + #endif /* !defined( SIXTRL_CERRF_ABQ2011_DAWSON_X_MIN ) */ + + #if !defined( SIXTRL_CERRF_ABQ2011_DAWSON_X_MAX ) + #if defined( SIXTRL_CERRF_ABQ2011_USE_CONTINUOUS_FRACTION ) && \ + ( SIXTRL_CERRF_ABQ2011_USE_CONTINUOUS_FRACTION == 1 ) + #define SIXTRL_CERRF_ABQ2011_DAWSON_X_MAX 15.01 + #else + #define SIXTRL_CERRF_ABQ2011_DAWSON_X_MAX 20.00 + #endif + #endif /* !defined( SIXTRL_CERRF_ABQ2011_DAWSON_X_MAX ) */ + + #if !defined( SIXTRL_CERRF_ABQ2011_DAWSON_Y_MAX ) + #define SIXTRL_CERRF_ABQ2011_DAWSON_Y_MAX 0.5 + #endif /* !defined( SIXTRL_CERRF_ABQ2011_DAWSON_Y_MAX ) */ + +#endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + +/* ------------------------------------------------------------------------- */ + +#if ( SIXTRL_CERRF_ABQ2011_N_FOURIER == 24 ) && \ + ( SIXTRL_CERRF_ABQ2011_TM == 12 ) + + #define SIXTRL_CERRF_ABQ2011_A00 0.295408975150919337883027913890 + #define SIXTRL_CERRF_ABQ2011_A01 0.275840233292177084395258287749 + #define SIXTRL_CERRF_ABQ2011_A02 0.224573955224615866231619198223 + #define SIXTRL_CERRF_ABQ2011_A03 0.159414938273911722757388079389 + #define SIXTRL_CERRF_ABQ2011_A04 0.0986657664154541891084237249422 + #define SIXTRL_CERRF_ABQ2011_A05 0.0532441407876394120414705837561 + #define SIXTRL_CERRF_ABQ2011_A06 0.0250521500053936483557475638078 + #define SIXTRL_CERRF_ABQ2011_A07 0.0102774656705395362477551802420 + #define SIXTRL_CERRF_ABQ2011_A08 0.00367616433284484706364335443079 + #define 
SIXTRL_CERRF_ABQ2011_A09 0.00114649364124223317199757239908 + #define SIXTRL_CERRF_ABQ2011_A10 0.000311757015046197600406683642851 + #define SIXTRL_CERRF_ABQ2011_A11 0.0000739143342960301487751427184143 + #define SIXTRL_CERRF_ABQ2011_A12 0.0000152794934280083634658979605774 + #define SIXTRL_CERRF_ABQ2011_A13 0.00000275395660822107093261423133381 + #define SIXTRL_CERRF_ABQ2011_A14 4.32785878190124505246159684324E-7 + #define SIXTRL_CERRF_ABQ2011_A15 5.93003040874588104132914772669E-8 + #define SIXTRL_CERRF_ABQ2011_A16 7.08449030774820424708618991843E-9 + #define SIXTRL_CERRF_ABQ2011_A17 7.37952063581678039121116498488E-10 + #define SIXTRL_CERRF_ABQ2011_A18 6.70217160600200763046136003593E-11 + #define SIXTRL_CERRF_ABQ2011_A19 5.30726516347079017807414252726E-12 + #define SIXTRL_CERRF_ABQ2011_A20 3.66432411346763916925386157070E-13 + #define SIXTRL_CERRF_ABQ2011_A21 2.20589494494103134281934595834E-14 + #define SIXTRL_CERRF_ABQ2011_A22 1.15782686262855878932236226031E-15 + #define SIXTRL_CERRF_ABQ2011_A23 5.29871142946730483659082681849E-17 + +#endif /* */ + +#if !defined( SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC ) + #define SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC_UNDEF + #if defined( SIXTRL_BUFFER_DATAPTR_DEC ) + #define SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC SIXTRL_BUFFER_DATAPTR_DEC + #elif defined( SIXTRL_DATAPTR_DEC ) + #define SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC SIXTRL_DATAPTR_DEC + #else /* defined( SIXTRL_DATAPTR_DEC ) */ + #define SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC + #endif /* defined( SIXTRL_ARGPTR_DEC ) */ +#endif /* !defined( SIXTRL_CERRF_ABQ2011_FOURIER_COEFF_DEC ) */ + +#if !defined( SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC ) + #define SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC_UNDEF + #if defined( SIXTRL_BUFFER_DATAPTR_DEC ) + #define SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC SIXTRL_BUFFER_DATAPTR_DEC + #elif defined( SIXTRL_DATAPTR_DEC ) + #define SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC SIXTRL_DATAPTR_DEC + #else /* defined( SIXTRL_DATAPTR_DEC ) */ + #define 
SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC + #endif /* defined( SIXTRL_ARGPTR_DEC ) */ +#endif /* !defined( SIXTRL_CERRF_ABQ2011_TAYLOR_COEFF_DEC ) */ + +/* ************************************************************************* */ +/* Dawson approximation: */ + +#if !defined( SIXTRL_CERRF_DAWSON_XN_MIN ) + #define SIXTRL_CERRF_DAWSON_XN_MIN 0 +#endif /* !defined( SIXTRL_CERRF_DAWSON_XN_MIN ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_XN_MAX ) + #define SIXTRL_CERRF_DAWSON_XN_MAX 20 +#endif /* !defined( SIXTRL_CERRF_DAWSON_XN_MAX ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_DX ) + #define SIXTRL_CERRF_DAWSON_DX \ + 0.2105263157894736842105263157894736842105263157894737 +#endif /* !defined( SIXTRL_CERRF_DAWSON_DX ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_N_XN ) + #define SIXTRL_CERRF_DAWSON_N_XN 96 +#endif /* !defined( SIXTRL_CERRF_DAWSON_N_XN ) */ + +/* SIXTRL_CERRF_DAWSON_X_MIN = SIXTRL_CERRF_DAWSON_XN_MIN */ +#if !defined( SIXTRL_CERRF_DAWSON_X_MIN ) + #define SIXTRL_CERRF_DAWSON_X_MIN 0.0 +#endif /* !defined( SIXTRL_CERRF_DAWSON_X_MIN ) */ + +/* SIXTRL_CERRF_DAWSON_X_MAX = SIXTRL_CERRF_DAWSON_XN_MIN + ( + * SIXTRL_CERRF_DAWSON_N_XN - 0.5 ) * SIXTRL_CERRF_DAWSON_DX */ +#if !defined( SIXTRL_CERRF_DAWSON_X_MAX ) + #define SIXTRL_CERRF_DAWSON_X_MAX \ + 20.10526315789473572603895945576368831098079681396484375 +#endif /* !defined( SIXTRL_CERRF_DAWSON_X_MAX ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_Y_MIN ) + #define SIXTRL_CERRF_DAWSON_Y_MIN 0.0 +#endif /* !defined( SIXTRL_CERRF_DAWSON_Y_MIN ) */ + + +#if !defined( SIXTRL_CERRF_DAWSON_Y_MAX ) + #define SIXTRL_CERRF_DAWSON_Y_MAX 0.1 +#endif /* !defined( SIXTRL_CERRF_DAWSON_Y_MAX ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_ABS_D10 ) + #define SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_ABS_D10 19 +#endif /* SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_ABS_D10 */ + +#if !defined( SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_REL_D14 ) + #define SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_REL_D14 25 +#endif /* SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_REL_D14 */ + +#if 
!defined( SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR ) + #if ( SIXTRL_CERRF_METHOD == 0 ) || ( SIXTRL_CERRF_METHOD == 1 ) + #define SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR 19 + #else + #define SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR 25 + #endif /* SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR */ +#endif /* SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR */ + +#if !defined( SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN ) + #define SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN 25 +#endif /* SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN */ + +/* SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN * SIXTRL_CERRF_DAWSON_N_XN */ +#if !defined( SIXTRL_CERRF_DAWSON_NUM_TAYLOR_COEFF ) + #define SIXTRL_CERRF_DAWSON_NUM_TAYLOR_COEFF 2400 +#endif /* !defined( SIXTRL_CERRF_DAWSON_NUM_TAYLOR_COEFF ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC_UNDEF + #if defined( SIXTRL_BUFFER_DATAPTR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_BUFFER_DATAPTR_DEC + #elif defined( SIXTRL_DATAPTR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_DATAPTR_DEC + #else /* defined( SIXTRL_DATAPTR_DEC ) */ + #define SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC + #endif /* defined( SIXTRL_ARGPTR_DEC ) */ +#endif /* !defined( SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_COEFF_NT_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_NT_DEC_UNDEF + #if defined( SIXTRL_BUFFER_DATAPTR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_BUFFER_DATAPTR_DEC + #elif defined( SIXTRL_DATAPTR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_DATAPTR_DEC + #else /* defined( SIXTRL_DATAPTR_DEC ) */ + #define SIXTRL_CERRF_DAWSON_COEFF_NT_DEC + #endif /* defined( SIXTRL_ARGPTR_DEC ) */ +#endif /* !defined( SIXTRL_CERRF_DAWSON_COEFF_NT_DEC ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_COEFF_XI_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_XI_DEC_UNDEF + #if defined( SIXTRL_BUFFER_DATAPTR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_BUFFER_DATAPTR_DEC + #elif defined( 
SIXTRL_DATAPTR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_DATAPTR_DEC + #else /* defined( SIXTRL_DATAPTR_DEC ) */ + #define SIXTRL_CERRF_DAWSON_COEFF_XI_DEC + #endif /* defined( SIXTRL_ARGPTR_DEC ) */ +#endif /* !defined( SIXTRL_CERRF_DAWSON_COEFF_XI_DEC ) */ + +#if !defined( SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC_UNDEF + #if defined( SIXTRL_BUFFER_DATAPTR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_BUFFER_DATAPTR_DEC + #elif defined( SIXTRL_DATAPTR_DEC ) + #define SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_DATAPTR_DEC + #else /* defined( SIXTRL_DATAPTR_DEC ) */ + #define SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC + #endif /* defined( SIXTRL_ARGPTR_DEC ) */ +#endif /* !defined( SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC ) */ + + + +#if defined( __cplusplus ) && !defined( _GPUCODE ) && !defined( __CUDA_ARCH__ ) +} +#endif /* C++, Host */ +#endif /* SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DEFINITIONS_H__ */ From fc10ea2573763525b0170d5a1b7c222d7897e8f9 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:09:36 +0200 Subject: [PATCH 60/77] common: adds dawson approximation for faddeeva with small imaginary arguments --- .../common/be_beamfields/dawson_approx.c | 71 + .../common/be_beamfields/dawson_approx.h | 764 +++++ .../common/be_beamfields/dawson_coeff.h | 37 + .../be_beamfields/dawson_coeff_xn96_ntmax25.c | 3011 +++++++++++++++++ 4 files changed, 3883 insertions(+) create mode 100644 sixtracklib/common/be_beamfields/dawson_approx.c create mode 100644 sixtracklib/common/be_beamfields/dawson_approx.h create mode 100644 sixtracklib/common/be_beamfields/dawson_coeff.h create mode 100644 sixtracklib/common/be_beamfields/dawson_coeff_xn96_ntmax25.c diff --git a/sixtracklib/common/be_beamfields/dawson_approx.c b/sixtracklib/common/be_beamfields/dawson_approx.c new file mode 100644 index 000000000..523f891cf --- /dev/null +++ b/sixtracklib/common/be_beamfields/dawson_approx.c @@ -0,0 +1,71 @@ +#if !defined( 
SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/be_beamfields/dawson_approx.h" + + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + #include "sixtracklib/common/be_beamfields/dawson_coeff.h" + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) + +int NS(dawson_n_interval_ext)( SIXTRL_REAL_T x ) SIXTRL_NOEXCEPT { + return NS(dawson_n_interval)( x ); } + +SIXTRL_REAL_T NS(dawson_xi_ext)( int n_interval ) SIXTRL_NOEXCEPT { + return NS(dawson_xi)( n_interval ); } + +SIXTRL_REAL_T NS(dawson_fz_xi_ext)( int n_interval ) SIXTRL_NOEXCEPT { + return NS(dawson_fz_xi)( n_interval ); } + +int NS(dawson_nt_xi_abs_d10_ext)( int n_interval ) SIXTRL_NOEXCEPT { + return NS(dawson_nt_xi_abs_d10)( n_interval ); } + +void NS(dawson_cerrf_nocoeff_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + NS(dawson_cerrf)( x, y, out_real, out_imag ); +} + +void NS(dawson_cerrf_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + #if ( SIXTRL_CERRF_METHOD == 0 ) || ( SIXTRL_CERRF_METHOD == 1 ) + NS(dawson_cerrf_coeff)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ], &NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_ABS_D10)[ 0 ] ); + #else /* ( SIXTRL_CERRF_METHOD > 1 ) */ + NS(dawson_cerrf_coeff)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ], &NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ] ); + #endif /* ( SIXTRL_CERRF_METHOD ) */ + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ 
+ #if ( SIXTRL_CERRF_METHOD == 0 ) || ( SIXTRL_CERRF_METHOD == 1 ) + NS(dawson_cerrf_coeff)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ], &NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_ABS_D10)[ 0 ], + &NS(CERRF_DAWSON_FZ_KK_XI)[ 0 ] ); + + #else /* ( SIXTRL_CERRF_METHOD > 1 ) */ + NS(dawson_cerrf_coeff)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ], &NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ], + &NS(CERRF_DAWSON_FZ_KK_XI)[ 0 ] ); + #endif /* ( SIXTRL_CERRF_METHOD ) */ + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ +} + +#endif /* !defined( _GPUCODE ) */ + diff --git a/sixtracklib/common/be_beamfields/dawson_approx.h b/sixtracklib/common/be_beamfields/dawson_approx.h new file mode 100644 index 000000000..d192d9a42 --- /dev/null +++ b/sixtracklib/common/be_beamfields/dawson_approx.h @@ -0,0 +1,764 @@ +#ifndef SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DAWSON_APPROX_H__ +#define SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DAWSON_APPROX_H__ + +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/be_beamfields/definitions.h" + #include "sixtracklib/common/internal/math_constants.h" + #include "sixtracklib/common/internal/math_factorial.h" + #include "sixtracklib/common/internal/math_functions.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) && defined( __cplusplus ) +extern "C" { +#endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ + +SIXTRL_STATIC SIXTRL_FN int NS(dawson_n_interval)( + SIXTRL_REAL_T x ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(dawson_xi)( + int n_interval ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN SIXTRL_REAL_T NS(dawson_fz_xi)( + int n_interval ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN int NS(dawson_nt_xi_abs_d10)( + int n_interval ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN int NS(dawson_nt_xi_rel_d14)( + int n_interval ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN void
NS(dawson_cerrf)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +#if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + +SIXTRL_STATIC SIXTRL_FN void NS(dawson_cerrf_coeff)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag, + SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 0 ) */ +) SIXTRL_NOEXCEPT; + +#endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 0 ) */ + +#if !defined( _GPUCODE ) +SIXTRL_EXTERN SIXTRL_HOST_FN int NS(dawson_n_interval_ext)( + SIXTRL_REAL_T x ) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN SIXTRL_REAL_T NS(dawson_xi_ext)( + int n_interval ) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN SIXTRL_REAL_T NS(dawson_fz_xi_ext)( + int n_interval ) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN int NS(dawson_nt_xi_abs_d10_ext)( + int n_interval ) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(dawson_cerrf_nocoeff_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(dawson_cerrf_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) 
SIXTRL_NOEXCEPT; + +#endif /* !defined( _GPUCODE ) */ + +/* ************************************************************************ */ + +SIXTRL_INLINE int NS(dawson_n_interval)( SIXTRL_REAL_T x ) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + int const n_interval = ( int )NS(round)( + ( x - ( real_type )SIXTRL_CERRF_DAWSON_XN_MIN ) / + ( real_type )SIXTRL_CERRF_DAWSON_DX ); + + SIXTRL_ASSERT( x >= ( real_type )SIXTRL_CERRF_DAWSON_X_MIN ); + SIXTRL_ASSERT( x <= ( real_type )SIXTRL_CERRF_DAWSON_X_MAX ); + + return n_interval; +} + +#if defined( SIXTRL_CERRF_DAWSON_N_XN ) && ( SIXTRL_CERRF_DAWSON_N_XN == 96 ) && \ + defined( SIXTRL_CERRF_DAWSON_XN_MIN ) && ( SIXTRL_CERRF_DAWSON_XN_MIN == 0 ) && \ + defined( SIXTRL_CERRF_DAWSON_XN_MAX ) && ( SIXTRL_CERRF_DAWSON_XN_MAX == 20 ) + +SIXTRL_INLINE SIXTRL_REAL_T NS(dawson_xi)( int n_interval ) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + real_type xi; + + SIXTRL_ASSERT( n_interval >= 0 ); + SIXTRL_ASSERT( n_interval < ( int )SIXTRL_CERRF_DAWSON_N_XN ); + + switch( n_interval ) + { + case 1: { xi = ( real_type )+0.210526315789473684210526315789473684210526; break; } + case 2: { xi = ( real_type )+0.421052631578947368421052631578947368421053; break; } + case 3: { xi = ( real_type )+0.631578947368421052631578947368421052631579; break; } + case 4: { xi = ( real_type )+0.842105263157894736842105263157894736842105; break; } + case 5: { xi = ( real_type )+1.05263157894736842105263157894736842105263; break; } + case 6: { xi = ( real_type )+1.26315789473684210526315789473684210526316; break; } + case 7: { xi = ( real_type )+1.47368421052631578947368421052631578947368; break; } + case 8: { xi = ( real_type )+1.68421052631578947368421052631578947368421; break; } + case 9: { xi = ( real_type )+1.89473684210526315789473684210526315789474; break; } + case 10: { xi = ( real_type )+2.10526315789473684210526315789473684210526; break; } + case 11: { xi = ( real_type )+2.31578947368421052631578947368421052631579; break; 
} + case 12: { xi = ( real_type )+2.52631578947368421052631578947368421052632; break; } + case 13: { xi = ( real_type )+2.73684210526315789473684210526315789473684; break; } + case 14: { xi = ( real_type )+2.94736842105263157894736842105263157894737; break; } + case 15: { xi = ( real_type )+3.15789473684210526315789473684210526315789; break; } + case 16: { xi = ( real_type )+3.36842105263157894736842105263157894736842; break; } + case 17: { xi = ( real_type )+3.57894736842105263157894736842105263157895; break; } + case 18: { xi = ( real_type )+3.78947368421052631578947368421052631578947; break; } + case 19: { xi = ( real_type )+4.0; break; } + case 20: { xi = ( real_type )+4.21052631578947368421052631578947368421053; break; } + case 21: { xi = ( real_type )+4.42105263157894736842105263157894736842105; break; } + case 22: { xi = ( real_type )+4.63157894736842105263157894736842105263158; break; } + case 23: { xi = ( real_type )+4.84210526315789473684210526315789473684211; break; } + case 24: { xi = ( real_type )+5.05263157894736842105263157894736842105263; break; } + case 25: { xi = ( real_type )+5.26315789473684210526315789473684210526316; break; } + case 26: { xi = ( real_type )+5.47368421052631578947368421052631578947368; break; } + case 27: { xi = ( real_type )+5.68421052631578947368421052631578947368421; break; } + case 28: { xi = ( real_type )+5.89473684210526315789473684210526315789474; break; } + case 29: { xi = ( real_type )+6.10526315789473684210526315789473684210526; break; } + case 30: { xi = ( real_type )+6.31578947368421052631578947368421052631579; break; } + case 31: { xi = ( real_type )+6.52631578947368421052631578947368421052632; break; } + case 32: { xi = ( real_type )+6.73684210526315789473684210526315789473684; break; } + case 33: { xi = ( real_type )+6.94736842105263157894736842105263157894737; break; } + case 34: { xi = ( real_type )+7.15789473684210526315789473684210526315789; break; } + case 35: { xi = ( real_type 
)+7.36842105263157894736842105263157894736842; break; } + case 36: { xi = ( real_type )+7.57894736842105263157894736842105263157895; break; } + case 37: { xi = ( real_type )+7.78947368421052631578947368421052631578947; break; } + case 38: { xi = ( real_type )+8.0; break; } + case 39: { xi = ( real_type )+8.21052631578947368421052631578947368421053; break; } + case 40: { xi = ( real_type )+8.42105263157894736842105263157894736842105; break; } + case 41: { xi = ( real_type )+8.63157894736842105263157894736842105263158; break; } + case 42: { xi = ( real_type )+8.84210526315789473684210526315789473684211; break; } + case 43: { xi = ( real_type )+9.05263157894736842105263157894736842105263; break; } + case 44: { xi = ( real_type )+9.26315789473684210526315789473684210526316; break; } + case 45: { xi = ( real_type )+9.47368421052631578947368421052631578947368; break; } + case 46: { xi = ( real_type )+9.68421052631578947368421052631578947368421; break; } + case 47: { xi = ( real_type )+9.89473684210526315789473684210526315789474; break; } + case 48: { xi = ( real_type )+10.1052631578947368421052631578947368421053; break; } + case 49: { xi = ( real_type )+10.3157894736842105263157894736842105263158; break; } + case 50: { xi = ( real_type )+10.5263157894736842105263157894736842105263; break; } + case 51: { xi = ( real_type )+10.7368421052631578947368421052631578947368; break; } + case 52: { xi = ( real_type )+10.9473684210526315789473684210526315789474; break; } + case 53: { xi = ( real_type )+11.1578947368421052631578947368421052631579; break; } + case 54: { xi = ( real_type )+11.3684210526315789473684210526315789473684; break; } + case 55: { xi = ( real_type )+11.5789473684210526315789473684210526315789; break; } + case 56: { xi = ( real_type )+11.7894736842105263157894736842105263157895; break; } + case 57: { xi = ( real_type )+12.0; break; } + case 58: { xi = ( real_type )+12.2105263157894736842105263157894736842105; break; } + case 59: { xi = ( real_type 
)+12.4210526315789473684210526315789473684211; break; } + case 60: { xi = ( real_type )+12.6315789473684210526315789473684210526316; break; } + case 61: { xi = ( real_type )+12.8421052631578947368421052631578947368421; break; } + case 62: { xi = ( real_type )+13.0526315789473684210526315789473684210526; break; } + case 63: { xi = ( real_type )+13.2631578947368421052631578947368421052632; break; } + case 64: { xi = ( real_type )+13.4736842105263157894736842105263157894737; break; } + case 65: { xi = ( real_type )+13.6842105263157894736842105263157894736842; break; } + case 66: { xi = ( real_type )+13.8947368421052631578947368421052631578947; break; } + case 67: { xi = ( real_type )+14.1052631578947368421052631578947368421053; break; } + case 68: { xi = ( real_type )+14.3157894736842105263157894736842105263158; break; } + case 69: { xi = ( real_type )+14.5263157894736842105263157894736842105263; break; } + case 70: { xi = ( real_type )+14.7368421052631578947368421052631578947368; break; } + case 71: { xi = ( real_type )+14.9473684210526315789473684210526315789474; break; } + case 72: { xi = ( real_type )+15.1578947368421052631578947368421052631579; break; } + case 73: { xi = ( real_type )+15.3684210526315789473684210526315789473684; break; } + case 74: { xi = ( real_type )+15.5789473684210526315789473684210526315789; break; } + case 75: { xi = ( real_type )+15.7894736842105263157894736842105263157895; break; } + case 76: { xi = ( real_type )+16.0; break; } + case 77: { xi = ( real_type )+16.2105263157894736842105263157894736842105; break; } + case 78: { xi = ( real_type )+16.4210526315789473684210526315789473684211; break; } + case 79: { xi = ( real_type )+16.6315789473684210526315789473684210526316; break; } + case 80: { xi = ( real_type )+16.8421052631578947368421052631578947368421; break; } + case 81: { xi = ( real_type )+17.0526315789473684210526315789473684210526; break; } + case 82: { xi = ( real_type )+17.2631578947368421052631578947368421052632; break; } + 
case 83: { xi = ( real_type )+17.4736842105263157894736842105263157894737; break; } + case 84: { xi = ( real_type )+17.6842105263157894736842105263157894736842; break; } + case 85: { xi = ( real_type )+17.8947368421052631578947368421052631578947; break; } + case 86: { xi = ( real_type )+18.1052631578947368421052631578947368421053; break; } + case 87: { xi = ( real_type )+18.3157894736842105263157894736842105263158; break; } + case 88: { xi = ( real_type )+18.5263157894736842105263157894736842105263; break; } + case 89: { xi = ( real_type )+18.7368421052631578947368421052631578947368; break; } + case 90: { xi = ( real_type )+18.9473684210526315789473684210526315789474; break; } + case 91: { xi = ( real_type )+19.1578947368421052631578947368421052631579; break; } + case 92: { xi = ( real_type )+19.3684210526315789473684210526315789473684; break; } + case 93: { xi = ( real_type )+19.5789473684210526315789473684210526315789; break; } + case 94: { xi = ( real_type )+19.7894736842105263157894736842105263157895; break; } + case 95: { xi = ( real_type )+20.0; break; } + default: { xi = ( real_type )+0.0; } + }; + + return xi; +} + +SIXTRL_INLINE SIXTRL_REAL_T NS(dawson_fz_xi)( int n_interval ) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + real_type Fz; + SIXTRL_ASSERT( n_interval >= 0 ); + SIXTRL_ASSERT( n_interval < ( int )SIXTRL_CERRF_DAWSON_N_XN ); + + switch( n_interval ) + { + case 1: { Fz = ( real_type )+0.204414676423299307852205738133635368467483; break; } + case 2: { Fz = ( real_type )+0.374645391062146565792903138151666837723415; break; } + case 3: { Fz = ( real_type )+0.487620779526159032600085065955994007602486; break; } + case 4: { Fz = ( real_type )+0.537225179665383656999471044984894541890407; break; } + case 5: { Fz = ( real_type )+0.532844209232800442512093957303667626022578; break; } + case 6: { Fz = ( real_type )+0.492641603905036547612208594020732218930945; break; } + case 7: { Fz = ( real_type )+0.435738088853804929355870047140050578499873; 
break; } + case 8: { Fz = ( real_type )+0.376790235869403627825298481421444312322929; break; } + case 9: { Fz = ( real_type )+0.324173662408523334351848683232449479096986; break; } + case 10: { Fz = ( real_type )+0.280919842933869131514286981833391921653268; break; } + case 11: { Fz = ( real_type )+0.246774606738902005981196594245619278443541; break; } + case 12: { Fz = ( real_type )+0.220091142230898461206668437270154827354414; break; } + case 13: { Fz = ( real_type )+0.199018995238297948215403032191131604653406; break; } + case 14: { Fz = ( real_type )+0.182022563204593041262776480636221471420828; break; } + case 15: { Fz = ( real_type )+0.167986296744659197005015872159861318018257; break; } + case 16: { Fz = ( real_type )+0.156145368002923681693730536418605551404876; break; } + case 17: { Fz = ( real_type )+0.145980869408444698072701944127637946509207; break; } + case 18: { Fz = ( real_type )+0.137133914188937040815955863740722859588214; break; } + case 19: { Fz = ( real_type )+0.129348001236005115591470526257665945278322; break; } + case 20: { Fz = ( real_type )+0.122433401676389459794733090423992918624035; break; } + case 21: { Fz = ( real_type )+0.116245536536035770230943819646142346156529; break; } + case 22: { Fz = ( real_type )+0.110671559758965998392672056514515221809768; break; } + case 23: { Fz = ( real_type )+0.105621693364230228188040070800433983455213; break; } + case 24: { Fz = ( real_type )+0.101023390864058696333111557061547844543024; break; } + case 25: { Fz = ( real_type )+0.0968172599708731096134786108738627105980966; break; } + case 26: { Fz = ( real_type )+0.0929541296762774182586342664150765738247869; break; } + case 27: { Fz = ( real_type )+0.0893928904886096481956461370618202971912525; break; } + case 28: { Fz = ( real_type )+0.0860988731325164934603718599503360383247846; break; } + case 29: { Fz = ( real_type )+0.0830426115241511768702601167929215319557302; break; } + case 30: { Fz = ( real_type 
)+0.0801988855781815476048148063389218190822483; break; } + case 31: { Fz = ( real_type )+0.0775459713212218568287642349004942070222095; break; } + case 32: { Fz = ( real_type )+0.0750650468966655593891682810790162389606471; break; } + case 33: { Fz = ( real_type )+0.0727397173579474671216481741375405058188894; break; } + case 34: { Fz = ( real_type )+0.0705556310552950551974987896970666311765803; break; } + case 35: { Fz = ( real_type )+0.0685001674055154771379396766403437184862358; break; } + case 36: { Fz = ( real_type )+0.0665621808371330882020689517139535800601475; break; } + case 37: { Fz = ( real_type )+0.0647317893379955483199347836217465191040756; break; } + case 38: { Fz = ( real_type )+0.0630001987075533879192457295169672688411135; break; } + case 39: { Fz = ( real_type )+0.0613595556079285561738754088706845246229021; break; } + case 40: { Fz = ( real_type )+0.0598028240070970025997168069552382204455551; break; } + case 41: { Fz = ( real_type )+0.0583236807470069792151640171108127990614153; break; } + case 42: { Fz = ( real_type )+0.056916426843506876837940629331046129974024; break; } + case 43: { Fz = ( real_type )+0.0555759118010602678845137769255961249291392; break; } + case 44: { Fz = ( real_type )+0.0542974687523859076954105481536284958849097; break; } + case 45: { Fz = ( real_type )+0.0530768586471334283581234719746748841532657; break; } + case 46: { Fz = ( real_type )+0.0519102220411792443100980154384748583981615; break; } + case 47: { Fz = ( real_type )+0.0507940372987049051842427501977029926302941; break; } + case 48: { Fz = ( real_type )+0.0497250842279749621621125383083789598731151; break; } + case 49: { Fz = ( real_type )+0.0487004123398296771663508633922339236261419; break; } + case 50: { Fz = ( real_type )+0.047717313054061251201007710934289736649871; break; } + case 51: { Fz = ( real_type )+0.0467732952896975256047898303754890525957012; break; } + case 52: { Fz = ( real_type )+0.0458660639658681409761153082730452877258985; break; } + case 
53: { Fz = ( real_type )+0.0449935010144609170081544834090713795542632; break; } + case 54: { Fz = ( real_type )+0.0441536485672899944916871372517739024921971; break; } + case 55: { Fz = ( real_type )+0.043344694031490533589896575933680459971779; break; } + case 56: { Fz = ( real_type )+0.0425649568093194227216046680664783565680491; break; } + case 57: { Fz = ( real_type )+0.0418128764539882603179291175888089559911153; break; } + case 58: { Fz = ( real_type )+0.0410870020829319836299372632542900813461959; break; } + case 59: { Fz = ( real_type )+0.040385982894919183194249326844771251876668; break; } + case 60: { Fz = ( real_type )+0.0397085596585740386496577961382040023237114; break; } + case 61: { Fz = ( real_type )+0.0390535570577986355292801429237067764008847; break; } + case 62: { Fz = ( real_type )+0.0384198767947746146529233779408053168285224; break; } + case 63: { Fz = ( real_type )+0.0378064913642431632452704510295471834595252; break; } + case 64: { Fz = ( real_type )+0.0372124384238227623469476082943136111737884; break; } + case 65: { Fz = ( real_type )+0.03663681569465044069256321751749903523137; break; } + case 66: { Fz = ( real_type )+0.0360787763348041155456745796410421736938601; break; } + case 67: { Fz = ( real_type )+0.0355375247350140356986075299224587181901788; break; } + case 68: { Fz = ( real_type )+0.0350123126922469469101154386321616300405276; break; } + case 69: { Fz = ( real_type )+0.03450243592202935649461976396376297559226; break; } + case 70: { Fz = ( real_type )+0.0340072308749504430827663193995038626461181; break; } + case 71: { Fz = ( real_type )+0.0335260718267648448243884603329401316619297; break; } + case 72: { Fz = ( real_type )+0.0330583682149807959933751016989241274063893; break; } + case 73: { Fz = ( real_type )+0.0326035621978688818527138577945325741429771; break; } + case 74: { Fz = ( real_type )+0.032161126414470307789361137738387026075656; break; } + case 75: { Fz = ( real_type 
)+0.0317305619265100815660500263462697296874182; break; } + case 76: { Fz = ( real_type )+0.03131139632518461178355335195230901508146; break; } + case 77: { Fz = ( real_type )+0.0309031819875807833185597950940297846685743; break; } + case 78: { Fz = ( real_type )+0.0305054944690889621935730957812503043992676; break; } + case 79: { Fz = ( real_type )+0.0301179310195570542608243395223315653783517; break; } + case 80: { Fz = ( real_type )+0.0297401092122126450370115395178546539000625; break; } + case 81: { Fz = ( real_type )+0.0293716656754489088365959105310074286753141; break; } + case 82: { Fz = ( real_type )+0.0290122549185626204232460038847629393025906; break; } + case 83: { Fz = ( real_type )+0.0286615482434347706561739985173263709460918; break; } + case 84: { Fz = ( real_type )+0.028319232734848067795097432772015190860891; break; } + case 85: { Fz = ( real_type )+0.0279850103229300360218746106322339780429843; break; } + case 86: { Fz = ( real_type )+0.0276585969117343482506150690312596643118447; break; } + case 87: { Fz = ( real_type )+0.0273397215685935480035125872749087135987964; break; } + case 88: { Fz = ( real_type )+0.0270281257693319151623337823932836698764564; break; } + case 89: { Fz = ( real_type )+0.0267235626949087363281474632612449247565235; break; } + case 90: { Fz = ( real_type )+0.026425796575435644735625484197407954040757; break; } + case 91: { Fz = ( real_type )+0.0261346020778743874048668010927461155846186; break; } + case 92: { Fz = ( real_type )+0.0258497637340652429455729256209668852097065; break; } + case 93: { Fz = ( real_type )+0.0255710754060192979083778000275314785319582; break; } + case 94: { Fz = ( real_type )+0.0252983397856513709121225761435040371463168; break; } + case 95: { Fz = ( real_type )+0.0250313679264036719469949523478235318685783; break; } + default: { Fz = ( real_type )0.0; } + }; + + return Fz; +} + +SIXTRL_INLINE int NS(dawson_nt_xi_abs_d10)( int n_interval ) SIXTRL_NOEXCEPT +{ + int N_Taylor = ( int 
)SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_ABS_D10; + + #if !defined( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR ) || \ + ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR != 1 ) + + switch( n_interval ) + { + case 0: { N_Taylor = ( int )25; break; } + case 1: { N_Taylor = ( int )25; break; } + case 2: { N_Taylor = ( int )25; break; } + case 3: { N_Taylor = ( int )24; break; } + case 4: { N_Taylor = ( int )25; break; } + case 5: { N_Taylor = ( int )24; break; } + case 6: { N_Taylor = ( int )24; break; } + case 7: { N_Taylor = ( int )24; break; } + case 8: { N_Taylor = ( int )24; break; } + case 9: { N_Taylor = ( int )24; break; } + case 10: { N_Taylor = ( int )24; break; } + case 11: { N_Taylor = ( int )23; break; } + case 12: { N_Taylor = ( int )24; break; } + case 13: { N_Taylor = ( int )23; break; } + case 14: { N_Taylor = ( int )24; break; } + case 15: { N_Taylor = ( int )23; break; } + case 16: { N_Taylor = ( int )23; break; } + case 17: { N_Taylor = ( int )22; break; } + case 18: { N_Taylor = ( int )22; break; } + case 19: { N_Taylor = ( int )21; break; } + case 20: { N_Taylor = ( int )21; break; } + case 21: { N_Taylor = ( int )21; break; } + case 22: { N_Taylor = ( int )21; break; } + case 23: { N_Taylor = ( int )20; break; } + case 24: { N_Taylor = ( int )19; break; } + case 25: { N_Taylor = ( int )19; break; } + case 26: { N_Taylor = ( int )19; break; } + case 27: { N_Taylor = ( int )17; break; } + case 28: { N_Taylor = ( int )17; break; } + case 29: { N_Taylor = ( int )17; break; } + case 30: { N_Taylor = ( int )16; break; } + case 31: { N_Taylor = ( int )16; break; } + case 32: { N_Taylor = ( int )15; break; } + case 33: { N_Taylor = ( int )15; break; } + case 34: { N_Taylor = ( int )15; break; } + case 35: { N_Taylor = ( int )15; break; } + case 36: { N_Taylor = ( int )15; break; } + case 37: { N_Taylor = ( int )14; break; } + case 38: { N_Taylor = ( int )14; break; } + case 39: { N_Taylor = ( int )14; break; } + case 40: { N_Taylor = ( int )14; break; } + case 41: { N_Taylor = 
( int )14; break; } + case 42: { N_Taylor = ( int )14; break; } + case 43: { N_Taylor = ( int )13; break; } + case 44: { N_Taylor = ( int )13; break; } + case 45: { N_Taylor = ( int )13; break; } + case 46: { N_Taylor = ( int )13; break; } + case 47: { N_Taylor = ( int )13; break; } + case 48: { N_Taylor = ( int )13; break; } + case 49: { N_Taylor = ( int )13; break; } + case 50: { N_Taylor = ( int )13; break; } + case 51: { N_Taylor = ( int )13; break; } + case 52: { N_Taylor = ( int )12; break; } + case 53: { N_Taylor = ( int )12; break; } + case 54: { N_Taylor = ( int )12; break; } + case 55: { N_Taylor = ( int )12; break; } + case 56: { N_Taylor = ( int )12; break; } + case 57: { N_Taylor = ( int )12; break; } + case 58: { N_Taylor = ( int )12; break; } + case 59: { N_Taylor = ( int )12; break; } + case 60: { N_Taylor = ( int )12; break; } + case 61: { N_Taylor = ( int )12; break; } + case 62: { N_Taylor = ( int )12; break; } + case 63: { N_Taylor = ( int )12; break; } + case 64: { N_Taylor = ( int )12; break; } + case 65: { N_Taylor = ( int )12; break; } + case 66: { N_Taylor = ( int )12; break; } + case 67: { N_Taylor = ( int )11; break; } + case 68: { N_Taylor = ( int )11; break; } + case 69: { N_Taylor = ( int )11; break; } + case 70: { N_Taylor = ( int )11; break; } + case 71: { N_Taylor = ( int )11; break; } + case 72: { N_Taylor = ( int )11; break; } + case 73: { N_Taylor = ( int )11; break; } + case 74: { N_Taylor = ( int )11; break; } + case 75: { N_Taylor = ( int )11; break; } + case 76: { N_Taylor = ( int )11; break; } + case 77: { N_Taylor = ( int )11; break; } + case 78: { N_Taylor = ( int )11; break; } + case 79: { N_Taylor = ( int )11; break; } + case 80: { N_Taylor = ( int )11; break; } + case 81: { N_Taylor = ( int )11; break; } + case 82: { N_Taylor = ( int )11; break; } + case 83: { N_Taylor = ( int )11; break; } + case 84: { N_Taylor = ( int )11; break; } + case 85: { N_Taylor = ( int )11; break; } + case 86: { N_Taylor = ( int )11; break; } 
+ case 87: { N_Taylor = ( int )11; break; } + case 88: { N_Taylor = ( int )11; break; } + case 89: { N_Taylor = ( int )11; break; } + case 90: { N_Taylor = ( int )10; break; } + case 91: { N_Taylor = ( int )10; break; } + case 92: { N_Taylor = ( int )10; break; } + case 93: { N_Taylor = ( int )10; break; } + case 94: { N_Taylor = ( int )10; break; } + case 95: { N_Taylor = ( int )10; break; } + default: { N_Taylor = ( int )SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_ABS_D10; } + }; + + #endif /* ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR ) */ + + return N_Taylor; +} + +SIXTRL_INLINE int NS(dawson_nt_xi_rel_d14)( int n_interval ) SIXTRL_NOEXCEPT +{ + int N_Taylor = ( int )SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_REL_D14; + + #if !defined( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR ) || \ + ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR != 1 ) + + switch( n_interval ) + { + case 0: { N_Taylor = ( int )25; break; } + case 1: { N_Taylor = ( int )25; break; } + case 2: { N_Taylor = ( int )25; break; } + case 3: { N_Taylor = ( int )24; break; } + case 4: { N_Taylor = ( int )25; break; } + case 5: { N_Taylor = ( int )24; break; } + case 6: { N_Taylor = ( int )24; break; } + case 7: { N_Taylor = ( int )24; break; } + case 8: { N_Taylor = ( int )24; break; } + case 9: { N_Taylor = ( int )24; break; } + case 10: { N_Taylor = ( int )24; break; } + case 11: { N_Taylor = ( int )23; break; } + case 12: { N_Taylor = ( int )24; break; } + case 13: { N_Taylor = ( int )23; break; } + case 14: { N_Taylor = ( int )24; break; } + case 15: { N_Taylor = ( int )23; break; } + case 16: { N_Taylor = ( int )23; break; } + case 17: { N_Taylor = ( int )22; break; } + case 18: { N_Taylor = ( int )22; break; } + case 19: { N_Taylor = ( int )21; break; } + case 20: { N_Taylor = ( int )21; break; } + case 21: { N_Taylor = ( int )21; break; } + case 22: { N_Taylor = ( int )21; break; } + case 23: { N_Taylor = ( int )20; break; } + case 24: { N_Taylor = ( int )19; break; } + case 25: { N_Taylor = ( int )19; break; } + case 26: { 
N_Taylor = ( int )19; break; } + case 27: { N_Taylor = ( int )17; break; } + case 28: { N_Taylor = ( int )17; break; } + case 29: { N_Taylor = ( int )17; break; } + case 30: { N_Taylor = ( int )16; break; } + case 31: { N_Taylor = ( int )16; break; } + case 32: { N_Taylor = ( int )15; break; } + case 33: { N_Taylor = ( int )15; break; } + case 34: { N_Taylor = ( int )15; break; } + case 35: { N_Taylor = ( int )15; break; } + case 36: { N_Taylor = ( int )15; break; } + case 37: { N_Taylor = ( int )14; break; } + case 38: { N_Taylor = ( int )14; break; } + case 39: { N_Taylor = ( int )14; break; } + case 40: { N_Taylor = ( int )14; break; } + case 41: { N_Taylor = ( int )14; break; } + case 42: { N_Taylor = ( int )14; break; } + case 43: { N_Taylor = ( int )13; break; } + case 44: { N_Taylor = ( int )13; break; } + case 45: { N_Taylor = ( int )13; break; } + case 46: { N_Taylor = ( int )13; break; } + case 47: { N_Taylor = ( int )13; break; } + case 48: { N_Taylor = ( int )13; break; } + case 49: { N_Taylor = ( int )13; break; } + case 50: { N_Taylor = ( int )13; break; } + case 51: { N_Taylor = ( int )13; break; } + case 52: { N_Taylor = ( int )12; break; } + case 53: { N_Taylor = ( int )12; break; } + case 54: { N_Taylor = ( int )12; break; } + case 55: { N_Taylor = ( int )12; break; } + case 56: { N_Taylor = ( int )12; break; } + case 57: { N_Taylor = ( int )12; break; } + case 58: { N_Taylor = ( int )12; break; } + case 59: { N_Taylor = ( int )12; break; } + case 60: { N_Taylor = ( int )12; break; } + case 61: { N_Taylor = ( int )12; break; } + case 62: { N_Taylor = ( int )12; break; } + case 63: { N_Taylor = ( int )12; break; } + case 64: { N_Taylor = ( int )12; break; } + case 65: { N_Taylor = ( int )12; break; } + case 66: { N_Taylor = ( int )12; break; } + case 67: { N_Taylor = ( int )11; break; } + case 68: { N_Taylor = ( int )11; break; } + case 69: { N_Taylor = ( int )11; break; } + case 70: { N_Taylor = ( int )11; break; } + case 71: { N_Taylor = ( int 
)11; break; } + case 72: { N_Taylor = ( int )11; break; } + case 73: { N_Taylor = ( int )11; break; } + case 74: { N_Taylor = ( int )11; break; } + case 75: { N_Taylor = ( int )11; break; } + case 76: { N_Taylor = ( int )11; break; } + case 77: { N_Taylor = ( int )11; break; } + case 78: { N_Taylor = ( int )11; break; } + case 79: { N_Taylor = ( int )11; break; } + case 80: { N_Taylor = ( int )11; break; } + case 81: { N_Taylor = ( int )11; break; } + case 82: { N_Taylor = ( int )11; break; } + case 83: { N_Taylor = ( int )11; break; } + case 84: { N_Taylor = ( int )11; break; } + case 85: { N_Taylor = ( int )11; break; } + case 86: { N_Taylor = ( int )11; break; } + case 87: { N_Taylor = ( int )11; break; } + case 88: { N_Taylor = ( int )11; break; } + case 89: { N_Taylor = ( int )11; break; } + case 90: { N_Taylor = ( int )10; break; } + case 91: { N_Taylor = ( int )10; break; } + case 92: { N_Taylor = ( int )10; break; } + case 93: { N_Taylor = ( int )10; break; } + case 94: { N_Taylor = ( int )10; break; } + case 95: { N_Taylor = ( int )10; break; } + default: { N_Taylor = ( int )SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_REL_D14; } + }; + + #endif /* ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR ) */ + + return N_Taylor; +} + +#endif /* N = 96, X0 = 0.0, x95 = 20.0 */ + +SIXTRL_INLINE void NS(dawson_cerrf)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_re, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_im +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + int const n_interval = NS(dawson_n_interval)( x ); + real_type dn_minus_1 = NS(dawson_fz_xi)( n_interval ); + real_type const minus_xn = -NS(dawson_xi)( n_interval ); + real_type const dz_re = x + minus_xn; + + real_type dn = ( real_type )1. + ( real_type )2. 
* minus_xn * dn_minus_1; + + real_type dn_plus_1 = ( real_type )0.0; + real_type dz_pow_n_re = ( real_type )1.0; + real_type dz_pow_n_im = ( real_type )0.0; + + real_type nn = ( real_type )1.0; + real_type temp = -( real_type )2. * x * y; + real_type sum_daw_z_re = dn_minus_1; + real_type sum_daw_z_im = ( real_type )0.0; + + real_type sum_re, sum_im, inv_factorial; + SIXTRL_RESULT_PTR_DEC real_type exp_sin_two_x_y; + SIXTRL_RESULT_PTR_DEC real_type exp_cos_two_x_y; + + #if ( SIXTRL_CERRF_METHOD == 0 ) || ( SIXTRL_CERRF_METHOD == 1 ) + int const N_Taylor = NS(dawson_nt_xi_abs_d10)( n_interval ); + #else /* ( SIXTRL_CERRF_METHOD > 1 ) */ + int const N_Taylor = NS(dawson_nt_xi_rel_d14)( n_interval ); + #endif /* SIXTRL_CERRF_METHOD */ + int n = 1; + + NS(sincos)( temp, &exp_sin_two_x_y, &exp_cos_two_x_y ); + + temp = ( y - x ) * ( y + x ); + temp = NS(exp)( temp ); + exp_sin_two_x_y *= temp; + exp_cos_two_x_y *= temp; + + SIXTRL_ASSERT( out_re != NULL ); + SIXTRL_ASSERT( out_im != NULL ); + + SIXTRL_ASSERT( x >= ( real_type )SIXTRL_CERRF_DAWSON_X_MIN ); + SIXTRL_ASSERT( x <= ( real_type )SIXTRL_CERRF_DAWSON_X_MAX ); + SIXTRL_ASSERT( y >= ( real_type )SIXTRL_CERRF_DAWSON_Y_MIN ); + SIXTRL_ASSERT( y <= ( real_type )SIXTRL_CERRF_DAWSON_Y_MAX ); + + for( ; n < N_Taylor ; ++n, nn += ( real_type )1.0 ) + { + temp = ( minus_xn * dn ) - ( nn * dn_minus_1 ); + dn_plus_1 = ( real_type )2. 
* temp; + temp = dz_pow_n_re * dz_re - dz_pow_n_im * y; + dz_pow_n_im *= dz_re; + dz_pow_n_im += dz_pow_n_re * y; + dz_pow_n_re = temp; + + sum_re = dz_pow_n_re * dn; + sum_im = dz_pow_n_im * dn; + dn_minus_1 = dn; + dn = dn_plus_1; + inv_factorial = NS(Math_inv_factorial)( n ); + + sum_daw_z_re += sum_re * inv_factorial; + sum_daw_z_im += sum_im * inv_factorial; + } + + *out_re = exp_cos_two_x_y - NS(MathConst_two_over_sqrt_pi)() * sum_daw_z_im; + *out_im = exp_sin_two_x_y + NS(MathConst_two_over_sqrt_pi)() * sum_daw_z_re; +} + +#if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + +SIXTRL_INLINE void NS(dawson_cerrf_coeff)( + SIXTRL_REAL_T in_real, SIXTRL_REAL_T in_imag, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_re, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_im, + SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 0 ) */ +) SIXTRL_NOEXCEPT +{ + int const n_interval = NS(dawson_n_interval)( x ); + real_type const minus_xn = -NS(CERRF_DAWSON_XI)[ n_interval ]; + real_type const dz_re = x + minus_xn; + real_type temp = -( real_type )2. 
* x * y; + real_type sum_daw_z_im; + + SIXTRL_RESULT_PTR_DEC real_type exp_sin_two_x_y; + SIXTRL_RESULT_PTR_DEC real_type exp_cos_two_x_y; + + #if ( SIXTRL_CERRF_METHOD == 0 ) || ( SIXTRL_CERRF_METHOD == 1 ) + #if !defined( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR ) || \ + ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR != 1 ) + int const N_Taylor = NS(CERRF_DAWSON_NT_XI_ABS_D10)[ n_interval ]; + + #else /* ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR == 1 ) */ + int const N_Taylor = ( int )SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_ABS_D10; + + #endif /* ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR ) */ + #else + #if !defined( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR ) || \ + ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR != 1 ) + int const N_Taylor = NS(CERRF_DAWSON_NT_XI_REL_D14)[ n_interval ]; + + #else /* ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR == 1 ) */ + int const N_Taylor = ( int )SIXTRL_CERRF_DAWSON_MAX_N_TAYLOR_REL_D14; + + #endif /* ( SIXTRL_CERRF_DAWSON_USE_MAX_N_TAYLOR ) */ + #endif /* SIXTRL_CERRF_METHOD */ + + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 0 ) + int jj = n_interval * ( int )SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN; + int jj_end = jj + N_Taylor; + + real_type dz_pow_n_re = dz_re; + real_type dz_pow_n_im = y; + real_type dn = Fz_kk_xi[ jj ]; + real_type sum_daw_z_re = dn; + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF == 0 -> recursion ) */ + real_type dz_pow_n_re = ( real_type )1.0; + real_type dz_pow_n_im = ( real_type )0.0; + + real_type dn_minus_1 = NS(CERRF_DAWSON_FZ_XI)[ n_interval ]; + real_type dn_plus_1 = ( real_type )0.0; + real_type sum_daw_z_re = dn_minus_1; + real_type inv_factorial, sum_re, sum_im; + + real_type dn = ( real_type )1. + ( real_type )2. 
* minus_xn * dn_minus_1; + real_type nn = ( real_type )1.0; + int n = 1; + + sum_daw_z_im = ( real_type )0.0; + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 0 ) */ + + NS(sincos)( temp, &exp_sin_two_x_y, &exp_cos_two_x_y ); + + temp = ( y - x ) * ( y + x ); + temp = NS(exp)( temp ); + exp_sin_two_x_y *= temp; + exp_cos_two_x_y *= temp; + + SIXTRL_ASSERT( out_re != NULL ); + SIXTRL_ASSERT( out_im != NULL ); + + SIXTRL_ASSERT( x >= ( real_type )SIXTRL_CERRF_DAWSON_X_MIN ); + SIXTRL_ASSERT( x <= ( real_type )SIXTRL_CERRF_DAWSON_X_MAX ); + SIXTRL_ASSERT( y >= ( real_type )SIXTRL_CERRF_DAWSON_Y_MIN ); + SIXTRL_ASSERT( y <= ( real_type )SIXTRL_CERRF_DAWSON_Y_MAX ); + SIXTRL_ASSERT( N_Taylor <= ( int )SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN ); + + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 0 ) + + SIXTRL_ASSERT( jj_end >= jj + ( int )2 ); + SIXTRL_ASSERT( jj_end <= jj + ( int )SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN ); + + ++jj; + dn = Fz_kk_xi[ jj++ ]; + sum_daw_z_re += dn * dz_pow_n_re; + sum_daw_z_im = dn * dz_pow_n_im; + + for( ; jj < jj_end ; ++jj ) + { + temp = dz_pow_n_re * dz_re - dz_pow_n_im * y; + dz_pow_n_im *= dz_re; + dz_pow_n_im += dz_pow_n_re * y; + dz_pow_n_re = temp; + + dn = Fz_kk_xi[ jj ]; + sum_daw_z_re += dz_pow_n_re * dn; + sum_daw_z_im += dz_pow_n_im * dn; + } + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF == 0 ) */ + sum_daw_z_im = ( real_type )0.0; + + for( ; n < N_Taylor ; ++n, nn += ( real_type )1.0 ) + { + temp = ( minus_xn * dn ) - ( nn * dn_minus_1 ); + dn_plus_1 = ( real_type )2. 
* temp; + + temp = dz_pow_n_re * dz_re - dz_pow_n_im * y; + dz_pow_n_im *= dz_re; + dz_pow_n_im += dz_pow_n_re * y; + dz_pow_n_re = temp; + + sum_re = dz_pow_n_re * dn; + sum_im = dz_pow_n_im * dn; + dn_minus_1 = dn; + dn = dn_plus_1; + inv_factorial = NS(Math_inv_factorial)( n ); + + sum_daw_z_re += sum_re * inv_factorial; + sum_daw_z_im += sum_im * inv_factorial; + } + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ + + *out_re = exp_cos_two_x_y - NS(MathConst_two_over_sqrt_pi)() * sum_daw_z_im; + *out_im = exp_sin_two_x_y + NS(MathConst_two_over_sqrt_pi)() * sum_daw_z_re; +} + +#endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + +#if !defined( _GPUCODE ) && defined( __cplusplus ) +} +#endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ +#endif /* SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DAWSON_APPROX_H__ */ diff --git a/sixtracklib/common/be_beamfields/dawson_coeff.h b/sixtracklib/common/be_beamfields/dawson_coeff.h new file mode 100644 index 000000000..694b4232f --- /dev/null +++ b/sixtracklib/common/be_beamfields/dawson_coeff.h @@ -0,0 +1,37 @@ +#ifndef SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DAWSON_COEFF_H__ +#define SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DAWSON_COEFF_H__ + +#if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) + #include + #include +#endif /* !defined( SIXTRL_NO_SYSTEM_INCLUDES ) */ + +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/definitions.h" + #include "sixtracklib/common/be_beamfields/definitions.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) && defined( __cplusplus ) +extern "C" { +#endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ + +SIXTRL_EXTERN SIXTRL_REAL_T const + NS(CERRF_DAWSON_XI)[ SIXTRL_CERRF_DAWSON_N_XN ]; + +SIXTRL_EXTERN SIXTRL_REAL_T const + NS(CERRF_DAWSON_FZ_XI)[ SIXTRL_CERRF_DAWSON_N_XN ]; + +SIXTRL_EXTERN SIXTRL_INT32_T const + NS(CERRF_DAWSON_NT_XI_ABS_D10)[ SIXTRL_CERRF_DAWSON_N_XN ]; + +SIXTRL_EXTERN SIXTRL_INT32_T const + NS(CERRF_DAWSON_NT_XI_REL_D14)[ SIXTRL_CERRF_DAWSON_N_XN 
]; + +SIXTRL_EXTERN SIXTRL_REAL_T const + NS(CERRF_DAWSON_FZ_KK_XI)[ SIXTRL_CERRF_DAWSON_NUM_TAYLOR_COEFF ]; + +#if !defined( _GPUCODE ) && defined( __cplusplus ) +} +#endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ + +#endif /* SIXTRACKLIB_COMMON_BE_BEAMFIELDS_DAWSON_COEFF_H__ */ diff --git a/sixtracklib/common/be_beamfields/dawson_coeff_xn96_ntmax25.c b/sixtracklib/common/be_beamfields/dawson_coeff_xn96_ntmax25.c new file mode 100644 index 000000000..968caacf0 --- /dev/null +++ b/sixtracklib/common/be_beamfields/dawson_coeff_xn96_ntmax25.c @@ -0,0 +1,3011 @@ +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/be_beamfields/dawson_coeff.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) && \ + defined( SIXTRL_CERRF_DAWSON_N_XN ) && \ + ( SIXTRL_CERRF_DAWSON_N_XN == 96 ) && \ + defined( SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN ) && \ + ( SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN == 25 ) && \ + ( SIXTRL_CERRF_DAWSON_NUM_TAYLOR_COEFF == 2400 ) + +typedef SIXTRL_REAL_T real_type; +typedef SIXTRL_INT32_T int_type; + +real_type const NS(CERRF_DAWSON_XI)[ SIXTRL_CERRF_DAWSON_N_XN ] = { + ( real_type )+0.0, /* n = 0 */ + ( real_type )+0.21052631578947367252396816184046, /* n = 1 */ + ( real_type )+0.42105263157894734504793632368091, /* n = 2 */ + ( real_type )+0.63157894736842101757190448552137, /* n = 3 */ + ( real_type )+0.84210526315789469009587264736183, /* n = 4 */ + ( real_type )+1.0526315789473683626198408092023, /* n = 5 */ + ( real_type )+1.2631578947368420351438089710427, /* n = 6 */ + ( real_type )+1.4736842105263157076677771328832, /* n = 7 */ + ( real_type )+1.6842105263157893801917452947237, /* n = 8 */ + ( real_type )+1.8947368421052630527157134565641, /* n = 9 */ + ( real_type )+2.1052631578947367252396816184046, /* n = 10 */ + ( real_type )+2.3157894736842106198082547052763, /* n = 11 */ + ( real_type )+2.5263157894736840702876179420855, /* n = 12 */ + ( real_type )+2.7368421052631575207669811788946, /* 
n = 13 */ + ( real_type )+2.9473684210526314153355542657664, /* n = 14 */ + ( real_type )+3.1578947368421053099041273526382, /* n = 15 */ + ( real_type )+3.3684210526315787603834905894473, /* n = 16 */ + ( real_type )+3.5789473684210522108628538262565, /* n = 17 */ + ( real_type )+3.7894736842105261054314269131282, /* n = 18 */ + ( real_type )+4.0, /* n = 19 */ + ( real_type )+4.2105263157894734504793632368091, /* n = 20 */ + ( real_type )+4.4210526315789469009587264736183, /* n = 21 */ + ( real_type )+4.6315789473684212396165094105527, /* n = 22 */ + ( real_type )+4.8421052631578946900958726473618, /* n = 23 */ + ( real_type )+5.052631578947368140575235884171, /* n = 24 */ + ( real_type )+5.2631578947368415910545991209801, /* n = 25 */ + ( real_type )+5.4736842105263150415339623577893, /* n = 26 */ + ( real_type )+5.6842105263157893801917452947237, /* n = 27 */ + ( real_type )+5.8947368421052628306711085315328, /* n = 28 */ + ( real_type )+6.105263157894736281150471768342, /* n = 29 */ + ( real_type )+6.3157894736842106198082547052763, /* n = 30 */ + ( real_type )+6.5263157894736840702876179420855, /* n = 31 */ + ( real_type )+6.7368421052631575207669811788946, /* n = 32 */ + ( real_type )+6.9473684210526309712463444157038, /* n = 33 */ + ( real_type )+7.1578947368421044217257076525129, /* n = 34 */ + ( real_type )+7.3684210526315787603834905894473, /* n = 35 */ + ( real_type )+7.5789473684210522108628538262565, /* n = 36 */ + ( real_type )+7.7894736842105256613422170630656, /* n = 37 */ + ( real_type )+8.0, /* n = 38 */ + ( real_type )+8.2105263157894725623009435366839, /* n = 39 */ + ( real_type )+8.4210526315789469009587264736183, /* n = 40 */ + ( real_type )+8.6315789473684212396165094105527, /* n = 41 */ + ( real_type )+8.8421052631578938019174529472366, /* n = 42 */ + ( real_type )+9.052631578947368140575235884171, /* n = 43 */ + ( real_type )+9.2631578947368424792330188211054, /* n = 44 */ + ( real_type )+9.4736842105263150415339623577893, /* n = 45 */ + ( 
real_type )+9.6842105263157893801917452947237, /* n = 46 */ + ( real_type )+9.8947368421052619424926888314076, /* n = 47 */ + ( real_type )+10.105263157894736281150471768342, /* n = 48 */ + ( real_type )+10.315789473684210619808254705276, /* n = 49 */ + ( real_type )+10.52631578947368318210919824196, /* n = 50 */ + ( real_type )+10.736842105263157520766981178895, /* n = 51 */ + ( real_type )+10.947368421052630083067924715579, /* n = 52 */ + ( real_type )+11.157894736842104421725707652513, /* n = 53 */ + ( real_type )+11.368421052631578760383490589447, /* n = 54 */ + ( real_type )+11.578947368421051322684434126131, /* n = 55 */ + ( real_type )+11.789473684210525661342217063066, /* n = 56 */ + ( real_type )+12.0, /* n = 57 */ + ( real_type )+12.210526315789472562300943536684, /* n = 58 */ + ( real_type )+12.421052631578946900958726473618, /* n = 59 */ + ( real_type )+12.631578947368421239616509410553, /* n = 60 */ + ( real_type )+12.842105263157893801917452947237, /* n = 61 */ + ( real_type )+13.052631578947368140575235884171, /* n = 62 */ + ( real_type )+13.263157894736840702876179420855, /* n = 63 */ + ( real_type )+13.473684210526315041533962357789, /* n = 64 */ + ( real_type )+13.684210526315789380191745294724, /* n = 65 */ + ( real_type )+13.894736842105261942492688831408, /* n = 66 */ + ( real_type )+14.105263157894736281150471768342, /* n = 67 */ + ( real_type )+14.315789473684208843451415305026, /* n = 68 */ + ( real_type )+14.52631578947368318210919824196, /* n = 69 */ + ( real_type )+14.736842105263157520766981178895, /* n = 70 */ + ( real_type )+14.947368421052630083067924715579, /* n = 71 */ + ( real_type )+15.157894736842104421725707652513, /* n = 72 */ + ( real_type )+15.368421052631578760383490589447, /* n = 73 */ + ( real_type )+15.578947368421051322684434126131, /* n = 74 */ + ( real_type )+15.789473684210525661342217063066, /* n = 75 */ + ( real_type )+16.0, /* n = 76 */ + ( real_type )+16.210526315789472562300943536684, /* n = 77 */ + ( real_type 
)+16.421052631578945124601887073368, /* n = 78 */ + ( real_type )+16.631578947368421239616509410553, /* n = 79 */ + ( real_type )+16.842105263157893801917452947237, /* n = 80 */ + ( real_type )+17.052631578947366364218396483921, /* n = 81 */ + ( real_type )+17.263157894736842479233018821105, /* n = 82 */ + ( real_type )+17.473684210526315041533962357789, /* n = 83 */ + ( real_type )+17.684210526315787603834905894473, /* n = 84 */ + ( real_type )+17.894736842105263718849528231658, /* n = 85 */ + ( real_type )+18.105263157894736281150471768342, /* n = 86 */ + ( real_type )+18.315789473684208843451415305026, /* n = 87 */ + ( real_type )+18.526315789473684958466037642211, /* n = 88 */ + ( real_type )+18.736842105263157520766981178895, /* n = 89 */ + ( real_type )+18.947368421052630083067924715579, /* n = 90 */ + ( real_type )+19.157894736842102645368868252262, /* n = 91 */ + ( real_type )+19.368421052631578760383490589447, /* n = 92 */ + ( real_type )+19.578947368421051322684434126131, /* n = 93 */ + ( real_type )+19.789473684210523884985377662815, /* n = 94 */ + ( real_type )+20.0, /* n = 95 */ +}; + +real_type const NS(CERRF_DAWSON_FZ_XI)[ SIXTRL_CERRF_DAWSON_N_XN ] = { + ( real_type )+0.0, /* n = 0 */ + ( real_type )+0.20441467642329930785220573813364, /* n = 1 */ + ( real_type )+0.37464539106214656579290313815167, /* n = 2 */ + ( real_type )+0.48762077952615903260008506595599, /* n = 3 */ + ( real_type )+0.53722517966538365699947104498489, /* n = 4 */ + ( real_type )+0.53284420923280044251209395730367, /* n = 5 */ + ( real_type )+0.49264160390503654761220859402073, /* n = 6 */ + ( real_type )+0.43573808885380492935587004714005, /* n = 7 */ + ( real_type )+0.37679023586940362782529848142144, /* n = 8 */ + ( real_type )+0.32417366240852333435184868323245, /* n = 9 */ + ( real_type )+0.28091984293386913151428698183339, /* n = 10 */ + ( real_type )+0.24677460673890200598119659424562, /* n = 11 */ + ( real_type )+0.22009114223089846120666843727015, /* n = 12 */ + ( 
real_type )+0.19901899523829794821540303219113, /* n = 13 */ + ( real_type )+0.18202256320459304126277648063622, /* n = 14 */ + ( real_type )+0.16798629674465919700501587215986, /* n = 15 */ + ( real_type )+0.15614536800292368169373053641861, /* n = 16 */ + ( real_type )+0.14598086940844469807270194412764, /* n = 17 */ + ( real_type )+0.13713391418893704081595586374072, /* n = 18 */ + ( real_type )+0.12934800123600511559147052625767, /* n = 19 */ + ( real_type )+0.12243340167638945979473309042399, /* n = 20 */ + ( real_type )+0.11624553653603577023094381964614, /* n = 21 */ + ( real_type )+0.11067155975896599839267205651452, /* n = 22 */ + ( real_type )+0.10562169336423022818804007080043, /* n = 23 */ + ( real_type )+0.10102339086405869633311155706155, /* n = 24 */ + ( real_type )+0.096817259970873109613478610873863, /* n = 25 */ + ( real_type )+0.092954129676277418258634266415077, /* n = 26 */ + ( real_type )+0.08939289048860964819564613706182, /* n = 27 */ + ( real_type )+0.086098873132516493460371859950336, /* n = 28 */ + ( real_type )+0.083042611524151176870260116792922, /* n = 29 */ + ( real_type )+0.080198885578181547604814806338922, /* n = 30 */ + ( real_type )+0.077545971321221856828764234900494, /* n = 31 */ + ( real_type )+0.075065046896665559389168281079016, /* n = 32 */ + ( real_type )+0.072739717357947467121648174137541, /* n = 33 */ + ( real_type )+0.070555631055295055197498789697067, /* n = 34 */ + ( real_type )+0.068500167405515477137939676640344, /* n = 35 */ + ( real_type )+0.066562180837133088202068951713954, /* n = 36 */ + ( real_type )+0.064731789337995548319934783621747, /* n = 37 */ + ( real_type )+0.063000198707553387919245729516967, /* n = 38 */ + ( real_type )+0.061359555607928556173875408870685, /* n = 39 */ + ( real_type )+0.059802824007097002599716806955238, /* n = 40 */ + ( real_type )+0.058323680747006979215164017110813, /* n = 41 */ + ( real_type )+0.056916426843506876837940629331046, /* n = 42 */ + ( real_type 
)+0.055575911801060267884513776925596, /* n = 43 */ + ( real_type )+0.054297468752385907695410548153628, /* n = 44 */ + ( real_type )+0.053076858647133428358123471974675, /* n = 45 */ + ( real_type )+0.051910222041179244310098015438475, /* n = 46 */ + ( real_type )+0.050794037298704905184242750197703, /* n = 47 */ + ( real_type )+0.049725084227974962162112538308379, /* n = 48 */ + ( real_type )+0.048700412339829677166350863392234, /* n = 49 */ + ( real_type )+0.04771731305406125120100771093429, /* n = 50 */ + ( real_type )+0.046773295289697525604789830375489, /* n = 51 */ + ( real_type )+0.045866063965868140976115308273045, /* n = 52 */ + ( real_type )+0.044993501014460917008154483409071, /* n = 53 */ + ( real_type )+0.044153648567289994491687137251774, /* n = 54 */ + ( real_type )+0.04334469403149053358989657593368, /* n = 55 */ + ( real_type )+0.042564956809319422721604668066478, /* n = 56 */ + ( real_type )+0.041812876453988260317929117588809, /* n = 57 */ + ( real_type )+0.04108700208293198362993726325429, /* n = 58 */ + ( real_type )+0.040385982894919183194249326844771, /* n = 59 */ + ( real_type )+0.039708559658574038649657796138204, /* n = 60 */ + ( real_type )+0.039053557057798635529280142923707, /* n = 61 */ + ( real_type )+0.038419876794774614652923377940805, /* n = 62 */ + ( real_type )+0.037806491364243163245270451029547, /* n = 63 */ + ( real_type )+0.037212438423822762346947608294314, /* n = 64 */ + ( real_type )+0.036636815694650440692563217517499, /* n = 65 */ + ( real_type )+0.036078776334804115545674579641042, /* n = 66 */ + ( real_type )+0.035537524735014035698607529922459, /* n = 67 */ + ( real_type )+0.035012312692246946910115438632162, /* n = 68 */ + ( real_type )+0.034502435922029356494619763963763, /* n = 69 */ + ( real_type )+0.034007230874950443082766319399504, /* n = 70 */ + ( real_type )+0.03352607182676484482438846033294, /* n = 71 */ + ( real_type )+0.033058368214980795993375101698924, /* n = 72 */ + ( real_type 
)+0.032603562197868881852713857794533, /* n = 73 */ + ( real_type )+0.032161126414470307789361137738387, /* n = 74 */ + ( real_type )+0.03173056192651008156605002634627, /* n = 75 */ + ( real_type )+0.031311396325184611783553351952309, /* n = 76 */ + ( real_type )+0.03090318198758078331855979509403, /* n = 77 */ + ( real_type )+0.03050549446908896219357309578125, /* n = 78 */ + ( real_type )+0.030117931019557054260824339522332, /* n = 79 */ + ( real_type )+0.029740109212212645037011539517855, /* n = 80 */ + ( real_type )+0.029371665675448908836595910531007, /* n = 81 */ + ( real_type )+0.029012254918562620423246003884763, /* n = 82 */ + ( real_type )+0.028661548243434770656173998517326, /* n = 83 */ + ( real_type )+0.028319232734848067795097432772015, /* n = 84 */ + ( real_type )+0.027985010322930036021874610632234, /* n = 85 */ + ( real_type )+0.02765859691173434825061506903126, /* n = 86 */ + ( real_type )+0.027339721568593548003512587274909, /* n = 87 */ + ( real_type )+0.027028125769331915162333782393284, /* n = 88 */ + ( real_type )+0.026723562694908736328147463261245, /* n = 89 */ + ( real_type )+0.026425796575435644735625484197408, /* n = 90 */ + ( real_type )+0.026134602077874387404866801092746, /* n = 91 */ + ( real_type )+0.025849763734065242945572925620967, /* n = 92 */ + ( real_type )+0.025571075406019297908377800027531, /* n = 93 */ + ( real_type )+0.025298339785651370912122576143504, /* n = 94 */ + ( real_type )+0.025031367926403671946994952347824, /* n = 95 */ +}; + +int_type const NS(CERRF_DAWSON_NT_XI_ABS_D10)[ SIXTRL_CERRF_DAWSON_N_XN ] = { + ( int_type )19, /* n = 0 */ + ( int_type )19, /* n = 1 */ + ( int_type )19, /* n = 2 */ + ( int_type )19, /* n = 3 */ + ( int_type )19, /* n = 4 */ + ( int_type )19, /* n = 5 */ + ( int_type )18, /* n = 6 */ + ( int_type )18, /* n = 7 */ + ( int_type )18, /* n = 8 */ + ( int_type )18, /* n = 9 */ + ( int_type )17, /* n = 10 */ + ( int_type )18, /* n = 11 */ + ( int_type )17, /* n = 12 */ + ( int_type )17, /* 
n = 13 */ + ( int_type )17, /* n = 14 */ + ( int_type )16, /* n = 15 */ + ( int_type )16, /* n = 16 */ + ( int_type )15, /* n = 17 */ + ( int_type )15, /* n = 18 */ + ( int_type )14, /* n = 19 */ + ( int_type )14, /* n = 20 */ + ( int_type )14, /* n = 21 */ + ( int_type )12, /* n = 22 */ + ( int_type )12, /* n = 23 */ + ( int_type )12, /* n = 24 */ + ( int_type )11, /* n = 25 */ + ( int_type )11, /* n = 26 */ + ( int_type )10, /* n = 27 */ + ( int_type )10, /* n = 28 */ + ( int_type )10, /* n = 29 */ + ( int_type )10, /* n = 30 */ + ( int_type )9, /* n = 31 */ + ( int_type )9, /* n = 32 */ + ( int_type )9, /* n = 33 */ + ( int_type )9, /* n = 34 */ + ( int_type )9, /* n = 35 */ + ( int_type )9, /* n = 36 */ + ( int_type )9, /* n = 37 */ + ( int_type )8, /* n = 38 */ + ( int_type )8, /* n = 39 */ + ( int_type )8, /* n = 40 */ + ( int_type )8, /* n = 41 */ + ( int_type )8, /* n = 42 */ + ( int_type )8, /* n = 43 */ + ( int_type )8, /* n = 44 */ + ( int_type )8, /* n = 45 */ + ( int_type )8, /* n = 46 */ + ( int_type )8, /* n = 47 */ + ( int_type )8, /* n = 48 */ + ( int_type )8, /* n = 49 */ + ( int_type )7, /* n = 50 */ + ( int_type )7, /* n = 51 */ + ( int_type )7, /* n = 52 */ + ( int_type )7, /* n = 53 */ + ( int_type )7, /* n = 54 */ + ( int_type )7, /* n = 55 */ + ( int_type )7, /* n = 56 */ + ( int_type )7, /* n = 57 */ + ( int_type )7, /* n = 58 */ + ( int_type )7, /* n = 59 */ + ( int_type )7, /* n = 60 */ + ( int_type )7, /* n = 61 */ + ( int_type )7, /* n = 62 */ + ( int_type )7, /* n = 63 */ + ( int_type )7, /* n = 64 */ + ( int_type )7, /* n = 65 */ + ( int_type )7, /* n = 66 */ + ( int_type )7, /* n = 67 */ + ( int_type )7, /* n = 68 */ + ( int_type )7, /* n = 69 */ + ( int_type )6, /* n = 70 */ + ( int_type )6, /* n = 71 */ + ( int_type )6, /* n = 72 */ + ( int_type )6, /* n = 73 */ + ( int_type )6, /* n = 74 */ + ( int_type )6, /* n = 75 */ + ( int_type )6, /* n = 76 */ + ( int_type )6, /* n = 77 */ + ( int_type )6, /* n = 78 */ + ( int_type )6, /* n 
= 79 */ + ( int_type )6, /* n = 80 */ + ( int_type )6, /* n = 81 */ + ( int_type )6, /* n = 82 */ + ( int_type )6, /* n = 83 */ + ( int_type )6, /* n = 84 */ + ( int_type )6, /* n = 85 */ + ( int_type )6, /* n = 86 */ + ( int_type )6, /* n = 87 */ + ( int_type )6, /* n = 88 */ + ( int_type )6, /* n = 89 */ + ( int_type )6, /* n = 90 */ + ( int_type )6, /* n = 91 */ + ( int_type )6, /* n = 92 */ + ( int_type )6, /* n = 93 */ + ( int_type )6, /* n = 94 */ + ( int_type )6, /* n = 95 */ +}; + +int_type const NS(CERRF_DAWSON_NT_XI_REL_D14)[ SIXTRL_CERRF_DAWSON_N_XN ] = { + ( int_type )25, /* n = 0 */ + ( int_type )25, /* n = 1 */ + ( int_type )25, /* n = 2 */ + ( int_type )24, /* n = 3 */ + ( int_type )25, /* n = 4 */ + ( int_type )24, /* n = 5 */ + ( int_type )24, /* n = 6 */ + ( int_type )24, /* n = 7 */ + ( int_type )24, /* n = 8 */ + ( int_type )24, /* n = 9 */ + ( int_type )24, /* n = 10 */ + ( int_type )23, /* n = 11 */ + ( int_type )24, /* n = 12 */ + ( int_type )23, /* n = 13 */ + ( int_type )24, /* n = 14 */ + ( int_type )23, /* n = 15 */ + ( int_type )23, /* n = 16 */ + ( int_type )22, /* n = 17 */ + ( int_type )22, /* n = 18 */ + ( int_type )21, /* n = 19 */ + ( int_type )21, /* n = 20 */ + ( int_type )21, /* n = 21 */ + ( int_type )21, /* n = 22 */ + ( int_type )20, /* n = 23 */ + ( int_type )19, /* n = 24 */ + ( int_type )19, /* n = 25 */ + ( int_type )19, /* n = 26 */ + ( int_type )17, /* n = 27 */ + ( int_type )17, /* n = 28 */ + ( int_type )17, /* n = 29 */ + ( int_type )16, /* n = 30 */ + ( int_type )16, /* n = 31 */ + ( int_type )15, /* n = 32 */ + ( int_type )15, /* n = 33 */ + ( int_type )15, /* n = 34 */ + ( int_type )15, /* n = 35 */ + ( int_type )15, /* n = 36 */ + ( int_type )14, /* n = 37 */ + ( int_type )14, /* n = 38 */ + ( int_type )14, /* n = 39 */ + ( int_type )14, /* n = 40 */ + ( int_type )14, /* n = 41 */ + ( int_type )14, /* n = 42 */ + ( int_type )13, /* n = 43 */ + ( int_type )13, /* n = 44 */ + ( int_type )13, /* n = 45 */ + ( 
int_type )13, /* n = 46 */ + ( int_type )13, /* n = 47 */ + ( int_type )13, /* n = 48 */ + ( int_type )13, /* n = 49 */ + ( int_type )13, /* n = 50 */ + ( int_type )13, /* n = 51 */ + ( int_type )12, /* n = 52 */ + ( int_type )12, /* n = 53 */ + ( int_type )12, /* n = 54 */ + ( int_type )12, /* n = 55 */ + ( int_type )12, /* n = 56 */ + ( int_type )12, /* n = 57 */ + ( int_type )12, /* n = 58 */ + ( int_type )12, /* n = 59 */ + ( int_type )12, /* n = 60 */ + ( int_type )12, /* n = 61 */ + ( int_type )12, /* n = 62 */ + ( int_type )12, /* n = 63 */ + ( int_type )12, /* n = 64 */ + ( int_type )12, /* n = 65 */ + ( int_type )12, /* n = 66 */ + ( int_type )11, /* n = 67 */ + ( int_type )11, /* n = 68 */ + ( int_type )11, /* n = 69 */ + ( int_type )11, /* n = 70 */ + ( int_type )11, /* n = 71 */ + ( int_type )11, /* n = 72 */ + ( int_type )11, /* n = 73 */ + ( int_type )11, /* n = 74 */ + ( int_type )11, /* n = 75 */ + ( int_type )11, /* n = 76 */ + ( int_type )11, /* n = 77 */ + ( int_type )11, /* n = 78 */ + ( int_type )11, /* n = 79 */ + ( int_type )11, /* n = 80 */ + ( int_type )11, /* n = 81 */ + ( int_type )11, /* n = 82 */ + ( int_type )11, /* n = 83 */ + ( int_type )11, /* n = 84 */ + ( int_type )11, /* n = 85 */ + ( int_type )11, /* n = 86 */ + ( int_type )11, /* n = 87 */ + ( int_type )11, /* n = 88 */ + ( int_type )11, /* n = 89 */ + ( int_type )10, /* n = 90 */ + ( int_type )10, /* n = 91 */ + ( int_type )10, /* n = 92 */ + ( int_type )10, /* n = 93 */ + ( int_type )10, /* n = 94 */ + ( int_type )10, /* n = 95 */ +}; + +real_type const NS(CERRF_DAWSON_FZ_KK_XI)[ SIXTRL_CERRF_DAWSON_NUM_TAYLOR_COEFF ] = { + /* ===== n = 0, xn = 0.00, yn = 0 ==== */ + ( real_type )+0.0, /* F^(00)(x_00) / ( 0!) */ + ( real_type )+1.0, /* F^(01)(x_00) / ( 1!) */ + ( real_type )+0.0, /* F^(02)(x_00) / ( 2!) */ + ( real_type )-0.111111111111111111111111111111111111111111, /* F^(03)(x_00) / ( 3!) */ + ( real_type )+0.0, /* F^(04)(x_00) / ( 4!) 
*/ + ( real_type )+0.00222222222222222222222222222222222222222222, /* F^(05)(x_00) / ( 5!) */ + ( real_type )+0.0, /* F^(06)(x_00) / ( 6!) */ + ( real_type )-0.00001511715797430083144368858654572940287226, /* F^(07)(x_00) / ( 7!) */ + ( real_type )+0.0, /* F^(08)(x_00) / ( 8!) */ + ( real_type )+0.000000046657894982409973591631439955954947136605, /* F^(09)(x_00) / ( 9!) */ + ( real_type )+0.0, /* F^(10)(x_00) / (10!) */ + ( real_type )-0.000000000077120487574231361308481718935462722539843, /* F^(11)(x_00) / (11!) */ + ( real_type )+0.0, /* F^(12)(x_00) / (12!) */ + ( real_type )+7.60557076668948336375559358337896671990562e-14, /* F^(13)(x_00) / (13!) */ + ( real_type )+0.0, /* F^(14)(x_00) / (14!) */ + ( real_type )-4.82893382012030689762259910055807410787658e-17, /* F^(15)(x_00) / (15!) */ + ( real_type )+0.0, /* F^(16)(x_00) / (16!) */ + ( real_type )+2.08863919555376595917932487048359606742067e-20, /* F^(17)(x_00) / (17!) */ + ( real_type )+0.0, /* F^(18)(x_00) / (18!) */ + ( real_type )-6.42856015867579550378370227911233015518826e-24, /* F^(19)(x_00) / (19!) */ + ( real_type )+0.0, /* F^(20)(x_00) / (20!) */ + ( real_type )+1.45772339198997630471285765966265989913566e-27, /* F^(21)(x_00) / (21!) */ + ( real_type )+0.0, /* F^(22)(x_00) / (22!) */ + ( real_type )-2.50510979891729902854933435240189018583203e-31, /* F^(23)(x_00) / (23!) */ + ( real_type )+0.0, /* F^(24)(x_00) / (24!) */ + + /* ===== n = 1, xn = 0.21, yn = 0 ==== */ + ( real_type )+0.204414676423299307852205738133635368467483, /* F^(00)(x_01) / ( 0!) */ + ( real_type )+0.91393066255861082252424769629000686686398, /* F^(01)(x_01) / ( 1!) */ + ( real_type )-0.198410565849398156219882287184217271197233, /* F^(02)(x_01) / ( 2!) */ + ( real_type )-0.0922654857416281898614832710292364754630676, /* F^(03)(x_01) / ( 3!) */ + ( real_type )+0.0106951460088554894409087778940858415773282, /* F^(04)(x_01) / ( 4!) */ + ( real_type )+0.00166518093994657661664509429879735764632321, /* F^(05)(x_01) / ( 5!) 
*/ + ( real_type )-0.000138310756121161304012541781443233156444399, /* F^(06)(x_01) / ( 6!) */ + ( real_type )-0.0000101392694994905422857940322668621210471459, /* F^(07)(x_01) / ( 7!) */ + ( real_type )+0.000000684164453075140964398716193419639948224106, /* F^(08)(x_01) / ( 8!) */ + ( real_type )+0.0000000277376312527478547408459877153200446411114, /* F^(09)(x_01) / ( 9!) */ + ( real_type )-0.00000000163715547760545041389650770468670092107623, /* F^(10)(x_01) / (10!) */ + ( real_type )-0.000000000040150393623139732679898118369825541119215, /* F^(11)(x_01) / (11!) */ + ( real_type )+0.00000000000218451431379065031923725428039204544575731, /* F^(12)(x_01) / (12!) */ + ( real_type )+3.41534720117151308738394818162834509968719e-14, /* F^(13)(x_01) / (13!) */ + ( real_type )-1.78805884931965391937374038611597936871201e-15, /* F^(14)(x_01) / (14!) */ + ( real_type )-1.83386690947597952439907830845557790467828e-17, /* F^(15)(x_01) / (15!) */ + ( real_type )+9.61442935382648358598149021692013450612964e-19, /* F^(16)(x_01) / (16!) */ + ( real_type )+6.53119570515267296715704810458732225008521e-21, /* F^(17)(x_01) / (17!) */ + ( real_type )-3.57595403436545895930853220055087764766381e-22, /* F^(18)(x_01) / (18!) */ + ( real_type )-1.59313552915827515021951407828389462041065e-24, /* F^(19)(x_01) / (19!) */ + ( real_type )+9.57810383034681568889431913695377478160418e-26, /* F^(20)(x_01) / (20!) */ + ( real_type )+2.69806563906549417930642897313385075238486e-28, /* F^(21)(x_01) / (21!) */ + ( real_type )-1.9081831429131145329625755605237598464658e-29, /* F^(22)(x_01) / (22!) */ + ( real_type )-3.11784765736642961341661021164584575373153e-32, /* F^(23)(x_01) / (23!) */ + ( real_type )+2.90350255052018875816809170874702042780005e-33, /* F^(24)(x_01) / (24!) */ + + /* ===== n = 2, xn = 0.42, yn = 0 ==== */ + ( real_type )+0.374645391062146565792903138151666837723415, /* F^(00)(x_02) / ( 0!) */ + ( real_type )+0.684509144368718698950815851109794190038472, /* F^(01)(x_02) / ( 1!) 
*/ + ( real_type )-0.331429883819224579939172667787192053223398, /* F^(02)(x_02) / ( 2!) */ + ( real_type )-0.0450455883151933467567696429148889496401234, /* F^(03)(x_02) / ( 3!) */ + ( real_type )+0.0161803989301094457916534494551265432510126, /* F^(04)(x_02) / ( 4!) */ + ( real_type )+0.000355887802342285633460921196361315684296813, /* F^(05)(x_02) / ( 5!) */ + ( real_type )-0.000188107071208053154730773522113773389738648, /* F^(06)(x_02) / ( 6!) */ + ( real_type )+0.000000811769127634487608342893881358177870821899, /* F^(07)(x_02) / ( 7!) */ + ( real_type )+0.000000829082538018204566809440844950365616675594, /* F^(08)(x_02) / ( 8!) */ + ( real_type )-0.0000000111249018613093492347021319283166113240414, /* F^(09)(x_02) / ( 9!) */ + ( real_type )-0.00000000174872225594521797297600730414659517926027, /* F^(10)(x_02) / (10!) */ + ( real_type )+0.0000000000305585833702040074407598296711992258857965, /* F^(11)(x_02) / (11!) */ + ( real_type )+0.00000000000202927748048785147401883189399538472077027, /* F^(12)(x_02) / (12!) */ + ( real_type )-4.0248298669194948202329587410974379454941e-14, /* F^(13)(x_02) / (13!) */ + ( real_type )-1.41991444549586481143829570745800300889373e-15, /* F^(14)(x_02) / (14!) */ + ( real_type )+3.08687750245694524944825297201180242083163e-17, /* F^(15)(x_02) / (15!) */ + ( real_type )+6.37996750588626571371081785109479850644925e-19, /* F^(16)(x_02) / (16!) */ + ( real_type )-1.5210578891416950327750568372866635280638e-20, /* F^(17)(x_02) / (17!) */ + ( real_type )-1.92128187362901802666150555357641844366934e-22, /* F^(18)(x_02) / (18!) */ + ( real_type )+5.12979634085273062602889132801155413772297e-24, /* F^(19)(x_02) / (19!) */ + ( real_type )+3.97604780621263052463175337985739574710928e-26, /* F^(20)(x_02) / (20!) */ + ( real_type )-1.23914317897293340948580683760094243562159e-27, /* F^(21)(x_02) / (21!) */ + ( real_type )-5.66781653630923381224248652127038713116064e-30, /* F^(22)(x_02) / (22!) 
*/ + ( real_type )+2.21970279853439819428285307333493674461438e-31, /* F^(23)(x_02) / (23!) */ + ( real_type )+5.31130678687090944611450306158910569875354e-34, /* F^(24)(x_02) / (24!) */ + + /* ===== n = 3, xn = 0.63, yn = 0 ==== */ + ( real_type )+0.487620779526159032600085065955994007602486, /* F^(00)(x_03) / ( 0!) */ + ( real_type )+0.384057962703799150907333603988779433266063, /* F^(01)(x_03) / ( 1!) */ + ( real_type )-0.365091851669542399327779571326230663332197, /* F^(02)(x_03) / ( 2!) */ + ( real_type )+0.00856785467074171479665096750978176866975057, /* F^(03)(x_03) / ( 3!) */ + ( real_type )+0.0145357509069618505957683995035648975612606, /* F^(04)(x_03) / ( 4!) */ + ( real_type )-0.000905795033977117232633927105007213766740058, /* F^(05)(x_03) / ( 5!) */ + ( real_type )-0.000129726061516753292618986825266002818382292, /* F^(06)(x_03) / ( 6!) */ + ( real_type )+0.00000950604442347707267100686323339384090267369, /* F^(07)(x_03) / ( 7!) */ + ( real_type )+0.000000391514905368007166288124204669879409535135, /* F^(08)(x_03) / ( 8!) */ + ( real_type )-0.0000000354451388826174572167608488385294521036245, /* F^(09)(x_03) / ( 9!) */ + ( real_type )-0.00000000042230505294379543772690507908366313372887, /* F^(10)(x_03) / (10!) */ + ( real_type )+0.0000000000629955846127000554008323600051861272851109, /* F^(11)(x_03) / (11!) */ + ( real_type )-1.9379396793214615530322634609178450781386e-14, /* F^(12)(x_03) / (12!) */ + ( real_type )-6.19809755269946095782393667020484796009164e-14, /* F^(13)(x_03) / (13!) */ + ( real_type )+4.1465920520871834473032981642862989189074e-16, /* F^(14)(x_03) / (14!) */ + ( real_type )+3.702508900711221065794191521941520325027e-17, /* F^(15)(x_03) / (15!) */ + ( real_type )-3.98657919962879553134617722346845705647894e-19, /* F^(16)(x_03) / (16!) */ + ( real_type )-1.42718623771229997393007221526390210027813e-20, /* F^(17)(x_03) / (17!) */ + ( real_type )+2.00396751217899386438034473245059837096976e-22, /* F^(18)(x_03) / (18!) 
*/ + ( real_type )+3.69149514672159417438462388606817145711003e-24, /* F^(19)(x_03) / (19!) */ + ( real_type )-6.43933402575153984399000567285466608951549e-26, /* F^(20)(x_03) / (20!) */ + ( real_type )-6.52631652057603467545333190803571852499144e-28, /* F^(21)(x_03) / (21!) */ + ( real_type )+1.43741237516446968148675903533080636880221e-29, /* F^(22)(x_03) / (22!) */ + ( real_type )+7.78324428927041735773759673312162756460368e-32, /* F^(23)(x_03) / (23!) */ + ( real_type )-2.34069178680107098745680043427798690018813e-33, /* F^(24)(x_03) / (24!) */ + + /* ===== n = 4, xn = 0.84, yn = 0 ==== */ + ( real_type )+0.537225179665383656999471044984894541890407, /* F^(00)(x_04) / ( 0!) */ + ( real_type )+0.0951996974056696805431867766903154277388279, /* F^(01)(x_04) / ( 1!) */ + ( real_type )-0.308696672950868533871753196948953536155745, /* F^(02)(x_04) / ( 2!) */ + ( real_type )+0.047190054290760635118246241221478959671783, /* F^(03)(x_04) / ( 3!) */ + ( real_type )+0.00789498723708331531497161118419256999168001, /* F^(04)(x_04) / ( 4!) */ + ( real_type )-0.00147567391020819391511182373458437573432517, /* F^(05)(x_04) / ( 5!) */ + ( real_type )-0.0000186847044958640259232884921286547538877012, /* F^(06)(x_04) / ( 6!) */ + ( real_type )+0.0000106808220284850092288536478092559904640765, /* F^(07)(x_04) / ( 7!) */ + ( real_type )-0.000000197660404550964397247132939775733163458599, /* F^(08)(x_04) / ( 8!) */ + ( real_type )-0.000000028855602137539268015764772270219943690106, /* F^(09)(x_04) / ( 9!) */ + ( real_type )+0.000000000925234432078827825883666094563035916928936, /* F^(10)(x_04) / (10!) */ + ( real_type )+0.0000000000348167839478585166051219360907572612736362, /* F^(11)(x_04) / (11!) */ + ( real_type )-0.00000000000157543914037219745309226734079775711675278, /* F^(12)(x_04) / (12!) */ + ( real_type )-1.86356576382149860528835258302316927939912e-14, /* F^(13)(x_04) / (13!) */ + ( real_type )+1.39674297512271620641905298200755885943749e-15, /* F^(14)(x_04) / (14!) 
*/ + ( real_type )+1.37701148533321043056697343388954837328402e-18, /* F^(15)(x_04) / (15!) */ + ( real_type )-7.36529585630799846681814748026165425469773e-19, /* F^(16)(x_04) / (16!) */ + ( real_type )+3.69669358268425041210747462674301049670499e-21, /* F^(17)(x_04) / (17!) */ + ( real_type )+2.4822385568350496943541921814303795743077e-22, /* F^(18)(x_04) / (18!) */ + ( real_type )-2.29585862059338395715338256341318075878658e-24, /* F^(19)(x_04) / (19!) */ + ( real_type )-5.5655294145792323200025573321570584309174e-26, /* F^(20)(x_04) / (20!) */ + ( real_type )+7.33154408854610784357152740430169281046743e-28, /* F^(21)(x_04) / (21!) */ + ( real_type )+8.40024345362274416341569814651635384534863e-30, /* F^(22)(x_04) / (22!) */ + ( real_type )-1.52737578928490251392155450742826204209444e-31, /* F^(23)(x_04) / (23!) */ + ( real_type )-8.21551587315247026067284459903635144785149e-34, /* F^(24)(x_04) / (24!) */ + + /* ===== n = 5, xn = 1.05, yn = 0 ==== */ + ( real_type )+0.532844209232800442512093957303667626022578, /* F^(00)(x_05) / ( 0!) */ + ( real_type )-0.121777282595369290385891528171987664988757, /* F^(01)(x_05) / ( 1!) */ + ( real_type )-0.20232879798725849303189204313140153297696, /* F^(02)(x_05) / ( 2!) */ + ( real_type )+0.0608591829754523730371195357903567777262678, /* F^(03)(x_05) / ( 3!) */ + ( real_type )+0.00042257934919028330719424401259413463279533, /* F^(04)(x_05) / ( 4!) */ + ( real_type )-0.00125276928891454500042545411391120361318525, /* F^(05)(x_05) / ( 5!) */ + ( real_type )+0.000068566035822712929971689023199279585075625, /* F^(06)(x_05) / ( 6!) */ + ( real_type )+0.00000557633089532789254143304527257229319613239, /* F^(07)(x_05) / ( 7!) */ + ( real_type )-0.000000489530311554852110414925072484716340731518, /* F^(08)(x_05) / ( 8!) */ + ( real_type )-0.00000000448756289188655376360338429212215390247952, /* F^(09)(x_05) / ( 9!) */ + ( real_type )+0.00000000118232014503880347725057599141034637254044, /* F^(10)(x_05) / (10!) 
*/ + ( real_type )-0.000000000013153574081071436347565064961235902249346, /* F^(11)(x_05) / (11!) */ + ( real_type )-0.00000000000130052475132253789420082657864332918377298, /* F^(12)(x_05) / (12!) */ + ( real_type )+2.91728354540520278150869063105797909543789e-14, /* F^(13)(x_05) / (13!) */ + ( real_type )+7.07470588163046070352049340842678849729824e-16, /* F^(14)(x_05) / (14!) */ + ( real_type )-2.51420430513831718358871554955705732767532e-17, /* F^(15)(x_05) / (15!) */ + ( real_type )-1.61714042206220509995118340289956201613635e-19, /* F^(16)(x_05) / (16!) */ + ( real_type )+1.20526159051691591970423277706205872945739e-20, /* F^(17)(x_05) / (17!) */ + ( real_type )-1.95948974857494211215950833907597360806821e-23, /* F^(18)(x_05) / (18!) */ + ( real_type )-3.59536600902859557647883092757921319006825e-24, /* F^(19)(x_05) / (19!) */ + ( real_type )+2.40795309648214022788193623063523001920157e-26, /* F^(20)(x_05) / (20!) */ + ( real_type )+7.00323926311191736200537494782751559285669e-28, /* F^(21)(x_05) / (21!) */ + ( real_type )-7.78441079332947198755113952454837583682253e-30, /* F^(22)(x_05) / (22!) */ + ( real_type )-8.9371603466391182274230670125633184569874e-32, /* F^(23)(x_05) / (23!) */ + ( real_type )+1.50183338631292834133853201400728382047835e-33, /* F^(24)(x_05) / (24!) */ + + /* ===== n = 6, xn = 1.26, yn = 0 ==== */ + ( real_type )+0.492641603905036547612208594020732218930945, /* F^(00)(x_06) / ( 0!) */ + ( real_type )-0.244568262496934366985530989908628070574861, /* F^(01)(x_06) / ( 1!) */ + ( real_type )-0.0918566361649807874423578649627814077614793, /* F^(02)(x_06) / ( 2!) */ + ( real_type )+0.0529585703120516404982632776756223265207928, /* F^(03)(x_06) / ( 3!) */ + ( real_type )-0.00453451968274798890966050385162277970457304, /* F^(04)(x_06) / ( 4!) */ + ( real_type )-0.000600946259352815008741441117488809918002634, /* F^(05)(x_06) / ( 5!) */ + ( real_type )+0.0000925552193535377697832843655581015942555431, /* F^(06)(x_06) / ( 6!) 
*/ + ( real_type )-0.000000683842699321859270625002242845197006069111, /* F^(07)(x_06) / ( 7!) */ + ( real_type )-0.000000386199152779874359679237080094239733663695, /* F^(08)(x_06) / ( 8!) */ + ( real_type )+0.0000000141558233627113708713650947897141565248638, /* F^(09)(x_06) / ( 9!) */ + ( real_type )+0.00000000050059953876864992928944458118691472135549, /* F^(10)(x_06) / (10!) */ + ( real_type )-0.0000000000338498941449318854211464020702185704087927, /* F^(11)(x_06) / (11!) */ + ( real_type )-3.82123326915424907884833505408506638663201e-14, /* F^(12)(x_06) / (12!) */ + ( real_type )+3.3953760021222363736009353137931291652325e-14, /* F^(13)(x_06) / (13!) */ + ( real_type )-4.07648467548627182210703734920692939624092e-16, /* F^(14)(x_06) / (14!) */ + ( real_type )-1.69808372700713846404256394452200058639957e-17, /* F^(15)(x_06) / (15!) */ + ( real_type )+3.79890962188860049156011620069629083794578e-19, /* F^(16)(x_06) / (16!) */ + ( real_type )+4.02380665295105758035971576850128501503803e-21, /* F^(17)(x_06) / (17!) */ + ( real_type )-1.69316236593517034277751019432977912398009e-22, /* F^(18)(x_06) / (18!) */ + ( real_type )-5.35826764133213341625423252651791216777408e-26, /* F^(19)(x_06) / (19!) */ + ( real_type )+4.48953212703780911121751499866346936803324e-26, /* F^(20)(x_06) / (20!) */ + ( real_type )-2.45037395368521031012102910585168847196496e-28, /* F^(21)(x_06) / (21!) */ + ( real_type )-7.55517157430196501954049555982248622822423e-30, /* F^(22)(x_06) / (22!) */ + ( real_type )+7.81906920454168911108447411439766635686812e-32, /* F^(23)(x_06) / (23!) */ + ( real_type )+7.97633787047635503067112106237182182646483e-34, /* F^(24)(x_06) / (24!) */ + + /* ===== n = 7, xn = 1.47, yn = 0 ==== */ + ( real_type )+0.435738088853804929355870047140050578499873, /* F^(00)(x_07) / ( 0!) */ + ( real_type )-0.284280682937530246809612503958135786323608, /* F^(01)(x_07) / ( 1!) */ + ( real_type )-0.00839906752556439970929800788439751886673422, /* F^(02)(x_07) / ( 2!) 
*/ + ( real_type )+0.0343373143698519359473320420490896899374014, /* F^(03)(x_07) / ( 3!) */ + ( real_type )-0.00597533360544262908988268839928100425036875, /* F^(04)(x_07) / ( 4!) */ + ( real_type )+0.0000177140955604080399130692523633335407914121, /* F^(05)(x_07) / ( 5!) */ + ( real_type )+0.0000649423187865221212032792757406428568131929, /* F^(06)(x_07) / ( 6!) */ + ( real_type )-0.00000402680894084621142672946307513216242991228, /* F^(07)(x_07) / ( 7!) */ + ( real_type )-0.000000104475917419657637931334655758122372042821, /* F^(08)(x_07) / ( 8!) */ + ( real_type )+0.0000000162300154935154851698137082667573438590855, /* F^(09)(x_07) / ( 9!) */ + ( real_type )-0.000000000246189646010807826322700724933005930269327, /* F^(10)(x_07) / (10!) */ + ( real_type )-0.0000000000208296819046340174582566369764564801220082, /* F^(11)(x_07) / (11!) */ + ( real_type )+0.000000000000737184031155809790081089466499108526736617, /* F^(12)(x_07) / (12!) */ + ( real_type )+7.68556637183090708966053908604838277856378e-15, /* F^(13)(x_07) / (13!) */ + ( real_type )-6.94209813736546147107416680281722310426927e-16, /* F^(14)(x_07) / (14!) */ + ( real_type )+4.21401790876988604365225734559203446531703e-18, /* F^(15)(x_07) / (15!) */ + ( real_type )+3.13050957766201336484524745577980989482005e-19, /* F^(16)(x_07) / (16!) */ + ( real_type )-5.01532437957307470836817148201862957847434e-21, /* F^(17)(x_07) / (17!) */ + ( real_type )-6.80478881030661996816913441136208780595441e-23, /* F^(18)(x_07) / (18!) */ + ( real_type )+2.09922565349392982771621592770701523194946e-24, /* F^(19)(x_07) / (19!) */ + ( real_type )+2.43936047506214941276037218096754528256247e-27, /* F^(20)(x_07) / (20!) */ + ( real_type )-4.92318048483344913735808025230785098004258e-28, /* F^(21)(x_07) / (21!) */ + ( real_type )+2.5180219503513531179671571612310623260608e-30, /* F^(22)(x_07) / (22!) */ + ( real_type )+7.05758938483472465554401172063234309430156e-32, /* F^(23)(x_07) / (23!) 
*/ + ( real_type )-7.41270123752632639683837007306138526826034e-34, /* F^(24)(x_07) / (24!) */ + + /* ===== n = 8, xn = 1.68, yn = 0 ==== */ + ( real_type )+0.376790235869403627825298481421444312322929, /* F^(00)(x_08) / ( 0!) */ + ( real_type )-0.269188162928517412746909357856579564490883, /* F^(01)(x_08) / ( 1!) */ + ( real_type )+0.0382896508472075737117419473879537517923446, /* F^(02)(x_08) / ( 2!) */ + ( real_type )+0.0155791663240967617583155525603246077743128, /* F^(03)(x_08) / ( 3!) */ + ( real_type )-0.00487522660791717751381340809270232063034693, /* F^(04)(x_08) / ( 4!) */ + ( real_type )+0.000345289311216379172336370760863877203263893, /* F^(05)(x_08) / ( 5!) */ + ( real_type )+0.0000218614127226933966405968302796535285098904, /* F^(06)(x_08) / ( 6!) */ + ( real_type )-0.00000385173224341866673697945428451799033129305, /* F^(07)(x_08) / ( 7!) */ + ( real_type )+0.000000105127157141589935389497552977462326446207, /* F^(08)(x_08) / ( 8!) */ + ( real_type )+0.00000000751630285846452989552388151477530753661217, /* F^(09)(x_08) / ( 9!) */ + ( real_type )-0.000000000486796632623157106683813739893916830138218, /* F^(10)(x_08) / (10!) */ + ( real_type )+0.00000000000112789631392641996316845327653417973432151, /* F^(11)(x_08) / (11!) */ + ( real_type )+0.000000000000588258673400975891945519348072647345523745, /* F^(12)(x_08) / (12!) */ + ( real_type )-1.28371930110337750061139950132848192619501e-14, /* F^(13)(x_08) / (13!) */ + ( real_type )-2.41123791705713122402136186809718657811174e-16, /* F^(14)(x_08) / (14!) */ + ( real_type )+1.17604052094837493051233538981873705700963e-17, /* F^(15)(x_08) / (15!) */ + ( real_type )-2.91568656289377748755065488479545659003615e-20, /* F^(16)(x_08) / (16!) */ + ( real_type )-4.74684446772389129688973624709548649253582e-21, /* F^(17)(x_08) / (17!) */ + ( real_type )+5.99370071740934610862117711606332084440774e-23, /* F^(18)(x_08) / (18!) 
*/ + ( real_type )+9.01756471699407153159648082651826195386547e-25, /* F^(19)(x_08) / (19!) */ + ( real_type )-2.33666353338090758909155878496871976610197e-26, /* F^(20)(x_08) / (20!) */ + ( real_type )-2.60022239963113518319248970790192361562344e-29, /* F^(21)(x_08) / (21!) */ + ( real_type )+4.77888487611480839897756688038684565417787e-30, /* F^(22)(x_08) / (22!) */ + ( real_type )-2.59611680145341417074332511703574131154688e-32, /* F^(23)(x_08) / (23!) */ + ( real_type )-5.69630315499334416671257311363877079927607e-34, /* F^(24)(x_08) / (24!) */ + + /* ===== n = 9, xn = 1.89, yn = 0 ==== */ + ( real_type )+0.324173662408523334351848683232449479096986, /* F^(00)(x_09) / ( 0!) */ + ( real_type )-0.228447562811246251456572360732510355778273, /* F^(01)(x_09) / ( 1!) */ + ( real_type )+0.0543371756695505590321458034307577931644895, /* F^(02)(x_09) / ( 2!) */ + ( real_type )+0.00250425172635110376921261803553175035696127, /* F^(03)(x_09) / ( 3!) */ + ( real_type )-0.00285716123720916625805314081653387920074279, /* F^(04)(x_09) / ( 4!) */ + ( real_type )+0.000383000458270998891269044171020787303553263, /* F^(05)(x_09) / ( 5!) */ + ( real_type )-0.00000856960174374598695817247388277955057129653, /* F^(06)(x_09) / ( 6!) */ + ( real_type )-0.00000194270488023624985818102825015437778913781, /* F^(07)(x_09) / ( 7!) */ + ( real_type )+0.000000153285729076764259701777831973319503711765, /* F^(08)(x_09) / ( 8!) */ + ( real_type )-0.00000000117525946225135754354468807727526673880385, /* F^(09)(x_09) / ( 9!) */ + ( real_type )-0.000000000296098805460711241095094939396080249474231, /* F^(10)(x_09) / (10!) */ + ( real_type )+0.0000000000112157894518166014966803346406315110571553, /* F^(11)(x_09) / (11!) */ + ( real_type )+7.87097741117849305337280414792377984384492e-14, /* F^(12)(x_09) / (12!) */ + ( real_type )-1.28258394062496894290445425558464294392721e-14, /* F^(13)(x_09) / (13!) */ + ( real_type )+1.86193800462734059301277584247508549244911e-16, /* F^(14)(x_09) / (14!) 
*/ + ( real_type )+5.00749451246536803586505740579156944202749e-18, /* F^(15)(x_09) / (15!) */ + ( real_type )-1.71100034142632832126826784742445101690909e-19, /* F^(16)(x_09) / (16!) */ + ( real_type )+7.76548882105835265159181302524344705003262e-23, /* F^(17)(x_09) / (17!) */ + ( real_type )+6.12195821795120340714008887989544412743059e-23, /* F^(18)(x_09) / (18!) */ + ( real_type )-6.66532732797539046507051470785025142552175e-25, /* F^(19)(x_09) / (19!) */ + ( real_type )-9.79589573652648150767026625351912506766955e-27, /* F^(20)(x_09) / (20!) */ + ( real_type )+2.35316467996805205604892300760967148279938e-28, /* F^(21)(x_09) / (21!) */ + ( real_type )+8.51588598398726473604324792011127309897362e-32, /* F^(22)(x_09) / (22!) */ + ( real_type )-4.1049363780469406386143588525443429354141e-32, /* F^(23)(x_09) / (23!) */ + ( real_type )+2.57205495751264034302037195216713507257028e-34, /* F^(24)(x_09) / (24!) */ + + /* ===== n = 10, xn = 2.11, yn = 0 ==== */ + ( real_type )+0.280919842933869131514286981833391921653268, /* F^(00)(x_10) / ( 0!) */ + ( real_type )-0.182820391300501540716223565769208858554178, /* F^(01)(x_10) / ( 1!) */ + ( real_type )+0.0519825956914880979982809050089198589703428, /* F^(02)(x_10) / ( 2!) */ + ( real_type )-0.00400596620239484406669664435799083501482239, /* F^(03)(x_10) / ( 3!) */ + ( real_type )-0.00111173985493616798387902490093050851301577, /* F^(04)(x_10) / ( 4!) */ + ( real_type )+0.000267359720668725162961733423370721817460206, /* F^(05)(x_10) / ( 5!) */ + ( real_type )-0.000018917477713425636216582476092850523417058, /* F^(06)(x_10) / ( 6!) */ + ( real_type )-0.000000193211615282357914903176758002288161841488, /* F^(07)(x_10) / ( 7!) */ + ( real_type )+0.0000000971643159854669410913639011381389348326214, /* F^(08)(x_10) / ( 8!) */ + ( real_type )-0.00000000445444451348028249514150706592724231287508, /* F^(09)(x_10) / ( 9!) */ + ( real_type )-0.0000000000283651437275988621640532247893647540987269, /* F^(10)(x_10) / (10!) 
*/ + ( real_type )+0.00000000000834976104803522502335599920876220817243449, /* F^(11)(x_10) / (11!) */ + ( real_type )-0.000000000000208330484470435966588084472130767009416342, /* F^(12)(x_10) / (12!) */ + ( real_type )-3.04405830825599590120017920238309599288295e-15, /* F^(13)(x_10) / (13!) */ + ( real_type )+2.28918017236693429146844748426578195027284e-16, /* F^(14)(x_10) / (14!) */ + ( real_type )-2.35111050277976244221313806575007556022695e-18, /* F^(15)(x_10) / (15!) */ + ( real_type )-8.05585533396194563259286674719997683495321e-20, /* F^(16)(x_10) / (16!) */ + ( real_type )+2.19059765275386850274606689334531736451491e-21, /* F^(17)(x_10) / (17!) */ + ( real_type )+7.83666047092060754024491742477943363664212e-25, /* F^(18)(x_10) / (18!) */ + ( real_type )-6.83377799747773165245198001337998400981966e-25, /* F^(19)(x_10) / (19!) */ + ( real_type )+6.98722261653128008904734018834933392663188e-27, /* F^(20)(x_10) / (20!) */ + ( real_type )+8.82491956317301155115873846154532288375101e-29, /* F^(21)(x_10) / (21!) */ + ( real_type )-2.14261432553229783763777970821436332058291e-30, /* F^(22)(x_10) / (22!) */ + ( real_type )+1.888241689796128813367973546169983178144e-33, /* F^(23)(x_10) / (23!) */ + ( real_type )+3.09659371269555845595381000482775199805887e-34, /* F^(24)(x_10) / (24!) */ + + /* ===== n = 11, xn = 2.32, yn = 0 ==== */ + ( real_type )+0.246774606738902005981196594245619278443541, /* F^(00)(x_11) / ( 0!) */ + ( real_type )-0.142956073317019863319201117659259467890819, /* F^(01)(x_11) / ( 1!) */ + ( real_type )+0.0421407815239404243774503278113824835876131, /* F^(02)(x_11) / ( 2!) */ + ( real_type )-0.00580247591276830463078911442878534258691992, /* F^(03)(x_11) / ( 3!) */ + ( real_type )-0.0000762018168189732916350653607741992666149718, /* F^(04)(x_11) / ( 4!) */ + ( real_type )+0.000130166907476565355636011738492224796481287, /* F^(05)(x_11) / ( 5!) */ + ( real_type )-0.0000158999328218034426052460364979309963441674, /* F^(06)(x_11) / ( 6!) 
*/ + ( real_type )+0.00000061740459108534452565859544869027082591808, /* F^(07)(x_11) / ( 7!) */ + ( real_type )+0.0000000263012475469876146427892901558080190435981, /* F^(08)(x_11) / ( 8!) */ + ( real_type )-0.00000000340947471851221356652669775483490022193239, /* F^(09)(x_11) / ( 9!) */ + ( real_type )+0.0000000000994652965073768360981952529680698192909056, /* F^(10)(x_11) / (10!) */ + ( real_type )+0.00000000000182821132566213554710245747693653803676963, /* F^(11)(x_11) / (11!) */ + ( real_type )-0.000000000000184389614251769904730622796353593516882596, /* F^(12)(x_11) / (12!) */ + ( real_type )+3.25037377438772647323384060515766412951462e-15, /* F^(13)(x_11) / (13!) */ + ( real_type )+6.79248480473373780109216063346737793578012e-17, /* F^(14)(x_11) / (14!) */ + ( real_type )-3.461948474871399483221167536096964238704e-18, /* F^(15)(x_11) / (15!) */ + ( real_type )+2.72564111983474114299149621244885865934746e-20, /* F^(16)(x_11) / (16!) */ + ( real_type )+1.06056518709979993989865804623844333792893e-21, /* F^(17)(x_11) / (17!) */ + ( real_type )-2.50578024828813821855215056078956769757409e-23, /* F^(18)(x_11) / (18!) */ + ( real_type )-4.94012713959183175280963636669539779085841e-27, /* F^(19)(x_11) / (19!) */ + ( real_type )+6.65136002026932178721593637893447912220224e-27, /* F^(20)(x_11) / (20!) */ + ( real_type )-6.87353431457782385219049789816337918464072e-29, /* F^(21)(x_11) / (21!) */ + ( real_type )-6.51053080204830454222272163929173744829049e-31, /* F^(22)(x_11) / (22!) */ + ( real_type )+1.75124221145617571932816370663470025029508e-32, /* F^(23)(x_11) / (23!) */ + ( real_type )-4.25292608699205470130068804252976943907152e-35, /* F^(24)(x_11) / (24!) */ + + /* ===== n = 12, xn = 2.53, yn = 0 ==== */ + ( real_type )+0.220091142230898461206668437270154827354414, /* F^(00)(x_12) / ( 0!) */ + ( real_type )-0.112039455482434268576786978848711660038791, /* F^(01)(x_12) / ( 1!) 
*/ + ( real_type )+0.0314779515992045744796314508750583092626526, /* F^(02)(x_12) / ( 2!) */ + ( real_type )-0.005223004089809285844078613129875926160709, /* F^(03)(x_12) / ( 3!) */ + ( real_type )+0.000337788395937829396236155826555051121871222, /* F^(04)(x_12) / ( 4!) */ + ( real_type )+0.000036191269143487569010892731720306270727942, /* F^(05)(x_12) / ( 5!) */ + ( real_type )-0.00000883268077032501439119102502651992261443067, /* F^(06)(x_12) / ( 6!) */ + ( real_type )+0.00000066458215113760837531189303096982070389352, /* F^(07)(x_12) / ( 7!) */ + ( real_type )-0.0000000130354013501083478093394179928229820608998, /* F^(08)(x_12) / ( 8!) */ + ( real_type )-0.00000000123805502813075923489374970602489244633395, /* F^(09)(x_12) / ( 9!) */ + ( real_type )+0.0000000000915219178718767915978920863646960086668954, /* F^(10)(x_12) / (10!) */ + ( real_type )-0.00000000000177533493205884576471276081248680361278973, /* F^(11)(x_12) / (11!) */ + ( real_type )-5.32655233545048198183466033328815246488927e-14, /* F^(12)(x_12) / (12!) */ + ( real_type )+3.34331491546313941875717419738594787395546e-15, /* F^(13)(x_12) / (13!) */ + ( real_type )-4.4376748058886528337889162479700624256588e-17, /* F^(14)(x_12) / (14!) */ + ( real_type )-1.12620914613961248349983395099804614412319e-18, /* F^(15)(x_12) / (15!) */ + ( real_type )+4.53407017088641716427471175076213454210652e-20, /* F^(16)(x_12) / (16!) */ + ( real_type )-3.05583799302382186808927498792351871383335e-22, /* F^(17)(x_12) / (17!) */ + ( real_type )-1.1698141502076891232239615623172009766002e-23, /* F^(18)(x_12) / (18!) */ + ( real_type )+2.57784361901470817141511743003479071348935e-25, /* F^(19)(x_12) / (19!) */ + ( real_type )-1.77765228735186132546691186798532742046777e-28, /* F^(20)(x_12) / (20!) */ + ( real_type )-5.64178094825416738025675375805996608190427e-29, /* F^(21)(x_12) / (21!) */ + ( real_type )+6.23943032212458217387509224240151347107595e-31, /* F^(22)(x_12) / (22!) 
*/ + ( real_type )+3.73598771856202423334606299903143862776185e-33, /* F^(23)(x_12) / (23!) */ + ( real_type )-1.26966120433902083770494022748064735034667e-34, /* F^(24)(x_12) / (24!) */ + + /* ===== n = 13, xn = 2.74, yn = 0 ==== */ + ( real_type )+0.199018995238297948215403032191131604653406, /* F^(00)(x_13) / ( 0!) */ + ( real_type )-0.0893671318306833571669414974182191598128488, /* F^(01)(x_13) / ( 1!) */ + ( real_type )+0.0227823669912598137262272861590280385613666, /* F^(02)(x_13) / ( 2!) */ + ( real_type )-0.00392626118308793285145521595547217233123655, /* F^(03)(x_13) / ( 3!) */ + ( real_type )+0.000393929323964431974419746451387069926787847, /* F^(04)(x_13) / ( 4!) */ + ( real_type )-0.00000772456516413801611106671252838701350954494, /* F^(05)(x_13) / ( 5!) */ + ( real_type )-0.00000320249720037913660791200031179144456128864, /* F^(06)(x_13) / ( 6!) */ + ( real_type )+0.000000410292110503728174453327717161143997401461, /* F^(07)(x_13) / ( 7!) */ + ( real_type )-0.0000000207939101071789096671940265637486307406368, /* F^(08)(x_13) / ( 8!) */ + ( real_type )+0.000000000138842837072058142453305141016512459627577, /* F^(09)(x_13) / ( 9!) */ + ( real_type )+0.000000000038608870676804220103787009960720553502771, /* F^(10)(x_13) / (10!) */ + ( real_type )-0.00000000000197604407623377959262781934131798917567426, /* F^(11)(x_13) / (11!) */ + ( real_type )+2.63642124366322162788507073308300111294863e-14, /* F^(12)(x_13) / (12!) */ + ( real_type )+1.09485979902332147565004058013169819739117e-15, /* F^(13)(x_13) / (13!) */ + ( real_type )-5.12701503939427136125251127044054492521827e-17, /* F^(14)(x_13) / (14!) */ + ( real_type )+5.52124755400072928067465386872786641113521e-19, /* F^(15)(x_13) / (15!) */ + ( real_type )+1.48979042837953600216992671888575921750784e-20, /* F^(16)(x_13) / (16!) */ + ( real_type )-5.20975840391529855163904706512954972178875e-22, /* F^(17)(x_13) / (17!) 
*/ + ( real_type )+3.3918599073883057872329983474299643307675e-24, /* F^(18)(x_13) / (18!) */ + ( real_type )+1.0892031708742144894907191191581750403323e-25, /* F^(19)(x_13) / (19!) */ + ( real_type )-2.38308326208795272596298340408216778111126e-27, /* F^(20)(x_13) / (20!) */ + ( real_type )+4.8803027558236420373867561311536905663204e-30, /* F^(21)(x_13) / (21!) */ + ( real_type )+4.13733625078312365797548809928163285411591e-31, /* F^(22)(x_13) / (22!) */ + ( real_type )-5.11968071426848961666547336267267011868141e-33, /* F^(23)(x_13) / (23!) */ + ( real_type )-1.38078497824722079623098434864174989263826e-35, /* F^(24)(x_13) / (24!) */ + + /* ===== n = 14, xn = 2.95, yn = 0 ==== */ + ( real_type )+0.182022563204593041262776480636221471420828, /* F^(00)(x_14) / ( 0!) */ + ( real_type )-0.0729751094165483941974409512371333066915369, /* F^(01)(x_14) / ( 1!) */ + ( real_type )+0.0165309849063011069529402468571940792372585, /* F^(02)(x_14) / ( 2!) */ + ( real_type )-0.00271896626076786190760632294353347561018785, /* F^(03)(x_14) / ( 3!) */ + ( real_type )+0.000312933374099297682051014076328893191732898, /* F^(04)(x_14) / ( 4!) */ + ( real_type )-0.0000194070703617403164670922543845722329709071, /* F^(05)(x_14) / ( 5!) */ + ( real_type )-0.000000299271582806647748438541057492781088825343, /* F^(06)(x_14) / ( 6!) */ + ( real_type )+0.000000168023483242427372276464317070824195878903, /* F^(07)(x_14) / ( 7!) */ + ( real_type )-0.0000000141398132889344969585968873683140545055769, /* F^(08)(x_14) / ( 8!) */ + ( real_type )+0.000000000510427253384752146289088191306079793673518, /* F^(09)(x_14) / ( 9!) */ + ( real_type )+0.00000000000133346395132635962154155052617034381564439, /* F^(10)(x_14) / (10!) */ + ( real_type )-0.000000000000908643551722753555989870139530781261567027, /* F^(11)(x_14) / (11!) */ + ( real_type )+3.55122682606336607263106846837115106889454e-14, /* F^(12)(x_14) / (12!) 
*/ + ( real_type )-3.42573278750830536672976990611204197962253e-16, /* F^(13)(x_14) / (13!) */ + ( real_type )-1.75716661167245698151734097893045405129559e-17, /* F^(14)(x_14) / (14!) */ + ( real_type )+6.77863944253227808426083829137679841284025e-19, /* F^(15)(x_14) / (15!) */ + ( real_type )-6.45679980687896110145967583898759709363745e-21, /* F^(16)(x_14) / (16!) */ + ( real_type )-1.61494316013653695463849415733415583948163e-22, /* F^(17)(x_14) / (17!) */ + ( real_type )+5.28268518854295322544987930778545529984376e-24, /* F^(18)(x_14) / (18!) */ + ( real_type )-3.65546429798946451838852366511563018861084e-26, /* F^(19)(x_14) / (19!) */ + ( real_type )-8.51480310965487686522885177991726759989312e-28, /* F^(20)(x_14) / (20!) */ + ( real_type )+1.96705593137527678059529813317321754405483e-29, /* F^(21)(x_14) / (21!) */ + ( real_type )-7.2023569715911551727975575821221560123373e-32, /* F^(22)(x_14) / (22!) */ + ( real_type )-2.57783114363382713171836200298528037452231e-33, /* F^(23)(x_14) / (23!) */ + ( real_type )+3.72544363208960132546249673613359012782809e-35, /* F^(24)(x_14) / (24!) */ + + /* ===== n = 15, xn = 3.16, yn = 0 ==== */ + ( real_type )+0.167986296744659197005015872159861318018257, /* F^(00)(x_15) / ( 0!) */ + ( real_type )-0.0609660847031107336318690424778290956765, /* F^(01)(x_15) / ( 1!) */ + ( real_type )+0.0122690906325820876043579406459276498654075, /* F^(02)(x_15) / ( 2!) */ + ( real_type )-0.00183587875175875201936680777100176571788834, /* F^(03)(x_15) / ( 3!) */ + ( real_type )+0.000213476871266481806787254807022090150358404, /* F^(04)(x_15) / ( 4!) */ + ( real_type )-0.0000172134240216203642309410596760332833706537, /* F^(05)(x_15) / ( 5!) */ + ( real_type )+0.000000647933714867225156004348125815800714045959, /* F^(06)(x_15) / ( 6!) */ + ( real_type )+0.0000000335835728816991982719048875416121257265654, /* F^(07)(x_15) / ( 7!) */ + ( real_type )-0.00000000620672960356930015203949717754659995793662, /* F^(08)(x_15) / ( 8!) 
*/ + ( real_type )+0.000000000380302521923485924287562070884513282149737, /* F^(09)(x_15) / ( 9!) */ + ( real_type )-0.0000000000102263741954228888031019307922827370373284, /* F^(10)(x_15) / (10!) */ + ( real_type )-9.4817172622226984669340769891926916692377e-14, /* F^(11)(x_15) / (11!) */ + ( real_type )+1.70707365526119549486351558428045412271987e-14, /* F^(12)(x_15) / (12!) */ + ( real_type )-5.44451574694191582904710648389144431064473e-16, /* F^(13)(x_15) / (13!) */ + ( real_type )+4.14476715532687337552747891130130680577486e-18, /* F^(14)(x_15) / (14!) */ + ( real_type )+2.29339198286217757333611416718321270052831e-19, /* F^(15)(x_15) / (15!) */ + ( real_type )-7.81677232480281191860428385412314480811424e-21, /* F^(16)(x_15) / (16!) */ + ( real_type )+7.16321404200637899199715769416928417273067e-23, /* F^(17)(x_15) / (17!) */ + ( real_type )+1.44199615760402781893388225779359750010968e-24, /* F^(18)(x_15) / (18!) */ + ( real_type )-4.72755425668393850088537433841735272913779e-26, /* F^(19)(x_15) / (19!) */ + ( real_type )+3.66983262212193506206962903352163464480328e-28, /* F^(20)(x_15) / (20!) */ + ( real_type )+5.46432025438973745316784058379575855644371e-30, /* F^(21)(x_15) / (21!) */ + ( real_type )-1.43517114092698128698221713303281872947594e-31, /* F^(22)(x_15) / (22!) */ + ( real_type )+7.74418698898856930344130114174681641834771e-34, /* F^(23)(x_15) / (23!) */ + ( real_type )+1.31747978902940016016034797164567134026281e-35, /* F^(24)(x_15) / (24!) */ + + /* ===== n = 16, xn = 3.37, yn = 0 ==== */ + ( real_type )+0.156145368002923681693730536418605551404876, /* F^(00)(x_16) / ( 0!) */ + ( real_type )-0.0519266897039068498588388995227089626906795, /* F^(01)(x_16) / ( 1!) */ + ( real_type )+0.0093827933945917961658986466210755062574504, /* F^(02)(x_16) / ( 2!) */ + ( real_type )-0.00125374532241824383186158362523785915107338, /* F^(03)(x_16) / ( 3!) */ + ( real_type )+0.000136943042559339200006042324962536746096464, /* F^(04)(x_16) / ( 4!) 
*/ + ( real_type )-0.0000118276397570991583685253307814694123873409, /* F^(05)(x_16) / ( 5!) */ + ( real_type )+0.00000069177012493817875304807831048417719834437, /* F^(06)(x_16) / ( 6!) */ + ( real_type )-0.0000000146489697785093510284689128741890723761587, /* F^(07)(x_16) / ( 7!) */ + ( real_type )-0.00000000154626266754069723013735482111839115245114, /* F^(08)(x_16) / ( 8!) */ + ( real_type )+0.000000000173816912211259682817382516747744908499448, /* F^(09)(x_16) / ( 9!) */ + ( real_type )-0.00000000000827363166671489173947451385581037786389031, /* F^(10)(x_16) / (10!) */ + ( real_type )+0.000000000000173345187880260673840363383432941113239096, /* F^(11)(x_16) / (11!) */ + ( real_type )+2.33678823763154143013715861406835482492783e-15, /* F^(12)(x_16) / (12!) */ + ( real_type )-2.64103183651301594606507197410259785583298e-16, /* F^(13)(x_16) / (13!) */ + ( real_type )+7.24344676042649042974176085815158964168172e-18, /* F^(14)(x_16) / (14!) */ + ( real_type )-4.9195248386546525221403092868295506049051e-20, /* F^(15)(x_16) / (15!) */ + ( real_type )-2.47801672140950264731896834633767045631282e-21, /* F^(16)(x_16) / (16!) */ + ( real_type )+7.90429530610934836369135838640690729859345e-23, /* F^(17)(x_16) / (17!) */ + ( real_type )-7.43729985546175039636374102049358349455344e-25, /* F^(18)(x_16) / (18!) */ + ( real_type )-1.04491935139813479613310913733514752956974e-26, /* F^(19)(x_16) / (19!) */ + ( real_type )+3.71704834326574018957518742207590660611262e-28, /* F^(20)(x_16) / (20!) */ + ( real_type )-3.30883770345350407672892070851137352762867e-30, /* F^(21)(x_16) / (21!) */ + ( real_type )-2.70854199420389520074110797603204757773261e-32, /* F^(22)(x_16) / (22!) */ + ( real_type )+9.1356072796019771680154820807716010541804e-34, /* F^(23)(x_16) / (23!) */ + ( real_type )-6.59593831568356595110138150063027586719997e-36, /* F^(24)(x_16) / (24!) 
*/ + + /* ===== n = 17, xn = 3.58, yn = 0 ==== */ + ( real_type )+0.145980869408444698072701944127637946509207, /* F^(00)(x_17) / ( 0!) */ + ( real_type )-0.0449156968183408738979169601994183904966935, /* F^(01)(x_17) / ( 1!) */ + ( real_type )+0.00738502276017710006943796780368071839698212, /* F^(02)(x_17) / ( 2!) */ + ( real_type )-0.000882835414243326826416492251506796046765968, /* F^(03)(x_17) / ( 3!) */ + ( real_type )+0.0000872434036453371712687022542389902488384097, /* F^(04)(x_17) / ( 4!) */ + ( real_type )-0.00000732245570621947167749069340131634517685262, /* F^(05)(x_17) / ( 5!) */ + ( real_type )+0.000000486555713949247850045246631326827804693294, /* F^(06)(x_17) / ( 6!) */ + ( real_type )-0.0000000212631839860806078088542982222849809321144, /* F^(07)(x_17) / ( 7!) */ + ( real_type )+0.000000000205995538613406714372271492937649970268527, /* F^(08)(x_17) / ( 8!) */ + ( real_type )+0.0000000000474234767267443646723347944334658228775228, /* F^(09)(x_17) / ( 9!) */ + ( real_type )-0.00000000000385229040823652343167027540065646269772333, /* F^(10)(x_17) / (10!) */ + ( real_type )+0.000000000000149500775971526115426230321617975910978568, /* F^(11)(x_17) / (11!) */ + ( real_type )-2.56732208068896703824641920991850990459902e-15, /* F^(12)(x_17) / (12!) */ + ( real_type )-3.86992590894217448435183967665818369599323e-17, /* F^(13)(x_17) / (13!) */ + ( real_type )+3.42845842221511989369768463529444946469689e-18, /* F^(14)(x_17) / (14!) */ + ( real_type )-8.44981284962332699446254141840739101576719e-20, /* F^(15)(x_17) / (15!) */ + ( real_type )+5.76956717216848882444928525707562321777043e-22, /* F^(16)(x_17) / (16!) */ + ( real_type )+2.22576837801971548588425013996610514647691e-23, /* F^(17)(x_17) / (17!) */ + ( real_type )-7.01220427264228810890380106947162465629995e-25, /* F^(18)(x_17) / (18!) */ + ( real_type )+7.05314689779316747366451006096598724719936e-27, /* F^(19)(x_17) / (19!) 
*/ + ( real_type )+5.83174837406035471213866350406476340829983e-29, /* F^(20)(x_17) / (20!) */ + ( real_type )-2.54590725558289768183998109419469364604442e-30, /* F^(21)(x_17) / (21!) */ + ( real_type )+2.61762191625852862216552647738276296402043e-32, /* F^(22)(x_17) / (22!) */ + ( real_type )+8.33260732884785084315052564537384226212931e-35, /* F^(23)(x_17) / (23!) */ + ( real_type )-4.9872087362724129964291038369930432591696e-36, /* F^(24)(x_17) / (24!) */ + + /* ===== n = 18, xn = 3.79, yn = 0 ==== */ + ( real_type )+0.137133914188937040815955863740722859588214, /* F^(00)(x_18) / ( 0!) */ + ( real_type )-0.0393307180635227779633260870557871663662138, /* F^(01)(x_18) / ( 1!) */ + ( real_type )+0.00595440344694305475418919178826852202985272, /* F^(02)(x_18) / ( 2!) */ + ( real_type )-0.000644154696800392138613416196403294440407929, /* F^(03)(x_18) / ( 3!) */ + ( real_type )+0.000057025765388084767296546481835754414214561, /* F^(04)(x_18) / ( 4!) */ + ( real_type )-0.00000440471704480101203796579338379851113563086, /* F^(05)(x_18) / ( 5!) */ + ( real_type )+0.000000293689236113891033338814866399319857851371, /* F^(06)(x_18) / ( 6!) */ + ( real_type )-0.0000000154615560866386289431113585388012634610481, /* F^(07)(x_18) / ( 7!) */ + ( real_type )+0.000000000519861085879214972113039950939599072008509, /* F^(08)(x_18) / ( 8!) */ + ( real_type )-0.000000000000921120828507774105870188692570554308367651, /* F^(09)(x_18) / ( 9!) */ + ( real_type )-0.00000000000108543559471719846581025991680132284864805, /* F^(10)(x_18) / (10!) */ + ( real_type )+6.9509780246892610701365769565670376625145e-14, /* F^(11)(x_18) / (11!) */ + ( real_type )-2.28790999839926726269942555782009901435274e-15, /* F^(12)(x_18) / (12!) */ + ( real_type )+3.40531721128416485993174801885423562764072e-17, /* F^(13)(x_18) / (13!) */ + ( real_type )+4.79076298106514776157875767589454512393338e-19, /* F^(14)(x_18) / (14!) 
*/ + ( real_type )-3.77583685425349437242219364119151285294837e-20, /* F^(15)(x_18) / (15!) */ + ( real_type )+8.68327531850711518954734232939287900875659e-22, /* F^(16)(x_18) / (16!) */ + ( real_type )-6.44018198834127853640259613430759137617973e-24, /* F^(17)(x_18) / (17!) */ + ( real_type )-1.64649320632836340808881212454530491101081e-25, /* F^(18)(x_18) / (18!) */ + ( real_type )+5.4389039108169344095297910634656750393765e-27, /* F^(19)(x_18) / (19!) */ + ( real_type )-5.97241476173639249839570742813230559019809e-29, /* F^(20)(x_18) / (20!) */ + ( real_type )-2.06902992439643234016956171693504908169284e-31, /* F^(21)(x_18) / (21!) */ + ( real_type )+1.49919854315401637064030127801763700629822e-32, /* F^(22)(x_18) / (22!) */ + ( real_type )-1.79232713770736485255921799143560185060784e-34, /* F^(23)(x_18) / (23!) */ + ( real_type )+9.50423563840936680485571553880822964657552e-38, /* F^(24)(x_18) / (24!) */ + + /* ===== n = 19, xn = 4.00, yn = 0 ==== */ + ( real_type )+0.129348001236005115591470526257665945278322, /* F^(00)(x_19) / ( 0!) */ + ( real_type )-0.0347840098880409247317642100613275622265786, /* F^(01)(x_19) / ( 1!) */ + ( real_type )+0.00489401915807929166779315699382215181399605, /* F^(02)(x_19) / ( 2!) */ + ( real_type )-0.000485349264065934290064560654361072476154421, /* F^(03)(x_19) / ( 3!) */ + ( real_type )+0.0000387571671129966588742321191046132458273751, /* F^(04)(x_19) / ( 4!) */ + ( real_type )-0.0000026953081948402450384630650262547891416716, /* F^(05)(x_19) / ( 5!) */ + ( real_type )+0.000000168322186486758243278102015783139300067299, /* F^(06)(x_19) / ( 6!) */ + ( real_type )-0.00000000914574340708811428715226770435751061186111, /* F^(07)(x_19) / ( 7!) */ + ( real_type )+0.000000000391779593355843556973935178298531236896481, /* F^(08)(x_19) / ( 8!) */ + ( real_type )-0.0000000000104666777169718504197952407444305215087231, /* F^(09)(x_19) / ( 9!) */ + ( real_type )-3.32871012107932041362366922200721390721088e-14, /* F^(10)(x_19) / (10!) 
*/ + ( real_type )+1.950109382711335303346232798881554887869e-14, /* F^(11)(x_19) / (11!) */ + ( real_type )-1.04136493331085142592942643849471213584628e-15, /* F^(12)(x_19) / (12!) */ + ( real_type )+3.00635335027687528709567466064404671024964e-17, /* F^(13)(x_19) / (13!) */ + ( real_type )-4.09685093275607318179218512590417702891314e-19, /* F^(14)(x_19) / (14!) */ + ( real_type )-4.521376685291900351060641207223540152751e-21, /* F^(15)(x_19) / (15!) */ + ( real_type )+3.54670674163084030855654679699911516696028e-22, /* F^(16)(x_19) / (16!) */ + ( real_type )-7.86226058008022388568393524808425472136453e-24, /* F^(17)(x_19) / (17!) */ + ( real_type )+6.53460585629525030395253874981183022355701e-26, /* F^(18)(x_19) / (18!) */ + ( real_type )+9.71789585579453267724871452206751911481527e-28, /* F^(19)(x_19) / (19!) */ + ( real_type )-3.66321229123660398385830573331135388179385e-29, /* F^(20)(x_19) / (20!) */ + ( real_type )+4.44167856555516988517408874023204352370419e-31, /* F^(21)(x_19) / (21!) */ + ( real_type )-1.33407524261587405918789469664625498067054e-34, /* F^(22)(x_19) / (22!) */ + ( real_type )-7.43131112597520702520287679485680200275853e-35, /* F^(23)(x_19) / (23!) */ + ( real_type )+1.05226657007227926767896076704904639803818e-36, /* F^(24)(x_19) / (24!) */ + + /* ===== n = 20, xn = 4.21, yn = 0 ==== */ + ( real_type )+0.122433401676389459794733090423992918624035, /* F^(00)(x_20) / ( 0!) */ + ( real_type )-0.0310181193801217094594864354408594076367166, /* F^(01)(x_20) / ( 1!) */ + ( real_type )+0.00408460311995623376081223293898019189322532, /* F^(02)(x_20) / ( 2!) */ + ( real_type )-0.000375393163571256641644161140416458277054657, /* F^(03)(x_20) / ( 3!) */ + ( real_type )+0.0000273835525831867270568522939804521607104964, /* F^(04)(x_20) / ( 4!) */ + ( real_type )-0.0000017160702302798694531832244459269447254355, /* F^(05)(x_20) / ( 5!) */ + ( real_type )+0.0000000971582415388303678881184573871787474816884, /* F^(06)(x_20) / ( 6!) 
*/ + ( real_type )-0.00000000502349501014001231185105340107098398704639, /* F^(07)(x_20) / ( 7!) */ + ( real_type )+0.000000000227244035817817140178746271740279560341518, /* F^(08)(x_20) / ( 8!) */ + ( real_type )-0.00000000000812049670739810391746225707204855491863068, /* F^(09)(x_20) / ( 9!) */ + ( real_type )+0.000000000000178844555413813898551625608742494686626506, /* F^(10)(x_20) / (10!) */ + ( real_type )+9.75536590649226927601228999178951589862882e-16, /* F^(11)(x_20) / (11!) */ + ( real_type )-2.8286275602297871618256166773315841558988e-16, /* F^(12)(x_20) / (12!) */ + ( real_type )+1.31326196698704578447461907945994499813965e-17, /* F^(13)(x_20) / (13!) */ + ( real_type )-3.42209869132805081091475531703395245946741e-19, /* F^(14)(x_20) / (14!) */ + ( real_type )+4.46968353220394220538798717992011292281571e-21, /* F^(15)(x_20) / (15!) */ + ( real_type )+3.12052432806624504389579139501977464829964e-23, /* F^(16)(x_20) / (16!) */ + ( real_type )-2.84253092585874240227126191050348580821584e-24, /* F^(17)(x_20) / (17!) */ + ( real_type )+6.25490661781008082817599787984002531175285e-26, /* F^(18)(x_20) / (18!) */ + ( real_type )-5.84188944049975754202177601360072426779627e-28, /* F^(19)(x_20) / (19!) */ + ( real_type )-4.1615659616059869841857464085674135966624e-30, /* F^(20)(x_20) / (20!) */ + ( real_type )+2.11935737862741593685609986321106125221365e-31, /* F^(21)(x_20) / (21!) */ + ( real_type )-2.86855865219756110241965428344772846702871e-33, /* F^(22)(x_20) / (22!) */ + ( real_type )+9.2427185759500816741427252006799594178849e-36, /* F^(23)(x_20) / (23!) */ + ( real_type )+2.9792788752229144108828250179645223774883e-37, /* F^(24)(x_20) / (24!) */ + + /* ===== n = 21, xn = 4.42, yn = 0 ==== */ + ( real_type )+0.116245536536035770230943819646142346156529, /* F^(00)(x_21) / ( 0!) */ + ( real_type )-0.0278552704238951228348959444822900009422557, /* F^(01)(x_21) / ( 1!) */ + ( real_type )+0.00345204003743448516681121339704153987269848, /* F^(02)(x_21) / ( 2!) 
*/ + ( real_type )-0.000296447884415123466904746208988450204230074, /* F^(03)(x_21) / ( 3!) */ + ( real_type )+0.000019991460880166420157071458384227762174474, /* F^(04)(x_21) / ( 4!) */ + ( real_type )-0.00000114170637036691641194364922589110104160779, /* F^(05)(x_21) / ( 5!) */ + ( real_type )+0.000000058291765398212198438612266591374053859548, /* F^(06)(x_21) / ( 6!) */ + ( real_type )-0.00000000275210480620391445747295477810544419595878, /* F^(07)(x_21) / ( 7!) */ + ( real_type )+0.000000000119993910592544586361667648180931290391207, /* F^(08)(x_21) / ( 8!) */ + ( real_type )-0.00000000000460459983760811871620766213379352057403352, /* F^(09)(x_21) / ( 9!) */ + ( real_type )+0.000000000000140490429938408215264064165739100955702144, /* F^(10)(x_21) / (10!) */ + ( real_type )-2.65546448314124686696924872641242459297703e-15, /* F^(11)(x_21) / (11!) */ + ( real_type )-1.43320698037775282014567137506719230912389e-17, /* F^(12)(x_21) / (12!) */ + ( real_type )+3.36865730000474604858049829442501269110068e-18, /* F^(13)(x_21) / (13!) */ + ( real_type )-1.40719839929497201531285356375013040612156e-19, /* F^(14)(x_21) / (14!) */ + ( real_type )+3.39121280056803731261342037219659743174585e-21, /* F^(15)(x_21) / (15!) */ + ( real_type )-4.38391219861644663650817978766218522098214e-23, /* F^(16)(x_21) / (16!) */ + ( real_type )-1.25506812542971962641116932242919186564772e-25, /* F^(17)(x_21) / (17!) */ + ( real_type )+1.93434821321807285983593992752334570388687e-26, /* F^(18)(x_21) / (18!) */ + ( real_type )-4.35157628185851375452443334679781596845357e-28, /* F^(19)(x_21) / (19!) */ + ( real_type )+4.52888385142915344070255147690176672005637e-30, /* F^(20)(x_21) / (20!) */ + ( real_type )+7.87051038465448332387537070382304198875824e-33, /* F^(21)(x_21) / (21!) */ + ( real_type )-1.03494659681936683121726798494946272421871e-33, /* F^(22)(x_21) / (22!) */ + ( real_type )+1.59463247764922035682391437185213060046963e-35, /* F^(23)(x_21) / (23!) 
*/ + ( real_type )-8.85481353977161537247185652482981765237006e-38, /* F^(24)(x_21) / (24!) */ + + /* ===== n = 22, xn = 4.63, yn = 0 ==== */ + ( real_type )+0.110671559758965998392672056514515221809768, /* F^(00)(x_22) / ( 0!) */ + ( real_type )-0.0251681325041061317620531771729380616968488, /* F^(01)(x_22) / ( 1!) */ + ( real_type )+0.00294831644581541355302839828010839866673824, /* F^(02)(x_22) / ( 2!) */ + ( real_type )-0.000238065361903487150894947742544130345417201, /* F^(03)(x_22) / ( 3!) */ + ( real_type )+0.0000149807962105871773378527187695662664927158, /* F^(04)(x_22) / ( 4!) */ + ( real_type )-0.000000789471989432032388221124595002050270696719, /* F^(05)(x_22) / ( 5!) */ + ( real_type )+0.0000000366857002040572487847560494761830098297073, /* F^(06)(x_22) / ( 6!) */ + ( real_type )-0.00000000156465517670376396854728263743529173190661, /* F^(07)(x_22) / ( 7!) */ + ( real_type )+0.0000000000626878019202140311621626190749535755609994, /* F^(08)(x_22) / ( 8!) */ + ( real_type )-0.00000000000233979275413566671823564079346042063653743, /* F^(09)(x_22) / ( 9!) */ + ( real_type )+7.74324707358340963245685677872818285229474e-14, /* F^(10)(x_22) / (10!) */ + ( real_type )-2.06041860973956849674616304847610721009107e-15, /* F^(11)(x_22) / (11!) */ + ( real_type )+3.47732768638393510901338523798613335499488e-17, /* F^(12)(x_22) / (12!) */ + ( real_type )+1.25992588852768878050544656771717474167277e-19, /* F^(13)(x_22) / (13!) */ + ( real_type )-3.32491027869336227803589606934464059276705e-20, /* F^(14)(x_22) / (14!) */ + ( real_type )+1.28885665648453187088339124482621507931416e-21, /* F^(15)(x_22) / (15!) */ + ( real_type )-2.93190195616185116618239115290636803348401e-23, /* F^(16)(x_22) / (16!) */ + ( real_type )+3.822824410297022523141719019879898264855e-25, /* F^(17)(x_22) / (17!) */ + ( real_type )-2.83475903177825000491683337476524588123854e-28, /* F^(18)(x_22) / (18!) */ + ( real_type )-1.10387658533486708056932218695074709707918e-28, /* F^(19)(x_22) / (19!) 
*/ + ( real_type )+2.63094469845385676359933237508425553497991e-30, /* F^(20)(x_22) / (20!) */ + ( real_type )-3.02314972806127298936982598428856820487952e-32, /* F^(21)(x_22) / (21!) */ + ( real_type )+6.08945680725475733822987362162089553308216e-35, /* F^(22)(x_22) / (22!) */ + ( real_type )+4.12900176864527058504025248205525467659817e-36, /* F^(23)(x_22) / (23!) */ + ( real_type )-7.55950957691046266826837931184101647947484e-38, /* F^(24)(x_22) / (24!) */ + + /* ===== n = 23, xn = 4.84, yn = 0 ==== */ + ( real_type )+0.105621693364230228188040070800433983455213, /* F^(00)(x_23) / ( 0!) */ + ( real_type )-0.0228627146851769367882924016154440681217548, /* F^(01)(x_23) / ( 1!) */ + ( real_type )+0.00254098887147115338536762642835028154753412, /* F^(02)(x_23) / ( 2!) */ + ( real_type )-0.000193861832352809470271255388569048108594594, /* F^(03)(x_23) / ( 3!) */ + ( real_type )+0.0000114628885338234626235304767731034008217904, /* F^(04)(x_23) / ( 4!) */ + ( real_type )-0.000000563124384993320283769475975800108647113182, /* F^(05)(x_23) / ( 5!) */ + ( real_type )+0.0000000241183245346675661907048294673828840159447, /* F^(06)(x_23) / ( 6!) */ + ( real_type )-0.000000000935893959277984610497948478230757068337676, /* F^(07)(x_23) / ( 7!) */ + ( real_type )+0.0000000000339444416391772381857615920562586127606164, /* F^(08)(x_23) / ( 8!) */ + ( real_type )-0.00000000000116977320015476599481314918868119087045509, /* F^(09)(x_23) / ( 9!) */ + ( real_type )+3.78512068519039425526446161719236546330453e-14, /* F^(10)(x_23) / (10!) */ + ( real_type )-1.09590426279348176692448555359383883828061e-15, /* F^(11)(x_23) / (11!) */ + ( real_type )+2.59092360287631996890209990692727420143754e-17, /* F^(12)(x_23) / (12!) */ + ( real_type )-4.03904058064902865616612982156171692208601e-19, /* F^(13)(x_23) / (13!) */ + ( real_type )-3.80328476447036746159429054391076273068302e-22, /* F^(14)(x_23) / (14!) 
*/ + ( real_type )+2.72816714482771521170920556408867930773808e-22, /* F^(15)(x_23) / (15!) */ + ( real_type )-1.01222813852448190077903629592785944326905e-23, /* F^(16)(x_23) / (16!) */ + ( real_type )+2.21191054086407633704554123843147746890888e-25, /* F^(17)(x_23) / (17!) */ + ( real_type )-2.93581512577857995310428274031632397395263e-27, /* F^(18)(x_23) / (18!) */ + ( real_type )+1.06766425401756542906913279214761092576444e-29, /* F^(19)(x_23) / (19!) */ + ( real_type )+5.14095792653268359958801071204438255654223e-31, /* F^(20)(x_23) / (20!) */ + ( real_type )-1.37103767364285858063629720451002458308143e-32, /* F^(21)(x_23) / (21!) */ + ( real_type )+1.7316667489351404008941326487608288720572e-34, /* F^(22)(x_23) / (22!) */ + ( real_type )-8.13959642119095191219436319462204117987519e-37, /* F^(23)(x_23) / (23!) */ + ( real_type )-1.24573180480291869573653756619863377818135e-38, /* F^(24)(x_23) / (24!) */ + + /* ===== n = 24, xn = 5.05, yn = 0 ==== */ + ( real_type )+0.101023390864058696333111557061547844543024, /* F^(00)(x_24) / ( 0!) */ + ( real_type )-0.0208679497841720325913616395349749557775523, /* F^(01)(x_24) / ( 1!) */ + ( real_type )+0.00220733560166841545354794318587565199407667, /* F^(02)(x_24) / ( 2!) */ + ( real_type )-0.00015975081649746856327040808930678838751009, /* F^(03)(x_24) / ( 3!) */ + ( real_type )+0.000008922935788533004496702872419394394555841, /* F^(04)(x_24) / ( 4!) */ + ( real_type )-0.000000411728241415548246728771711936877915487584, /* F^(05)(x_24) / ( 5!) */ + ( real_type )+0.0000000164291087118924339308840243099468236801761, /* F^(06)(x_24) / ( 6!) */ + ( real_type )-0.000000000587300404996285259946746227913403972667622, /* F^(07)(x_24) / ( 7!) */ + ( real_type )+0.0000000000193874075732019332916024643132900485132215, /* F^(08)(x_24) / ( 8!) */ + ( real_type )-0.000000000000606046348487599740485317946834452969834527, /* F^(09)(x_24) / ( 9!) */ + ( real_type )+1.81594504330350192013911495457403052600212e-14, /* F^(10)(x_24) / (10!) 
*/ + ( real_type )-5.14849220917722696510182103515284545686095e-16, /* F^(11)(x_24) / (11!) */ + ( real_type )+1.32011708573531215116942191865494956761469e-17, /* F^(12)(x_24) / (12!) */ + ( real_type )-2.81615988281639359679156829455887618714031e-19, /* F^(13)(x_24) / (13!) */ + ( real_type )+4.15741994074926021018047470639608363435813e-21, /* F^(14)(x_24) / (14!) */ + ( real_type )-7.9154092904213058782289628524414348543352e-24, /* F^(15)(x_24) / (15!) */ + ( real_type )-1.85287251907974956583075458470826696613135e-24, /* F^(16)(x_24) / (16!) */ + ( real_type )+6.82117320575600980426374132606138005371195e-26, /* F^(17)(x_24) / (17!) */ + ( real_type )-1.45466821189127454946322490109831608861189e-27, /* F^(18)(x_24) / (18!) */ + ( real_type )+1.97249965342288383328604931976076832069644e-29, /* F^(19)(x_24) / (19!) */ + ( real_type )-1.15508277735445743519864458868715979650134e-31, /* F^(20)(x_24) / (20!) */ + ( real_type )-1.82598210882315447042898626911056884074331e-33, /* F^(21)(x_24) / (21!) */ + ( real_type )+6.08529299318642508638924289681935149791248e-35, /* F^(22)(x_24) / (22!) */ + ( real_type )-8.48651223744877088232015486970807491637591e-37, /* F^(23)(x_24) / (23!) */ + ( real_type )+5.70188336927335796771169480102772265189723e-39, /* F^(24)(x_24) / (24!) */ + + /* ===== n = 25, xn = 5.26, yn = 0 ==== */ + ( real_type )+0.0968172599708731096134786108738627105980966, /* F^(00)(x_25) / ( 0!) */ + ( real_type )-0.019129052324980001625984265315504011137274, /* F^(01)(x_25) / ( 1!) */ + ( real_type )+0.00193098139608976034347912676231112673135214, /* F^(02)(x_25) / ( 2!) */ + ( real_type )-0.000133007514873286550122176636152402566024866, /* F^(03)(x_25) / ( 3!) */ + ( real_type )+0.00000704738582517656713182946974719997727656481, /* F^(04)(x_25) / ( 4!) */ + ( real_type )-0.000000307170049977033815789102718104567652698539, /* F^(05)(x_25) / ( 5!) */ + ( real_type )+0.0000000115115171404456826810478991964404501339667, /* F^(06)(x_25) / ( 6!) 
*/ + ( real_type )-0.000000000383343836270593066177312972376912966835035, /* F^(07)(x_25) / ( 7!) */ + ( real_type )+0.0000000000116592715509744241464812847709951520495991, /* F^(08)(x_25) / ( 8!) */ + ( real_type )-0.000000000000332015001890557204114831678641108828441684, /* F^(09)(x_25) / ( 9!) */ + ( real_type )+9.03945523192835037733762458889594780509207e-15, /* F^(10)(x_25) / (10!) */ + ( real_type )-2.37596363289061558591076623041169389864904e-16, /* F^(11)(x_25) / (11!) */ + ( real_type )+5.95470163552159966416706266444859635520569e-18, /* F^(12)(x_25) / (12!) */ + ( real_type )-1.36577964296364023961562902921059468862886e-19, /* F^(13)(x_25) / (13!) */ + ( real_type )+2.66099407214586549550345833447815706937032e-21, /* F^(14)(x_25) / (14!) */ + ( real_type )-3.77747829364509565402095237748454360723338e-23, /* F^(15)(x_25) / (15!) */ + ( real_type )+1.67305017376781383220266451758748881560193e-25, /* F^(16)(x_25) / (16!) */ + ( real_type )+1.02447834636158711360538807738053366583138e-26, /* F^(17)(x_25) / (17!) */ + ( real_type )-3.9358879368393963824727831578025243300647e-28, /* F^(18)(x_25) / (18!) */ + ( real_type )+8.32335362819960357103913466650281736177834e-30, /* F^(19)(x_25) / (19!) */ + ( real_type )-1.15459623456847536194152988887927790761274e-31, /* F^(20)(x_25) / (20!) */ + ( real_type )+8.68546699819818379017046436041817942098627e-34, /* F^(21)(x_25) / (21!) */ + ( real_type )+3.82966482382075819665023947535535337877757e-36, /* F^(22)(x_25) / (22!) */ + ( real_type )-2.25465127024739432628226965563125083036215e-37, /* F^(23)(x_25) / (23!) */ + ( real_type )+3.54219235081606151986788580225182648169217e-39, /* F^(24)(x_25) / (24!) */ + + /* ===== n = 26, xn = 5.47, yn = 0 ==== */ + ( real_type )+0.0929541296762774182586342664150765738247869, /* F^(00)(x_26) / ( 0!) */ + ( real_type )-0.0176031038245105450463507052150011604903343, /* F^(01)(x_26) / ( 1!) */ + ( real_type )+0.00169985089210067073029661299309372995910267, /* F^(02)(x_26) / ( 2!) 
*/ + ( real_type )-0.000111754461352275483674643118012722684469264, /* F^(03)(x_26) / ( 3!) */ + ( real_type )+0.00000563645831578333830367053463301789246977068, /* F^(04)(x_26) / ( 4!) */ + ( real_type )-0.000000233086203865930762224381294249391501412678, /* F^(05)(x_26) / ( 5!) */ + ( real_type )+0.00000000825270059087751580419135596424282128971891, /* F^(06)(x_26) / ( 6!) */ + ( real_type )-0.000000000258162296904720614351803401652859308368503, /* F^(07)(x_26) / ( 7!) */ + ( real_type )+0.00000000000731692690788622680628027567948022291891214, /* F^(08)(x_26) / ( 8!) */ + ( real_type )-0.000000000000192105189436590025715179131925924622615454, /* F^(09)(x_26) / ( 9!) */ + ( real_type )+4.77062527050408540120140298185795386888127e-15, /* F^(10)(x_26) / (10!) */ + ( real_type )-1.14088880559289921767240486863117816352493e-16, /* F^(11)(x_26) / (11!) */ + ( real_type )+2.64990691256313330971497153023446708770736e-18, /* F^(12)(x_26) / (12!) */ + ( real_type )-5.91402001584283922483862504102115174082415e-20, /* F^(13)(x_26) / (13!) */ + ( real_type )+1.2232223116374304928367946727278289914898e-21, /* F^(14)(x_26) / (14!) */ + ( real_type )-2.196651237206212536302591527734887109977e-23, /* F^(15)(x_26) / (15!) */ + ( real_type )+3.02262482985354482455526048928245706872611e-25, /* F^(16)(x_26) / (16!) */ + ( real_type )-1.94866683250886469181322277748883661769632e-27, /* F^(17)(x_26) / (17!) */ + ( real_type )-4.39120937363081997916328063680466075335122e-29, /* F^(18)(x_26) / (18!) */ + ( real_type )+1.9314138648659080190160037648366554123487e-30, /* F^(19)(x_26) / (19!) */ + ( real_type )-4.13039337394068965102604622733555157833216e-32, /* F^(20)(x_26) / (20!) */ + ( real_type )+5.87365064393293718113780394508300814858586e-34, /* F^(21)(x_26) / (21!) */ + ( real_type )-5.15783839336173358374651614539112353473149e-36, /* F^(22)(x_26) / (22!) */ + ( real_type )+5.79949548700775303144667910744076788885022e-39, /* F^(23)(x_26) / (23!) 
*/ + ( real_type )+6.68434848310373718658836046324219708704315e-40, /* F^(24)(x_26) / (24!) */ + + /* ===== n = 27, xn = 5.68, yn = 0 ==== */ + ( real_type )+0.0893928904886096481956461370618202971912525, /* F^(00)(x_27) / ( 0!) */ + ( real_type )-0.0162560181862991417195905673133943122080628, /* F^(01)(x_27) / ( 1!) */ + ( real_type )+0.00150486960096642002952672576380696698776006, /* F^(02)(x_27) / ( 2!) */ + ( real_type )-0.00009466367408808788514141035007813430220804, /* F^(03)(x_27) / ( 3!) */ + ( real_type )+0.0000045581315486370476323240552315561248034948, /* F^(04)(x_27) / ( 4!) */ + ( real_type )-0.000000179476864565826028515497477176625610261423, /* F^(05)(x_27) / ( 5!) */ + ( real_type )+0.00000000603099850376733847376421570042328004635142, /* F^(06)(x_27) / ( 6!) */ + ( real_type )-0.000000000178312425238288838140181548170306257519158, /* F^(07)(x_27) / ( 7!) */ + ( real_type )+0.0000000000047498171777647858118495486969332040352233, /* F^(08)(x_27) / ( 8!) */ + ( real_type )-0.000000000000116294015929383844911565760547155466422352, /* F^(09)(x_27) / ( 9!) */ + ( real_type )+2.66564410594504853446590161552751178660177e-15, /* F^(10)(x_27) / (10!) */ + ( real_type )-5.82261271650595316704717620138521725053665e-17, /* F^(11)(x_27) / (11!) */ + ( real_type )+1.23108725804989369563624077229992609116945e-18, /* F^(12)(x_27) / (12!) */ + ( real_type )-2.5391501624443636539157132451591214462565e-20, /* F^(13)(x_27) / (13!) */ + ( real_type )+5.06445111863624097229251459821373287112803e-22, /* F^(14)(x_27) / (14!) */ + ( real_type )-9.46721731907670937917665347745540685672109e-24, /* F^(15)(x_27) / (15!) */ + ( real_type )+1.56645694394062804129743897324984282223478e-25, /* F^(16)(x_27) / (16!) */ + ( real_type )-2.06716970612099441995316492701696294131828e-27, /* F^(17)(x_27) / (17!) */ + ( real_type )+1.56529332827074955881272977192000268754997e-29, /* F^(18)(x_27) / (18!) 
*/ + ( real_type )+1.43314090403776434146282719033166098721816e-31, /* F^(19)(x_27) / (19!) */ + ( real_type )-8.19233027534614479509180702726523527254846e-33, /* F^(20)(x_27) / (20!) */ + ( real_type )+1.78690364925121377462333132981567303282619e-34, /* F^(21)(x_27) / (21!) */ + ( real_type )-2.58513507341285575832196527290508837393742e-36, /* F^(22)(x_27) / (22!) */ + ( real_type )+2.48474959601635206715023186865533340654853e-38, /* F^(23)(x_27) / (23!) */ + ( real_type )-1.00143128418207514339562463797124067557329e-40, /* F^(24)(x_27) / (24!) */ + + /* ===== n = 28, xn = 5.89, yn = 0 ==== */ + ( real_type )+0.0860988731325164934603718599503360383247846, /* F^(00)(x_28) / ( 0!) */ + ( real_type )-0.0150603990359838665540548186144362977715939, /* F^(01)(x_28) / ( 1!) */ + ( real_type )+0.00133910796085209433349537986648421222479166, /* F^(02)(x_28) / ( 2!) */ + ( real_type )-0.0000807754476443020440046107703718686441856762, /* F^(03)(x_28) / ( 3!) */ + ( real_type )+0.00000372258586029757224239869113318768045838276, /* F^(04)(x_28) / ( 4!) */ + ( real_type )-0.000000139984168601656248347407416718913821122934, /* F^(05)(x_28) / ( 5!) */ + ( real_type )+0.00000000448070355045241059496544094976786605611339, /* F^(06)(x_28) / ( 6!) */ + ( real_type )-0.000000000125790756345345876860056949513434641860784, /* F^(07)(x_28) / ( 7!) */ + ( real_type )+0.00000000000316884058180342121227306070770583507460236, /* F^(08)(x_28) / ( 8!) */ + ( real_type )-7.29786859542387245360322320651458948945323e-14, /* F^(09)(x_28) / ( 9!) */ + ( real_type )+1.56193501609445932346110138359359030964164e-15, /* F^(10)(x_28) / (10!) */ + ( real_type )-3.15591287425192326213512780819826412266047e-17, /* F^(11)(x_28) / (11!) */ + ( real_type )+6.11648146264551122104152850755741590802321e-19, /* F^(12)(x_28) / (12!) */ + ( real_type )-1.15452954672319756394208291845842663821228e-20, /* F^(13)(x_28) / (13!) */ + ( real_type )+2.14353276925691327682315763455264291020656e-22, /* F^(14)(x_28) / (14!) 
*/ + ( real_type )-3.90126397133087538844604164962997023146852e-24, /* F^(15)(x_28) / (15!) */ + ( real_type )+6.80211406317909830810967991313243783397159e-26, /* F^(16)(x_28) / (16!) */ + ( real_type )-1.08745830806963218298991174502425316433403e-27, /* F^(17)(x_28) / (17!) */ + ( real_type )+1.48705986805899721540910750765418743217196e-29, /* F^(18)(x_28) / (18!) */ + ( real_type )-1.50935819713699287581011725835971202171278e-31, /* F^(19)(x_28) / (19!) */ + ( real_type )+5.35319244037986083309650897603107423247461e-34, /* F^(20)(x_28) / (20!) */ + ( real_type )+1.99148522247905203961077074700307362066713e-35, /* F^(21)(x_28) / (21!) */ + ( real_type )-5.90430602015669721352981879495268153704868e-37, /* F^(22)(x_28) / (22!) */ + ( real_type )+9.73615299333029029340284708759513747826933e-39, /* F^(23)(x_28) / (23!) */ + ( real_type )-1.10142930591478369707134190469279600541791e-40, /* F^(24)(x_28) / (24!) */ + + /* ===== n = 29, xn = 6.11, yn = 0 ==== */ + ( real_type )+0.0830426115241511768702601167929215319557302, /* F^(00)(x_29) / ( 0!) */ + ( real_type )-0.0139939933475300665126639886815678297843283, /* F^(01)(x_29) / ( 1!) */ + ( real_type )+0.00119720024617408436274015019885580684650707, /* F^(02)(x_29) / ( 2!) */ + ( real_type )-0.000069383529294247268393221731630285710921236, /* F^(03)(x_29) / ( 3!) */ + ( real_type )+0.00000306724455502290414535084729151593413634048, /* F^(04)(x_29) / ( 4!) */ + ( real_type )-0.000000110436228357820308639031578961302740870094, /* F^(05)(x_29) / ( 5!) */ + ( real_type )+0.00000000337740696029274524195773709315377664834972, /* F^(06)(x_29) / ( 6!) */ + ( real_type )-0.0000000000903640907859522407016493719388695976851966, /* F^(07)(x_29) / ( 7!) */ + ( real_type )+0.00000000000216280767684000308053329785816748908626304, /* F^(08)(x_29) / ( 8!) */ + ( real_type )-4.71357698463986518829578152306387111138886e-14, /* F^(09)(x_29) / ( 9!) */ + ( real_type )+9.49286299377792925644228521064589312907147e-16, /* F^(10)(x_29) / (10!) 
*/ + ( real_type )-1.78853832272001136837611107551993609288667e-17, /* F^(11)(x_29) / (11!) */ + ( real_type )+3.18003011013375689551071852866279283002898e-19, /* F^(12)(x_29) / (12!) */ + ( real_type )-5.33779248488616003279246435535612880703449e-21, /* F^(13)(x_29) / (13!) */ + ( real_type )+8.29271196378662516895969756642253193385493e-23, /* F^(14)(x_29) / (14!) */ + ( real_type )-1.11129774700276490382501419970146373684782e-24, /* F^(15)(x_29) / (15!) */ + ( real_type )+9.81476991955460451994682990679027615138686e-27, /* F^(16)(x_29) / (16!) */ + ( real_type )+6.59817023847930444675192625757868940733063e-29, /* F^(17)(x_29) / (17!) */ + ( real_type )-6.05046335615849401088978379324953181410507e-30, /* F^(18)(x_29) / (18!) */ + ( real_type )+1.84343606005861989747364378222769095723746e-31, /* F^(19)(x_29) / (19!) */ + ( real_type )-4.03510393171618285132745581875881106242127e-33, /* F^(20)(x_29) / (20!) */ + ( real_type )+6.99237690353283870359904320074222724828864e-35, /* F^(21)(x_29) / (21!) */ + ( real_type )-9.70062829257570175376931959366235477975337e-37, /* F^(22)(x_29) / (22!) */ + ( real_type )+1.03748042130053048219412838831852379611648e-38, /* F^(23)(x_29) / (23!) */ + ( real_type )-7.34870318809140397718561534239916190747651e-41, /* F^(24)(x_29) / (24!) */ + + /* ===== n = 30, xn = 6.32, yn = 0 ==== */ + ( real_type )+0.0801988855781815476048148063389218190822483, /* F^(00)(x_30) / ( 0!) */ + ( real_type )-0.0130385546717669321094332283948969242733036, /* F^(01)(x_30) / ( 1!) */ + ( real_type )+0.0010749403849100650474113645798787890256809, /* F^(02)(x_30) / ( 2!) */ + ( real_type )-0.0000599599626599058778856816902510612625796367, /* F^(03)(x_30) / ( 3!) */ + ( real_type )+0.00000254762992165543959991304990352496409297204, /* F^(04)(x_30) / ( 4!) */ + ( real_type )-0.0000000880242861646308907708538750476593209685254, /* F^(05)(x_30) / ( 5!) */ + ( real_type )+0.0000000025787153142252531263023922052373658910048, /* F^(06)(x_30) / ( 6!) 
*/ + ( real_type )-0.0000000000659554561812371477894057549611681433385064, /* F^(07)(x_30) / ( 7!) */ + ( real_type )+0.00000000000150540230779121136359923920371245748666512, /* F^(08)(x_30) / ( 8!) */ + ( real_type )-3.11943710260001823342452526401037310141213e-14, /* F^(09)(x_30) / ( 9!) */ + ( real_type )+5.95003141525986141673723612420494394989833e-16, /* F^(10)(x_30) / (10!) */ + ( real_type )-1.05533467017990357744399768495719019867982e-17, /* F^(11)(x_30) / (11!) */ + ( real_type )+1.7446557398863574805101185013132888549523e-19, /* F^(12)(x_30) / (12!) */ + ( real_type )-2.63245298502303546133369101954168037155113e-21, /* F^(13)(x_30) / (13!) */ + ( real_type )+3.27101029022496970793034402411937716813582e-23, /* F^(14)(x_30) / (14!) */ + ( real_type )-1.64957933511840205224964422538842247892344e-25, /* F^(15)(x_30) / (15!) */ + ( real_type )-8.89714020892082978910271663668938410733466e-27, /* F^(16)(x_30) / (16!) */ + ( real_type )+4.60223773853873550974855985935928089821181e-28, /* F^(17)(x_30) / (17!) */ + ( real_type )-1.47118227017621505923178270265427631684261e-29, /* F^(18)(x_30) / (18!) */ + ( real_type )+3.73123476577127347382778286732645647290497e-31, /* F^(19)(x_30) / (19!) */ + ( real_type )-7.91131433881398204116210708946158786472523e-33, /* F^(20)(x_30) / (20!) */ + ( real_type )+1.41995564604226489790937059968221641438135e-34, /* F^(21)(x_30) / (21!) */ + ( real_type )-2.14911090085943268075158967412045707612973e-36, /* F^(22)(x_30) / (22!) */ + ( real_type )+2.6914889072897306058585986400720926401146e-38, /* F^(23)(x_30) / (23!) */ + ( real_type )-2.65795724676355079511961338700781694009466e-40, /* F^(24)(x_30) / (24!) */ + + /* ===== n = 31, xn = 6.53, yn = 0 ==== */ + ( real_type )+0.0775459713212218568287642349004942070222095, /* F^(00)(x_31) / ( 0!) */ + ( real_type )-0.0121789940875273726466095095872735017742981, /* F^(01)(x_31) / ( 1!) */ + ( real_type )+0.000968995046057339367939462082127176883415034, /* F^(02)(x_31) / ( 2!) 
*/ + ( real_type )-0.0000521045833871349578921811811847769816422397, /* F^(03)(x_31) / ( 3!) */ + ( real_type )+0.00000213157707220341212398710757247883305852731, /* F^(04)(x_31) / ( 4!) */ + ( real_type )-0.0000000708159404813980956079093136157712741800099, /* F^(05)(x_31) / ( 5!) */ + ( real_type )+0.00000000199176533719414501619349647338366557741352, /* F^(06)(x_31) / ( 6!) */ + ( real_type )-0.0000000000488258294757894893143253149485509602748682, /* F^(07)(x_31) / ( 7!) */ + ( real_type )+0.00000000000106608989151703959268583905770865434868472, /* F^(08)(x_31) / ( 8!) */ + ( real_type )-2.10965582106729948419588236977493159586462e-14, /* F^(09)(x_31) / ( 9!) */ + ( real_type )+3.84567371261673623003789048913247308733276e-16, /* F^(10)(x_31) / (10!) */ + ( real_type )-6.61408737378759289086973189495003408208124e-18, /* F^(11)(x_31) / (11!) */ + ( real_type )+1.13957677025151360460935886420522840906283e-19, /* F^(12)(x_31) / (12!) */ + ( real_type )-2.27869632053674638200024453677068449328115e-21, /* F^(13)(x_31) / (13!) */ + ( real_type )+6.23011900036089911631391130381942451120618e-23, /* F^(14)(x_31) / (14!) */ + ( real_type )-2.16740637447174686974612563930726583423839e-24, /* F^(15)(x_31) / (15!) */ + ( real_type )+7.80606701329188119243747247177246273243316e-26, /* F^(16)(x_31) / (16!) */ + ( real_type )-2.58813623267045458488564332549797504242916e-27, /* F^(17)(x_31) / (17!) */ + ( real_type )+7.59209273772858754947970828329527264826576e-29, /* F^(18)(x_31) / (18!) */ + ( real_type )-1.94846870268363437090389080837664826461829e-30, /* F^(19)(x_31) / (19!) */ + ( real_type )+4.36024188830223108698741625925371185908378e-32, /* F^(20)(x_31) / (20!) */ + ( real_type )-8.48706211706722467913955698207380324726019e-34, /* F^(21)(x_31) / (21!) */ + ( real_type )+1.43083433429325519628696887773394419807756e-35, /* F^(22)(x_31) / (22!) */ + ( real_type )-2.0719550851649563612477704899890072766329e-37, /* F^(23)(x_31) / (23!) 
*/ + ( real_type )+2.53514386897189688402226420415725191470297e-39, /* F^(24)(x_31) / (24!) */ + + /* ===== n = 32, xn = 6.74, yn = 0 ==== */ + ( real_type )+0.0750650468966655593891682810790162389606471, /* F^(00)(x_32) / ( 0!) */ + ( real_type )-0.0114027371340201124678212798888871699279703, /* F^(01)(x_32) / ( 1!) */ + ( real_type )+0.000876696371524439117515950044128960039789419, /* F^(02)(x_32) / ( 2!) */ + ( real_type )-0.0000455103249349372206834744346759897343505116, /* F^(03)(x_32) / ( 3!) */ + ( real_type )+0.00000179546867555164153407940469098230008669161, /* F^(04)(x_32) / ( 4!) */ + ( real_type )-0.0000000574566190722454962445810052821572175241938, /* F^(05)(x_32) / ( 5!) */ + ( real_type )+0.00000000155457974898007392266315151810157958976225, /* F^(06)(x_32) / ( 6!) */ + ( real_type )-0.0000000000366063318453391613614537764725862843637971, /* F^(07)(x_32) / ( 7!) */ + ( real_type )+0.000000000000766508012876523080013459254805402645478486, /* F^(08)(x_32) / ( 8!) */ + ( real_type )-1.45198018395249390018233411039259480595753e-14, /* F^(09)(x_32) / ( 9!) */ + ( real_type )+2.53001108126174301908793936007878555751149e-16, /* F^(10)(x_32) / (10!) */ + ( real_type )-4.17269973471328816283625970288049156154619e-18, /* F^(11)(x_32) / (11!) */ + ( real_type )+7.09822017596356772241763374905232189567591e-20, /* F^(12)(x_32) / (12!) */ + ( real_type )-1.54403441000975392246129869213550040019216e-21, /* F^(13)(x_32) / (13!) */ + ( real_type )+5.04259863183984349786389414981976081972329e-23, /* F^(14)(x_32) / (14!) */ + ( real_type )-2.03932209498361456771200500997582999816281e-24, /* F^(15)(x_32) / (15!) */ + ( real_type )+8.10692073004103478086371891676639872345168e-26, /* F^(16)(x_32) / (16!) */ + ( real_type )-2.89752815394626903817745068393813342780203e-27, /* F^(17)(x_32) / (17!) */ + ( real_type )+9.10581035102871319938660942569976270514736e-29, /* F^(18)(x_32) / (18!) 
*/ + ( real_type )-2.50676055082085870538910112479392610866416e-30, /* F^(19)(x_32) / (19!) */ + ( real_type )+6.04755913144165169695615402524049770965427e-32, /* F^(20)(x_32) / (20!) */ + ( real_type )-1.279258423431315902202962189092005325295e-33, /* F^(21)(x_32) / (21!) */ + ( real_type )+2.3712280776418972346327174489791078851637e-35, /* F^(22)(x_32) / (22!) */ + ( real_type )-3.84112595048894458974787618536937668168154e-37, /* F^(23)(x_32) / (23!) */ + ( real_type )+5.40533740999225106366024015301239529521824e-39, /* F^(24)(x_32) / (24!) */ + + /* ===== n = 33, xn = 6.95, yn = 0 ==== */ + ( real_type )+0.0727397173579474671216481741375405058188894, /* F^(00)(x_33) / ( 0!) */ + ( real_type )-0.0106992306577962968612152868161260458445677, /* F^(01)(x_33) / ( 1!) */ + ( real_type )+0.000795889921792347021904406360078059892254859, /* F^(02)(x_33) / ( 2!) */ + ( real_type )-0.0000399389289769003218741184312389116037787612, /* F^(03)(x_33) / ( 3!) */ + ( real_type )+0.00000152172666841651899676377154296059472151763, /* F^(04)(x_33) / ( 4!) */ + ( real_type )-0.0000000469810845924377309491076079141234200178414, /* F^(05)(x_33) / ( 5!) */ + ( real_type )+0.00000000122497610005557823134871720173635541160269, /* F^(06)(x_33) / ( 6!) */ + ( real_type )-0.0000000000277624289250630097856595808877706571362691, /* F^(07)(x_33) / ( 7!) */ + ( real_type )+0.000000000000558726133858607237416990879852884897023998, /* F^(08)(x_33) / ( 8!) */ + ( real_type )-1.01573501914379475592863697570450504552261e-14, /* F^(09)(x_33) / ( 9!) */ + ( real_type )+1.69723448434472881168337931418313178967635e-16, /* F^(10)(x_33) / (10!) */ + ( real_type )-2.70076540201680556058751595756078011274976e-18, /* F^(11)(x_33) / (11!) */ + ( real_type )+4.63028869931829205709563589201059245799967e-20, /* F^(12)(x_33) / (12!) */ + ( real_type )-1.14342522365215582464948457277262387616351e-21, /* F^(13)(x_33) / (13!) */ + ( real_type )+4.47146505377436888630817534295902334359437e-23, /* F^(14)(x_33) / (14!) 
*/ + ( real_type )-2.03534151860850022130592884815077022711261e-24, /* F^(15)(x_33) / (15!) */ + ( real_type )+8.71819585150137060656972358104416773557551e-26, /* F^(16)(x_33) / (16!) */ + ( real_type )-3.31125495197021095656182873678802019823693e-27, /* F^(17)(x_33) / (17!) */ + ( real_type )+1.10346651768449572321957900702581498548611e-28, /* F^(18)(x_33) / (18!) */ + ( real_type )-3.22803454527550424792306565478274732924118e-30, /* F^(19)(x_33) / (19!) */ + ( real_type )+8.30931337389255134557812343805302132286045e-32, /* F^(20)(x_33) / (20!) */ + ( real_type )-1.88606297542103778045502895523955291631965e-33, /* F^(21)(x_33) / (21!) */ + ( real_type )+3.77948696827557687031908139296589429703465e-35, /* F^(22)(x_33) / (22!) */ + ( real_type )-6.68600184746185588813456028870497882874813e-37, /* F^(23)(x_33) / (23!) */ + ( real_type )+1.04227659938108605447494489318167022323785e-38, /* F^(24)(x_33) / (24!) */ + + /* ===== n = 34, xn = 7.16, yn = 0 ==== */ + ( real_type )+0.0705556310552950551974987896970666311765803, /* F^(00)(x_34) / ( 0!) */ + ( real_type )-0.0100595603705396188286352999299282772334269, /* F^(01)(x_34) / ( 1!) */ + ( real_type )+0.000724821587967946082042210424106633058656104, /* F^(02)(x_34) / ( 2!) */ + ( real_type )-0.0000352036543101084993898152299167443877794497, /* F^(03)(x_34) / ( 3!) */ + ( real_type )+0.00000129710698932739145574080898165687869244224, /* F^(04)(x_34) / ( 4!) */ + ( real_type )-0.0000000386913371600414532294898192445649741704076, /* F^(05)(x_34) / ( 5!) */ + ( real_type )+0.000000000973728930764764587219148476428098394026654, /* F^(06)(x_34) / ( 6!) */ + ( real_type )-0.0000000000212772651139492296481403914360676929098329, /* F^(07)(x_34) / ( 7!) */ + ( real_type )+0.000000000000412384093973011533533097000599286953214275, /* F^(08)(x_34) / ( 8!) */ + ( real_type )-7.2134270756337122200078106839901971069706e-15, /* F^(09)(x_34) / ( 9!) 
*/ + ( real_type )+1.16249936267654220147070764812778556151909e-16, /* F^(10)(x_34) / (10!) */ + ( real_type )-1.83077850256365277379248316219166075269124e-18, /* F^(11)(x_34) / (11!) */ + ( real_type )+3.52269969929421879877548344007487447485431e-20, /* F^(12)(x_34) / (12!) */ + ( real_type )-1.1785356349928496489038341817881174589428e-21, /* F^(13)(x_34) / (13!) */ + ( real_type )+5.84292348922356590330974222631036658965932e-23, /* F^(14)(x_34) / (14!) */ + ( real_type )-2.96932618780125441577801483023917832481449e-24, /* F^(15)(x_34) / (15!) */ + ( real_type )+1.35615952855320196669123085134314436319649e-25, /* F^(16)(x_34) / (16!) */ + ( real_type )-5.43350746323422143877013967961687333963394e-27, /* F^(17)(x_34) / (17!) */ + ( real_type )+1.90833737544364842582854761528014074971343e-28, /* F^(18)(x_34) / (18!) */ + ( real_type )-5.89532565011347414410924482866295352263933e-30, /* F^(19)(x_34) / (19!) */ + ( real_type )+1.60771197597649354412153732508675744394747e-31, /* F^(20)(x_34) / (20!) */ + ( real_type )-3.88216338600608883944766133275287437118888e-33, /* F^(21)(x_34) / (21!) */ + ( real_type )+8.31915105540509402459380442310313454471927e-35, /* F^(22)(x_34) / (22!) */ + ( real_type )-1.58417430417239981554377158290077572025288e-36, /* F^(23)(x_34) / (23!) */ + ( real_type )+2.68136483197010724624802206254574680293882e-38, /* F^(24)(x_34) / (24!) */ + + /* ===== n = 35, xn = 7.37, yn = 0 ==== */ + ( real_type )+0.0685001674055154771379396766403437184862358, /* F^(00)(x_35) / ( 0!) */ + ( real_type )-0.00947615123917542694211294609990154213554242, /* F^(01)(x_35) / ( 1!) */ + ( real_type )+0.000662052441572780858129117579442858824440228, /* F^(02)(x_35) / ( 2!) */ + ( real_type )-0.0000311567841874007232271427826204398035194872, /* F^(03)(x_35) / ( 3!) */ + ( real_type )+0.00000111151966847690157647661092352917037739575, /* F^(04)(x_35) / ( 4!) */ + ( real_type )-0.000000032075910301527500753204974287615160828677, /* F^(05)(x_35) / ( 5!) 
*/ + ( real_type )+0.000000000780271058484280704330554393686031744280665, /* F^(06)(x_35) / ( 6!) */ + ( real_type )-0.000000000016464516075106788648153671083451602955719, /* F^(07)(x_35) / ( 7!) */ + ( real_type )+0.000000000000307818525000779859834136244762594321433838, /* F^(08)(x_35) / ( 8!) */ + ( real_type )-5.18696273129377029736584936335420915561424e-15, /* F^(09)(x_35) / ( 9!) */ + ( real_type )+8.03533411012085464306714064649061639359397e-17, /* F^(10)(x_35) / (10!) */ + ( real_type )-1.21290871223230330563416185048086654395252e-18, /* F^(11)(x_35) / (11!) */ + ( real_type )+2.26718458210250306415179042386542236045679e-20, /* F^(12)(x_35) / (12!) */ + ( real_type )-7.80828166039047425731448287016683869793631e-22, /* F^(13)(x_35) / (13!) */ + ( real_type )+4.09130873168466783681578914401681947307366e-23, /* F^(14)(x_35) / (14!) */ + ( real_type )-2.18392367539774725071870210306185361146395e-24, /* F^(15)(x_35) / (15!) */ + ( real_type )+1.04410390880350389833646977890325430016372e-25, /* F^(16)(x_35) / (16!) */ + ( real_type )-4.3795466613419156239453266318454571881059e-27, /* F^(17)(x_35) / (17!) */ + ( real_type )+1.61287383466481319299065554066232809336474e-28, /* F^(18)(x_35) / (18!) */ + ( real_type )-5.23615071471443063395282609992166188155637e-30, /* F^(19)(x_35) / (19!) */ + ( real_type )+1.50466767524615513382048903025216516705263e-31, /* F^(20)(x_35) / (20!) */ + ( real_type )-3.84079347761486149580291644325294431967302e-33, /* F^(21)(x_35) / (21!) */ + ( real_type )+8.73367741794568834985357521169990935164885e-35, /* F^(22)(x_35) / (22!) */ + ( real_type )-1.77297770821577975894771713104874435512782e-36, /* F^(23)(x_35) / (23!) */ + ( real_type )+3.21763722909128678208840284091580965392023e-38, /* F^(24)(x_35) / (24!) */ + + /* ===== n = 36, xn = 7.58, yn = 0 ==== */ + ( real_type )+0.0665621808371330882020689517139535800601475, /* F^(00)(x_36) / ( 0!) */ + ( real_type )-0.00894253058391201779222060603577785992763496, /* F^(01)(x_36) / ( 1!) 
*/ + ( real_type )+0.000606393899415837230457711771993865880389527, /* F^(02)(x_36) / ( 2!) */ + ( real_type )-0.0000276804791662735358191925162179178458885086, /* F^(03)(x_36) / ( 3!) */ + ( real_type )+0.000000957199366072937349548528053276738434761006, /* F^(04)(x_36) / ( 4!) */ + ( real_type )-0.0000000267555059987523180730492840721351800606201, /* F^(05)(x_36) / ( 5!) */ + ( real_type )+0.000000000629927698079449299303283560509846914591026, /* F^(06)(x_36) / ( 6!) */ + ( real_type )-0.0000000000128546069331722595767055062694676684394577, /* F^(07)(x_36) / ( 7!) */ + ( real_type )+0.00000000000023233494481393332877681883184916260423221, /* F^(08)(x_36) / ( 8!) */ + ( real_type )-3.80317165292044313449301788524718568081929e-15, /* F^(09)(x_36) / ( 9!) */ + ( real_type )+6.01808784468024306355437780668634574157414e-17, /* F^(10)(x_36) / (10!) */ + ( real_type )-1.25273628167960641576281202630873902118957e-18, /* F^(11)(x_36) / (11!) */ + ( real_type )+5.58810193840543773485036351040313379420385e-20, /* F^(12)(x_36) / (12!) */ + ( real_type )-3.77662265876328437654057823531921599476563e-21, /* F^(13)(x_36) / (13!) */ + ( real_type )+2.48206983760548409105493193441545364861339e-22, /* F^(14)(x_36) / (14!) */ + ( real_type )-1.43234569338266720743672641784392723706688e-23, /* F^(15)(x_36) / (15!) */ + ( real_type )+7.18824953004802362669624410433877835227482e-25, /* F^(16)(x_36) / (16!) */ + ( real_type )-3.15067157617091099581538851514050010128698e-26, /* F^(17)(x_36) / (17!) */ + ( real_type )+1.2129871589185615564986687254996480230665e-27, /* F^(18)(x_36) / (18!) */ + ( real_type )-4.12343087985686007145370172975571463748884e-29, /* F^(19)(x_36) / (19!) */ + ( real_type )+1.24335613370403068862732966581366252431882e-30, /* F^(20)(x_36) / (20!) */ + ( real_type )-3.33860102386299650422786804690669000996304e-32, /* F^(21)(x_36) / (21!) */ + ( real_type )+8.00923055792792162219233779412480200740577e-34, /* F^(22)(x_36) / (22!) 
*/ + ( real_type )-1.72121276600030398898114889804711061945179e-35, /* F^(23)(x_36) / (23!) */ + ( real_type )+3.32038430824648915175995251189074044342691e-37, /* F^(24)(x_36) / (24!) */ + + /* ===== n = 37, xn = 7.79, yn = 0 ==== */ + ( real_type )+0.0647317893379955483199347836217465191040756, /* F^(00)(x_37) / ( 0!) */ + ( real_type )-0.00845313916035161541505799535434313957824386, /* F^(01)(x_37) / ( 1!) */ + ( real_type )+0.000556857850266409427349512904758495935038776, /* F^(02)(x_37) / ( 2!) */ + ( real_type )-0.000024679997848985285151218687447938846402763, /* F^(03)(x_37) / ( 3!) */ + ( real_type )+0.000000828113793613347734320634983498513202478058, /* F^(04)(x_37) / ( 4!) */ + ( real_type )-0.0000000224456912509277891115200192537538182542915, /* F^(05)(x_37) / ( 5!) */ + ( real_type )+0.000000000512075700019272277735817928940419520586742, /* F^(06)(x_37) / ( 6!) */ + ( real_type )-0.0000000000101163937878574118532405049200674039681754, /* F^(07)(x_37) / ( 7!) */ + ( real_type )+0.000000000000176490992446923055082397318165549529329088, /* F^(08)(x_37) / ( 8!) */ + ( real_type )-2.72154858477356310093792491739212466699777e-15, /* F^(09)(x_37) / ( 9!) */ + ( real_type )+3.17864161902745847150825822804240366041147e-17, /* F^(10)(x_37) / (10!) */ + ( real_type )+4.05874480129736188949943247378913727479148e-19, /* F^(11)(x_37) / (11!) */ + ( real_type )-8.40447608497094344551952218827835718213795e-20, /* F^(12)(x_37) / (12!) */ + ( real_type )+7.34723762828915994299413418809739711807905e-21, /* F^(13)(x_37) / (13!) */ + ( real_type )-5.18021760748734333966878711744484134526079e-22, /* F^(14)(x_37) / (14!) */ + ( real_type )+3.12027927596953520763146990834294052787075e-23, /* F^(15)(x_37) / (15!) */ + ( real_type )-1.62905116426254724353569166939022359405153e-24, /* F^(16)(x_37) / (16!) */ + ( real_type )+7.43202534724890698219724714307645097375702e-26, /* F^(17)(x_37) / (17!) 
*/ + ( real_type )-2.98203160218235841330522148731538068129812e-27, /* F^(18)(x_37) / (18!) */ + ( real_type )+1.05814702049804957298382528594052029575376e-28, /* F^(19)(x_37) / (19!) */ + ( real_type )-3.33645902662862472397439602044977696000381e-30, /* F^(20)(x_37) / (20!) */ + ( real_type )+9.38708602443541492367954480365194272116094e-32, /* F^(21)(x_37) / (21!) */ + ( real_type )-2.36498179470897079020349913667177614633733e-33, /* F^(22)(x_37) / (22!) */ + ( real_type )+5.35164694506171200553679454663226240499125e-35, /* F^(23)(x_37) / (23!) */ + ( real_type )-1.09041609387260215126212664931441059878883e-36, /* F^(24)(x_37) / (24!) */ + + /* ===== n = 38, xn = 8.00, yn = 0 ==== */ + ( real_type )+0.0630001987075533879192457295169672688411135, /* F^(00)(x_38) / ( 0!) */ + ( real_type )-0.00800317932085420670793167227147630145781639, /* F^(01)(x_38) / ( 1!) */ + ( real_type )+0.000512617929640132872103824327421571410708782, /* F^(02)(x_38) / ( 2!) */ + ( real_type )-0.0000220786170431021384143907741409823459471252, /* F^(03)(x_38) / ( 3!) */ + ( real_type )+0.00000071953664142993541006476049841687050092592, /* F^(04)(x_38) / ( 4!) */ + ( real_type )-0.0000000189311096531158941536312361671502016500856, /* F^(05)(x_38) / ( 5!) */ + ( real_type )+0.00000000041897494105222617867209942521263961197227, /* F^(06)(x_38) / ( 6!) */ + ( real_type )-0.00000000000802508515231947226278596083711904574546478, /* F^(07)(x_38) / ( 7!) */ + ( real_type )+0.000000000000135847444096715482338903489580477454347134, /* F^(08)(x_38) / ( 8!) */ + ( real_type )-2.06528169713061298427118054330713374306109e-15, /* F^(09)(x_38) / ( 9!) */ + ( real_type )+2.85618624370858945080477989725248336739214e-17, /* F^(10)(x_38) / (10!) */ + ( real_type )-3.63086442704559630368005577677442893971275e-19, /* F^(11)(x_38) / (11!) */ + ( real_type )+4.27998045570120323779885336248755176205908e-21, /* F^(12)(x_38) / (12!) 
*/ + ( real_type )-4.71318353478854836890378157015405080733693e-23, /* F^(13)(x_38) / (13!) */ + ( real_type )+4.88014458774636629404300997231288129961797e-25, /* F^(14)(x_38) / (14!) */ + ( real_type )-4.77827557769766273285326729419921427450918e-27, /* F^(15)(x_38) / (15!) */ + ( real_type )+4.44680263276473429885891031628216578017214e-29, /* F^(16)(x_38) / (16!) */ + ( real_type )-3.95169460311936492078779373115033704200327e-31, /* F^(17)(x_38) / (17!) */ + ( real_type )+3.36783597486420476910852998577448292284786e-33, /* F^(18)(x_38) / (18!) */ + ( real_type )-2.7638941233767003592751291116187084238155e-35, /* F^(19)(x_38) / (19!) */ + ( real_type )+2.19285024386415730786754279969987863513606e-37, /* F^(20)(x_38) / (20!) */ + ( real_type )-1.6885856390157626605735586573721029306172e-39, /* F^(21)(x_38) / (21!) */ + ( real_type )+1.26716574120882322293592236262348344805361e-41, /* F^(22)(x_38) / (22!) */ + ( real_type )-9.30788907908173589609322393616133249625625e-44, /* F^(23)(x_38) / (23!) */ + ( real_type )+6.72532985118079984820698734345111309265611e-46, /* F^(24)(x_38) / (24!) */ + + /* ===== n = 39, xn = 8.21, yn = 0 ==== */ + ( real_type )+0.0613595556079285561738754088706845246229021, /* F^(00)(x_39) / ( 0!) */ + ( real_type )-0.00758849208808983738599669298738710662034198, /* F^(01)(x_39) / ( 1!) */ + ( real_type )+0.000472979189246629030352986329803573874703723, /* F^(02)(x_39) / ( 2!) */ + ( real_type )-0.0000198137857967329062682979125422397549961711, /* F^(03)(x_39) / ( 3!) */ + ( real_type )+0.00000062773499382685809164022342923443443733408, /* F^(04)(x_39) / ( 4!) */ + ( real_type )-0.0000000160470589579307121704571579047259987218427, /* F^(05)(x_39) / ( 5!) */ + ( real_type )+0.000000000344877838874649147060913496769254554575386, /* F^(06)(x_39) / ( 6!) */ + ( real_type )-0.00000000000641301002004583827470435664943276349872624, /* F^(07)(x_39) / ( 7!) 
*/ + ( real_type )+0.00000000000010581015114467120416096977478870308490262, /* F^(08)(x_39) / ( 8!) */ + ( real_type )-1.65755007272233877999923308499500839958229e-15, /* F^(09)(x_39) / ( 9!) */ + ( real_type )+3.70535006261298772006144363563810865192479e-17, /* F^(10)(x_39) / (10!) */ + ( real_type )-2.28882206133433351148711446606149613285811e-18, /* F^(11)(x_39) / (11!) */ + ( real_type )+2.14221301525335466654656480632806016945666e-19, /* F^(12)(x_39) / (12!) */ + ( real_type )-1.85578042816399668879083092019629244131351e-20, /* F^(13)(x_39) / (13!) */ + ( real_type )+1.38664059961102511111523996441138511663786e-21, /* F^(14)(x_39) / (14!) */ + ( real_type )-8.94177038663097429571634432316424784042165e-23, /* F^(15)(x_39) / (15!) */ + ( real_type )+5.01346643789899377241715814388244191757839e-24, /* F^(16)(x_39) / (16!) */ + ( real_type )-2.46190945536696449414307167664258493108547e-25, /* F^(17)(x_39) / (17!) */ + ( real_type )+1.06570830035400759270139269922540451290062e-26, /* F^(18)(x_39) / (18!) */ + ( real_type )-4.08991610515677642920644376851172112338946e-28, /* F^(19)(x_39) / (19!) */ + ( real_type )+1.39856863781330599080068580616737489050834e-29, /* F^(20)(x_39) / (20!) */ + ( real_type )-4.28028415085061054243136861575259440798207e-31, /* F^(21)(x_39) / (21!) */ + ( real_type )+1.17700557865130976599030945303482673243572e-32, /* F^(22)(x_39) / (22!) */ + ( real_type )-2.9180535245427217209055253284192443777708e-34, /* F^(23)(x_39) / (23!) */ + ( real_type )+6.54213183846761024391386995116661887140189e-36, /* F^(24)(x_39) / (24!) */ + + /* ===== n = 40, xn = 8.42, yn = 0 ==== */ + ( real_type )+0.0598028240070970025997168069552382204455551, /* F^(00)(x_40) / ( 0!) */ + ( real_type )-0.00720545696163367208409599058493150103375159, /* F^(01)(x_40) / ( 1!) */ + ( real_type )+0.000437354150698537370090141774444905132868712, /* F^(02)(x_40) / ( 2!) */ + ( real_type )-0.0000178341868566794214155039671013114377259977, /* F^(03)(x_40) / ( 3!) 
*/ + ( real_type )+0.000000549738657749806518708362991465182030740656, /* F^(04)(x_40) / ( 4!) */ + ( real_type )-0.0000000136665165083864689980383664012135893453723, /* F^(05)(x_40) / ( 5!) */ + ( real_type )+0.000000000285484625416871775471663463107629721541382, /* F^(06)(x_40) / ( 6!) */ + ( real_type )-0.0000000000051562573365033935826677249817055443317536, /* F^(07)(x_40) / ( 7!) */ + ( real_type )+8.24248904988808172107358297161600833586766e-14, /* F^(08)(x_40) / ( 8!) */ + ( real_type )-1.22400429580944803483014431516131831384331e-15, /* F^(09)(x_40) / ( 9!) */ + ( real_type )+2.29816685949555023753624497061100858274017e-17, /* F^(10)(x_40) / (10!) */ + ( real_type )-1.17569274741620609809699591715994218294117e-18, /* F^(11)(x_40) / (11!) */ + ( real_type )+1.08490665347446044168931709194093557081896e-19, /* F^(12)(x_40) / (12!) */ + ( real_type )-9.65244032316975383460073887498436989678582e-21, /* F^(13)(x_40) / (13!) */ + ( real_type )+7.44268083557449063135513482712202134498126e-22, /* F^(14)(x_40) / (14!) */ + ( real_type )-4.95827616049610855812911319107504770052377e-23, /* F^(15)(x_40) / (15!) */ + ( real_type )+2.87438416294723318502667580632982452681436e-24, /* F^(16)(x_40) / (16!) */ + ( real_type )-1.46065174527651063539182325730594582657544e-25, /* F^(17)(x_40) / (17!) */ + ( real_type )+6.54901914190294385690937555296205133587652e-27, /* F^(18)(x_40) / (18!) */ + ( real_type )-2.60581179839223519036610332510870929520382e-28, /* F^(19)(x_40) / (19!) */ + ( real_type )+9.24841306115074128276907171495276667132382e-30, /* F^(20)(x_40) / (20!) */ + ( real_type )-2.94114803422092416759962450086760883218879e-31, /* F^(21)(x_40) / (21!) */ + ( real_type )+8.41469263381973923531258926033615269754367e-33, /* F^(22)(x_40) / (22!) */ + ( real_type )-2.17359980517775025059798467594438160314537e-34, /* F^(23)(x_40) / (23!) */ + ( real_type )+5.08522095301643944147753817271990707564808e-36, /* F^(24)(x_40) / (24!) 
*/ + + /* ===== n = 41, xn = 8.63, yn = 0 ==== */ + ( real_type )+0.0583236807470069792151640171108127990614153, /* F^(00)(x_41) / ( 0!) */ + ( real_type )-0.00685090973780471563097233621667497964912208, /* F^(01)(x_41) / ( 1!) */ + ( real_type )+0.000405243758074757440280725056726842299713179, /* F^(02)(x_41) / ( 2!) */ + ( real_type )-0.0000160974715218178146376228983099595328457257, /* F^(03)(x_41) / ( 3!) */ + ( real_type )+0.000000483167950249942122688183551932205411126144, /* F^(04)(x_41) / ( 4!) */ + ( real_type )-0.0000000116907541572879591945714591812345162108786, /* F^(05)(x_41) / ( 5!) */ + ( real_type )+0.000000000237559856273231822305625785002443506847172, /* F^(06)(x_41) / ( 6!) */ + ( real_type )-0.0000000000041656174666626901237991240578942783018823, /* F^(07)(x_41) / ( 7!) */ + ( real_type )+6.30854282277640371027320452669189238641163e-14, /* F^(08)(x_41) / ( 8!) */ + ( real_type )-5.88263477621069869979986463990033064642896e-16, /* F^(09)(x_41) / ( 9!) */ + ( real_type )-3.86369875273470828131068428156754564311525e-17, /* F^(10)(x_41) / (10!) */ + ( real_type )+6.48470340318830315056776911648730949473405e-18, /* F^(11)(x_41) / (11!) */ + ( real_type )-7.2862188837932645956914306159150467202238e-19, /* F^(12)(x_41) / (12!) */ + ( real_type )+6.80327266516109593913788708721603178532847e-20, /* F^(13)(x_41) / (13!) */ + ( real_type )-5.42022462786199148280246286399094236463823e-21, /* F^(14)(x_41) / (14!) */ + ( real_type )+3.72672145002189623581264583967428039387764e-22, /* F^(15)(x_41) / (15!) */ + ( real_type )-2.2307818223026553061859251398083532975776e-23, /* F^(16)(x_41) / (16!) */ + ( real_type )+1.17134730154298109757619977452773536723492e-24, /* F^(17)(x_41) / (17!) */ + ( real_type )-5.43108154734611256853565353499676640838846e-26, /* F^(18)(x_41) / (18!) */ + ( real_type )+2.2366384563402602524232344097103884776523e-27, /* F^(19)(x_41) / (19!) */ + ( real_type )-8.2236287201668832541628933214162007907303e-29, /* F^(20)(x_41) / (20!) 
*/ + ( real_type )+2.71200490930909215638788752668173537931885e-30, /* F^(21)(x_41) / (21!) */ + ( real_type )-8.05490529094306610601239733075438167922909e-32, /* F^(22)(x_41) / (22!) */ + ( real_type )+2.16254275576698004205142245975865124916291e-33, /* F^(23)(x_41) / (23!) */ + ( real_type )-5.26528673203809618546767758522816537690793e-35, /* F^(24)(x_41) / (24!) */ + + /* ===== n = 42, xn = 8.84, yn = 0 ==== */ + ( real_type )+0.056916426843506876837940629331046129974024, /* F^(00)(x_42) / ( 0!) */ + ( real_type )-0.00652207470622676818265102639381608109771372, /* F^(01)(x_42) / ( 1!) */ + ( real_type )+0.000376222121564902076557377947848299352771951, /* F^(02)(x_42) / ( 2!) */ + ( real_type )-0.0000145684995760145548936532952327353271715806, /* F^(03)(x_42) / ( 3!) */ + ( real_type )+0.000000426104115303060165450578812692559159214253, /* F^(04)(x_42) / ( 4!) */ + ( real_type )-0.0000000100426037256630083440610232632362438111252, /* F^(05)(x_42) / ( 5!) */ + ( real_type )+0.00000000019871867766010700203969089272733703119569, /* F^(06)(x_42) / ( 6!) */ + ( real_type )-0.00000000000340098685778398794813177485906805510823135, /* F^(07)(x_42) / ( 7!) */ + ( real_type )+5.26094147584436948668237041818263524807978e-14, /* F^(08)(x_42) / ( 8!) */ + ( real_type )-9.89003108747977162160467553217499114570726e-16, /* F^(09)(x_42) / ( 9!) */ + ( real_type )+5.7987581288480831415550559517901259691177e-17, /* F^(10)(x_42) / (10!) */ + ( real_type )-6.84019812948553461473792033682908368305591e-18, /* F^(11)(x_42) / (11!) */ + ( real_type )+7.66807688650194765721066974583518330795832e-19, /* F^(12)(x_42) / (12!) */ + ( real_type )-7.34932282704540291995619571132599516423188e-20, /* F^(13)(x_42) / (13!) */ + ( real_type )+6.02907809874898298982141669011987581776029e-21, /* F^(14)(x_42) / (14!) */ + ( real_type )-4.27202016745830574365818500627166797018666e-22, /* F^(15)(x_42) / (15!) */ + ( real_type )+2.63705207873701276525705062133418860848175e-23, /* F^(16)(x_42) / (16!) 
*/ + ( real_type )-1.42886363044709785836382661898839318955226e-24, /* F^(17)(x_42) / (17!) */ + ( real_type )+6.84133056911837065053522756991119947346028e-26, /* F^(18)(x_42) / (18!) */ + ( real_type )-2.91155865661733403457764739370444413052212e-27, /* F^(19)(x_42) / (19!) */ + ( real_type )+1.10718039110665357785268725387149574742249e-28, /* F^(20)(x_42) / (20!) */ + ( real_type )-3.77960373476675278871677303947334507403359e-30, /* F^(21)(x_42) / (21!) */ + ( real_type )+1.16311431814603250178632854460731339604814e-31, /* F^(22)(x_42) / (22!) */ + ( real_type )-3.23870606295353030653010296026147086319227e-33, /* F^(23)(x_42) / (23!) */ + ( real_type )+8.18748651059542495218808041312868327077504e-35, /* F^(24)(x_42) / (24!) */ + + /* ===== n = 43, xn = 9.05, yn = 0 ==== */ + ( real_type )+0.0555759118010602678845137769255961249291392, /* F^(00)(x_43) / ( 0!) */ + ( real_type )-0.0062165083981437663124648978996881046868869, /* F^(01)(x_43) / ( 1!) */ + ( real_type )+0.000349924217383754810789185433281682218545804, /* F^(02)(x_43) / ( 2!) */ + ( real_type )-0.0000132179603232753873982752516160668807168414, /* F^(03)(x_43) / ( 3!) */ + ( real_type )+0.000000376989904646399739467496914465192711533721, /* F^(04)(x_43) / ( 4!) */ + ( real_type )-0.0000000086608507942006864740894214789648321911132, /* F^(05)(x_43) / ( 5!) */ + ( real_type )+0.000000000166972803936254436878969559952565199300229, /* F^(06)(x_43) / ( 6!) */ + ( real_type )-0.00000000000277829164783441837183619655663018603687438, /* F^(07)(x_43) / ( 7!) */ + ( real_type )+4.05497812978865872533774048448123390846176e-14, /* F^(08)(x_43) / ( 8!) */ + ( real_type )-4.88784560922351083011439606569743368509343e-16, /* F^(09)(x_43) / ( 9!) */ + ( real_type )-1.61489407626265552694371009923669468240958e-18, /* F^(10)(x_43) / (10!) */ + ( real_type )+1.04954540832504897759910306891941753413716e-18, /* F^(11)(x_43) / (11!) */ + ( real_type )-1.29921379924375475085157900804902518065458e-19, /* F^(12)(x_43) / (12!) 
*/ + ( real_type )+1.28836481576733645641576146415690916880002e-20, /* F^(13)(x_43) / (13!) */ + ( real_type )-1.08813232512577494200967206032323198641774e-21, /* F^(14)(x_43) / (14!) */ + ( real_type )+7.93795596961427586452137387088555505369934e-23, /* F^(15)(x_43) / (15!) */ + ( real_type )-5.04727932671761477689634338968315110776105e-24, /* F^(16)(x_43) / (16!) */ + ( real_type )+2.81868081244312580163056744190040810241518e-25, /* F^(17)(x_43) / (17!) */ + ( real_type )-1.3918204379290128771550174011633485071732e-26, /* F^(18)(x_43) / (18!) */ + ( real_type )+6.11285532627695943991537369700627618715394e-28, /* F^(19)(x_43) / (19!) */ + ( real_type )-2.40060282191246237730027745915727676648305e-29, /* F^(20)(x_43) / (20!) */ + ( real_type )+8.46954433152673114747511513761212520306726e-31, /* F^(21)(x_43) / (21!) */ + ( real_type )-2.695876961395314053727170168215466589281e-32, /* F^(22)(x_43) / (22!) */ + ( real_type )+7.77126201728438522193172498048712351607617e-34, /* F^(23)(x_43) / (23!) */ + ( real_type )-2.03573499187372316642570911805212465910247e-35, /* F^(24)(x_43) / (24!) */ + + /* ===== n = 44, xn = 9.26, yn = 0 ==== */ + ( real_type )+0.0542974687523859076954105481536284958849097, /* F^(00)(x_44) / ( 0!) */ + ( real_type )-0.00593205267578106738936593649757710310427244, /* F^(01)(x_44) / ( 1!) */ + ( real_type )+0.000326035911635148854351941523148709992562738, /* F^(02)(x_44) / ( 2!) */ + ( real_type )-0.0000120212868756264384045590343854963810442863, /* F^(03)(x_44) / ( 3!) */ + ( real_type )+0.000000334555151892393642915401611084435215869307, /* F^(04)(x_44) / ( 4!) */ + ( real_type )-0.00000000749723820562400466784252253610596524367375, /* F^(05)(x_44) / ( 5!) */ + ( real_type )+0.000000000140948383037219391031693850660947901017957, /* F^(06)(x_44) / ( 6!) */ + ( real_type )-0.00000000000228928269976641398118573546926227943297782, /* F^(07)(x_44) / ( 7!) */ + ( real_type )+3.34532444560746496905466177147152555789705e-14, /* F^(08)(x_44) / ( 8!) 
*/ + ( real_type )-5.85736988075250653566837345027724719948586e-16, /* F^(09)(x_44) / ( 9!) */ + ( real_type )+3.41749408708513132849928880470884425396372e-17, /* F^(10)(x_44) / (10!) */ + ( real_type )-4.26436652103249685881628286113824897639695e-18, /* F^(11)(x_44) / (11!) */ + ( real_type )+5.05481772207119836493558659055834237256166e-19, /* F^(12)(x_44) / (12!) */ + ( real_type )-5.12070247587366187821475485141482233474228e-20, /* F^(13)(x_44) / (13!) */ + ( real_type )+4.44342390217525041767319286810449691644862e-21, /* F^(14)(x_44) / (14!) */ + ( real_type )-3.33355489541982346342519779058304904356536e-22, /* F^(15)(x_44) / (15!) */ + ( real_type )+2.18101271449973594281238974833081138755888e-23, /* F^(16)(x_44) / (16!) */ + ( real_type )-1.25395109586174881735142517146274314128778e-24, /* F^(17)(x_44) / (17!) */ + ( real_type )+6.37814712201086034447254372308359784311422e-26, /* F^(18)(x_44) / (18!) */ + ( real_type )-2.88727916288300588587121699254357550110157e-27, /* F^(19)(x_44) / (19!) */ + ( real_type )+1.1694201617034222446946344906238485924606e-28, /* F^(20)(x_44) / (20!) */ + ( real_type )-4.25799728875555000500558821860167594668487e-30, /* F^(21)(x_44) / (21!) */ + ( real_type )+1.39974519687690416008342627541711875462459e-31, /* F^(22)(x_44) / (22!) */ + ( real_type )-4.17036198790153727417380153869391629105663e-33, /* F^(23)(x_44) / (23!) */ + ( real_type )+1.13003030868288406299562471267501882086256e-34, /* F^(24)(x_44) / (24!) */ + + /* ===== n = 45, xn = 9.47, yn = 0 ==== */ + ( real_type )+0.0530768586471334283581234719746748841532657, /* F^(00)(x_45) / ( 0!) */ + ( real_type )-0.00566679541937014212575768228580576574794101, /* F^(01)(x_45) / ( 1!) */ + ( real_type )+0.000304285820818167455316590486445124660215002, /* F^(02)(x_45) / ( 2!) */ + ( real_type )-0.0000109577925526758764693182959298322482306074, /* F^(03)(x_45) / ( 3!) */ + ( real_type )+0.000000297757418639892928197076016346285168815135, /* F^(04)(x_45) / ( 4!) 
*/ + ( real_type )-0.00000000651292938934867206241836046538416872161554, /* F^(05)(x_45) / ( 5!) */ + ( real_type )+0.00000000011944181067592909804714239546778837201158, /* F^(06)(x_45) / ( 6!) */ + ( real_type )-0.00000000000188023527833105854663216170610810508233604, /* F^(07)(x_45) / ( 7!) */ + ( real_type )+2.34262330485152064902644831972995714567772e-14, /* F^(08)(x_45) / ( 8!) */ + ( real_type )+3.23374707347013921911074013523273256279906e-16, /* F^(09)(x_45) / ( 9!) */ + ( real_type )-1.13329292844906600637670902987364572580821e-16, /* F^(10)(x_45) / (10!) */ + ( real_type )+1.7211710107110796044057073785543050042293e-17, /* F^(11)(x_45) / (11!) */ + ( real_type )-2.12160615683321026479220028462489816856157e-18, /* F^(12)(x_45) / (12!) */ + ( real_type )+2.20888965365131770233365821719429430403035e-19, /* F^(13)(x_45) / (13!) */ + ( real_type )-1.96880798964957988311886012756472786532309e-20, /* F^(14)(x_45) / (14!) */ + ( real_type )+1.5176966080946334336901962297906961158232e-21, /* F^(15)(x_45) / (15!) */ + ( real_type )-1.02075310411789741492720844445777012244999e-22, /* F^(16)(x_45) / (16!) */ + ( real_type )+6.03580082025488064673292212940192275573158e-24, /* F^(17)(x_45) / (17!) */ + ( real_type )-3.15906425336466158935996261520732533673206e-25, /* F^(18)(x_45) / (18!) */ + ( real_type )+1.47228556560772957162089827456642465700777e-26, /* F^(19)(x_45) / (19!) */ + ( real_type )-6.14265155988801805462436292445869484789677e-28, /* F^(20)(x_45) / (20!) */ + ( real_type )+2.30531125921257632948101469540797239936393e-29, /* F^(21)(x_45) / (21!) */ + ( real_type )-7.81599946503118789441609773940998717830246e-31, /* F^(22)(x_45) / (22!) */ + ( real_type )+2.40331278561333622093587217828917162248839e-32, /* F^(23)(x_45) / (23!) */ + ( real_type )-6.72568255314270711703334693796308687425414e-34, /* F^(24)(x_45) / (24!) */ + + /* ===== n = 46, xn = 9.68, yn = 0 ==== */ + ( real_type )+0.0519102220411792443100980154384748583981615, /* F^(00)(x_46) / ( 0!) 
*/ + ( real_type )-0.00541903742915588008915335610999928385862678, /* F^(01)(x_46) / ( 1!) */ + ( real_type )+0.000284438636375691707531869227943370470123038, /* F^(02)(x_46) / ( 2!) */ + ( real_type )-0.0000100099826449872846040062898356273150195023, /* F^(03)(x_46) / ( 3!) */ + ( real_type )+0.000000265737563365874133688713283419566052874283, /* F^(04)(x_46) / ( 4!) */ + ( real_type )-0.00000000567702777107889792694159688186293716019227, /* F^(05)(x_46) / ( 5!) */ + ( real_type )+0.000000000101667745871916667999170294290262257556461, /* F^(06)(x_46) / ( 6!) */ + ( real_type )-0.00000000000156736977876725376856587532586437654861672, /* F^(07)(x_46) / ( 7!) */ + ( real_type )+2.04617254430935485054407057145331714866361e-14, /* F^(08)(x_46) / ( 8!) */ + ( real_type )-5.51712241156402707080052186833839239123009e-17, /* F^(09)(x_46) / ( 9!) */ + ( real_type )-3.47847059980441092623334266783112454588609e-17, /* F^(10)(x_46) / (10!) */ + ( real_type )+5.65916592384500439915137887616801197795248e-18, /* F^(11)(x_46) / (11!) */ + ( real_type )-7.17254280695072482543751774529779820393255e-19, /* F^(12)(x_46) / (12!) */ + ( real_type )+7.66206425423889205119658026727381485806795e-20, /* F^(13)(x_46) / (13!) */ + ( real_type )-7.0085410901507030871237744701618289315713e-21, /* F^(14)(x_46) / (14!) */ + ( real_type )+5.546603054276632076326855200821521583778e-22, /* F^(15)(x_46) / (15!) */ + ( real_type )-3.83141491849640098583262311990061445348554e-23, /* F^(16)(x_46) / (16!) */ + ( real_type )+2.32786171456493450621329868330257658372673e-24, /* F^(17)(x_46) / (17!) */ + ( real_type )-1.25245243447708574663266398448520561719326e-25, /* F^(18)(x_46) / (18!) */ + ( real_type )+6.0031892197125761718106175222561365059955e-27, /* F^(19)(x_46) / (19!) */ + ( real_type )-2.57721466573527744812126578156926574616009e-28, /* F^(20)(x_46) / (20!) */ + ( real_type )+9.95768470958399742795224106136343655373691e-30, /* F^(21)(x_46) / (21!) 
*/ + ( real_type )-3.47768034268890599833474454405116832920709e-31, /* F^(22)(x_46) / (22!) */ + ( real_type )+1.10216893229834497101517645001661405118555e-32, /* F^(23)(x_46) / (23!) */ + ( real_type )-3.18111147372847042065708658201432150093729e-34, /* F^(24)(x_46) / (24!) */ + + /* ===== n = 47, xn = 9.89, yn = 0 ==== */ + ( real_type )+0.0507940372987049051842427501977029926302941, /* F^(00)(x_47) / ( 0!) */ + ( real_type )-0.0051872644375285264915026110728112666749738, /* F^(01)(x_47) / ( 1!) */ + ( real_type )+0.000266289620525517424059316578230907237076579, /* F^(02)(x_47) / ( 2!) */ + ( real_type )-0.00000916300002662301642744630002691225046483043, /* F^(03)(x_47) / ( 3!) */ + ( real_type )+0.000000237783388224889414822687120282568232066124, /* F^(04)(x_47) / ( 4!) */ + ( real_type )-0.00000000496432362029421669578665215863082201199385, /* F^(05)(x_47) / ( 5!) */ + ( real_type )+0.0000000000868887876044820112349319940727050712060132, /* F^(06)(x_47) / ( 6!) */ + ( real_type )-0.0000000000013205884827302468364155074412725839059144, /* F^(07)(x_47) / ( 7!) */ + ( real_type )+2.04434865572947391419147138641058044328278e-14, /* F^(08)(x_47) / ( 8!) */ + ( real_type )-9.18749611809063352955273371463069210313266e-16, /* F^(09)(x_47) / ( 9!) */ + ( real_type )+1.3638574252541929339845212339122693026106e-16, /* F^(10)(x_47) / (10!) */ + ( real_type )-2.0787207770632104078979795458494024786416e-17, /* F^(11)(x_47) / (11!) */ + ( real_type )+2.6845173153697647116170463332038329429158e-18, /* F^(12)(x_47) / (12!) */ + ( real_type )-2.93850000821767801723054718431510294968548e-19, /* F^(13)(x_47) / (13!) */ + ( real_type )+2.75619091554315000279016082068684459651966e-20, /* F^(14)(x_47) / (14!) */ + ( real_type )-2.23758713054242643040995726475114850453247e-21, /* F^(15)(x_47) / (15!) */ + ( real_type )+1.58616212559661273523823309416907565401197e-22, /* F^(16)(x_47) / (16!) */ + ( real_type )-9.89353969903375864997269555889633347556983e-24, /* F^(17)(x_47) / (17!) 
*/ + ( real_type )+5.46688927865273947261497438379049122070671e-25, /* F^(18)(x_47) / (18!) */ + ( real_type )-2.69235566832619817586400019924219361613722e-26, /* F^(19)(x_47) / (19!) */ + ( real_type )+1.18814203383894157937548689931331586591003e-27, /* F^(20)(x_47) / (20!) */ + ( real_type )-4.72116776495099908854394489377353958555448e-29, /* F^(21)(x_47) / (21!) */ + ( real_type )+1.69656584350675791868611591924368494369124e-30, /* F^(22)(x_47) / (22!) */ + ( real_type )-5.53538268774998422856379083477425033268396e-32, /* F^(23)(x_47) / (23!) */ + ( real_type )+1.64565214524282424507330261523929194453537e-33, /* F^(24)(x_47) / (24!) */ + + /* ===== n = 48, xn = 10.11, yn = 0 ==== */ + ( real_type )+0.0497250842279749621621125383083789598731151, /* F^(00)(x_48) / ( 0!) */ + ( real_type )-0.00497012334433602159459437963238704638816512, /* F^(01)(x_48) / ( 1!) */ + ( real_type )+0.000249660046868205581834397087218009442707496, /* F^(02)(x_48) / ( 2!) */ + ( real_type )-0.00000840417809945337897831162061095288879927928, /* F^(03)(x_48) / ( 3!) */ + ( real_type )+0.0000002133019622571051928403560915222340989274, /* F^(04)(x_48) / ( 4!) */ + ( real_type )-0.00000000435423486720271412617168249610675482370902, /* F^(05)(x_48) / ( 5!) */ + ( real_type )+0.0000000000744609296079571141039782783757659427511061, /* F^(06)(x_48) / ( 6!) */ + ( real_type )-0.00000000000109148887630326612561062387534040086017174, /* F^(07)(x_48) / ( 7!) */ + ( real_type )+1.22658334587416408393784107595663382392957e-14, /* F^(08)(x_48) / ( 8!) */ + ( real_type )+3.08311965096636956329512372160593352520112e-16, /* F^(09)(x_48) / ( 9!) */ + ( real_type )-8.9568878526676000808338657150266464190394e-17, /* F^(10)(x_48) / (10!) */ + ( real_type )+1.44510064753703183849823478522132878734766e-17, /* F^(11)(x_48) / (11!) */ + ( real_type )-1.91511941679793106854696755190648840029074e-18, /* F^(12)(x_48) / (12!) */ + ( real_type )+2.14775563857074477716090235700217236676651e-19, /* F^(13)(x_48) / (13!) 
*/ + ( real_type )-2.06433338208748730198529945023497899269394e-20, /* F^(14)(x_48) / (14!) */ + ( real_type )+1.71791296899673318364194647292868561039623e-21, /* F^(15)(x_48) / (15!) */ + ( real_type )-1.24872971713650457973279009567807029357067e-22, /* F^(16)(x_48) / (16!) */ + ( real_type )+7.98965176410026077012602959087747284348482e-24, /* F^(17)(x_48) / (17!) */ + ( real_type )-4.53037436369374517535581904465977050613208e-25, /* F^(18)(x_48) / (18!) */ + ( real_type )+2.29041161908061312963812744148899456107829e-26, /* F^(19)(x_48) / (19!) */ + ( real_type )-1.03804022954352695980376897564578739670418e-27, /* F^(20)(x_48) / (20!) */ + ( real_type )+4.23785209908321614720568718692792569740526e-29, /* F^(21)(x_48) / (21!) */ + ( real_type )-1.56535380055208530969842343997865099897045e-30, /* F^(22)(x_48) / (22!) */ + ( real_type )+5.25217984163149475235779825221042874005706e-32, /* F^(23)(x_48) / (23!) */ + ( real_type )-1.60655463065691881598653556689979583323867e-33, /* F^(24)(x_48) / (24!) */ + + /* ===== n = 49, xn = 10.32, yn = 0 ==== */ + ( real_type )+0.0487004123398296771663508633922339236261419, /* F^(00)(x_49) / ( 0!) */ + ( real_type )-0.00476640195859124327516629119438429782643326, /* F^(01)(x_49) / ( 1!) */ + ( real_type )+0.000234393405976837479967299687399532136999912, /* F^(02)(x_49) / ( 2!) */ + ( real_type )-0.00000772267795138403948394505498166400341460832, /* F^(03)(x_49) / ( 3!) */ + ( real_type )+0.000000191798074241015654244942238831104997461583, /* F^(04)(x_49) / ( 4!) */ + ( real_type )-0.00000000383032539858897273251153098870620356386947, /* F^(05)(x_49) / ( 5!) */ + ( real_type )+0.0000000000640675321859040241096616524568102111059469, /* F^(06)(x_49) / ( 6!) */ + ( real_type )-0.000000000000919167661092964954559186679322840481559543, /* F^(07)(x_49) / ( 7!) */ + ( real_type )+1.02948589021079584118940452814101730971287e-14, /* F^(08)(x_49) / ( 8!) 
*/ + ( real_type )+2.1472495163434944777062048968853374400249e-16, /* F^(09)(x_49) / ( 9!) */ + ( real_type )-6.71786121430460396638847522792337412936717e-17, /* F^(10)(x_49) / (10!) */ + ( real_type )+1.10996351212977467965988861748918214488268e-17, /* F^(11)(x_49) / (11!) */ + ( real_type )-1.50547711927166849792260956193318840220605e-18, /* F^(12)(x_49) / (12!) */ + ( real_type )+1.72842786111403879895659388606337274772797e-19, /* F^(13)(x_49) / (13!) */ + ( real_type )-1.70122842991694368547057171255906197097121e-20, /* F^(14)(x_49) / (14!) */ + ( real_type )+1.45021537780465368201748990376661702520598e-21, /* F^(15)(x_49) / (15!) */ + ( real_type )-1.08015345643132197465282669029286008046432e-22, /* F^(16)(x_49) / (16!) */ + ( real_type )+7.08391075757089224100288904111350373127662e-24, /* F^(17)(x_49) / (17!) */ + ( real_type )-4.11865976321650831500624203645202171488083e-25, /* F^(18)(x_49) / (18!) */ + ( real_type )+2.13582942131009896830494288100498273309317e-26, /* F^(19)(x_49) / (19!) */ + ( real_type )-9.93252549854248206399214696444551997380761e-28, /* F^(20)(x_49) / (20!) */ + ( real_type )+4.16248049335211146961976568370728976690669e-29, /* F^(21)(x_49) / (21!) */ + ( real_type )-1.57890489300878429255696184561131628636765e-30, /* F^(22)(x_49) / (22!) */ + ( real_type )+5.44257613888891811034743817301078479844139e-32, /* F^(23)(x_49) / (23!) */ + ( real_type )-1.71109950557737040013595242082424929747368e-33, /* F^(24)(x_49) / (24!) */ + + /* ===== n = 50, xn = 10.53, yn = 0 ==== */ + ( real_type )+0.047717313054061251201007710934289736649871, /* F^(00)(x_50) / ( 0!) */ + ( real_type )-0.00457501166444729555882239701345474972491861, /* F^(01)(x_50) / ( 1!) */ + ( real_type )+0.000220352233218296040788079728784108614920802, /* F^(02)(x_50) / ( 2!) */ + ( real_type )-0.00000710919101064205898405862023590652267885554, /* F^(03)(x_50) / ( 3!) */ + ( real_type )+0.000000172855647450899558601918510682985195649801, /* F^(04)(x_50) / ( 4!) 
*/ + ( real_type )-0.00000000337883027212685544639506801700272495298851, /* F^(05)(x_50) / ( 5!) */ + ( real_type )+0.0000000000553058307367038028351823657560256442365857, /* F^(06)(x_50) / ( 6!) */ + ( real_type )-0.000000000000776663699833262956205428279862894824404088, /* F^(07)(x_50) / ( 7!) */ + ( real_type )+8.58045013373960963051697809307788391951691e-15, /* F^(08)(x_50) / ( 8!) */ + ( real_type )+1.66973697668717040345484897038910259806461e-16, /* F^(09)(x_50) / ( 9!) */ + ( real_type )-5.42200243678062252909723137912019096766245e-17, /* F^(10)(x_50) / (10!) */ + ( real_type )+9.15768146845644334239479700187675168920539e-18, /* F^(11)(x_50) / (11!) */ + ( real_type )-1.27038269322274422543830118205546418822341e-18, /* F^(12)(x_50) / (12!) */ + ( real_type )+1.49222595029269875410994891172139651690612e-19, /* F^(13)(x_50) / (13!) */ + ( real_type )-1.50310450262277740180071240397829711551964e-20, /* F^(14)(x_50) / (14!) */ + ( real_type )+1.31166906492503376794433084329179101374762e-21, /* F^(15)(x_50) / (15!) */ + ( real_type )-1.00038852502104528866286328922218658120868e-22, /* F^(16)(x_50) / (16!) */ + ( real_type )+6.72014696260150290259394448424917188417994e-24, /* F^(17)(x_50) / (17!) */ + ( real_type )-4.00331794429253556321333457226259289993895e-25, /* F^(18)(x_50) / (18!) */ + ( real_type )+2.1277991084801543905061565990919146427665e-26, /* F^(19)(x_50) / (19!) */ + ( real_type )-1.01454379540290916024226969129605783598737e-27, /* F^(20)(x_50) / (20!) */ + ( real_type )+4.36077497434109906944827646216330294479098e-29, /* F^(21)(x_50) / (21!) */ + ( real_type )-1.69717907174621335340716383965353419447934e-30, /* F^(22)(x_50) / (22!) */ + ( real_type )+6.00486669852708387425566034595629561783192e-32, /* F^(23)(x_50) / (23!) */ + ( real_type )-1.93854457291720519891030708137587755207413e-33, /* F^(24)(x_50) / (24!) */ + + /* ===== n = 51, xn = 10.74, yn = 0 ==== */ + ( real_type )+0.0467732952896975256047898303754890525957012, /* F^(00)(x_51) / ( 0!) 
*/ + ( real_type )-0.00439497253666262010872352000346217209298768, /* F^(01)(x_51) / ( 1!) */ + ( real_type )+0.000207415446708460117379821201693106429201371, /* F^(02)(x_51) / ( 2!) */ + ( real_type )-0.00000655569626001136185771573896114486903481577, /* F^(03)(x_51) / ( 3!) */ + ( real_type )+0.000000156124174706603260882365679889767013864982, /* F^(04)(x_51) / ( 4!) */ + ( real_type )-0.00000000298852381091829594325834311114595918183142, /* F^(05)(x_51) / ( 5!) */ + ( real_type )+0.0000000000479151857960232743771156905023314859415973, /* F^(06)(x_51) / ( 6!) */ + ( real_type )-0.000000000000668182959858525912257018999458734904060562, /* F^(07)(x_51) / ( 7!) */ + ( real_type )+1.02858873481045010112186634519973537965759e-14, /* F^(08)(x_51) / ( 8!) */ + ( real_type )-6.64569836711265876013725614125047151013828e-16, /* F^(09)(x_51) / ( 9!) */ + ( real_type )+1.1985010065355539025141185177821102846876e-16, /* F^(10)(x_51) / (10!) */ + ( real_type )-2.01711507991073454504356286219323860408801e-17, /* F^(11)(x_51) / (11!) */ + ( real_type )+2.85665274327348014350276525111850049250495e-18, /* F^(12)(x_51) / (12!) */ + ( real_type )-3.43082842850121435905929085034014743028454e-19, /* F^(13)(x_51) / (13!) */ + ( real_type )+3.53457529999308391971089220068925030577232e-20, /* F^(14)(x_51) / (14!) */ + ( real_type )-3.1555186818262377927711580239268493888301e-21, /* F^(15)(x_51) / (15!) */ + ( real_type )+2.46280643075372844430236910527609146928093e-22, /* F^(16)(x_51) / (16!) */ + ( real_type )-1.69346467866353673774989476011280955314827e-23, /* F^(17)(x_51) / (17!) */ + ( real_type )+1.03294772842600940212165090598397464505091e-24, /* F^(18)(x_51) / (18!) */ + ( real_type )-5.62314845026685848650854716238539856012236e-26, /* F^(19)(x_51) / (19!) */ + ( real_type )+2.74691450266273197652817288460634455488105e-27, /* F^(20)(x_51) / (20!) */ + ( real_type )-1.21005047921151008013346369305718379441804e-28, /* F^(21)(x_51) / (21!) 
*/ + ( real_type )+4.82812661554318640319286258265579425217478e-30, /* F^(22)(x_51) / (22!) */ + ( real_type )-1.75193208636462682777345732882522318007881e-31, /* F^(23)(x_51) / (23!) */ + ( real_type )+5.8024419116050274702812864452360419263662e-33, /* F^(24)(x_51) / (24!) */ + + /* ===== n = 52, xn = 10.95, yn = 0 ==== */ + ( real_type )+0.0458660639658681409761153082730452877258985, /* F^(00)(x_52) / ( 0!) */ + ( real_type )-0.00422540051584968625684645963846826108161075, /* F^(01)(x_52) / ( 1!) */ + ( real_type )+0.000195476103822103757493869975119796469144516, /* F^(02)(x_52) / ( 2!) */ + ( real_type )-0.00000605525958392463816279023204853776872681203, /* F^(03)(x_52) / ( 3!) */ + ( real_type )+0.00000014130703453728575634888160325883871530793, /* F^(04)(x_52) / ( 4!) */ + ( real_type )-0.0000000026500217267932717100054532904618566612343, /* F^(05)(x_52) / ( 5!) */ + ( real_type )+0.0000000000416309588634928837346102013351940538806812, /* F^(06)(x_52) / ( 6!) */ + ( real_type )-0.000000000000574659453120998679323960610012510923377968, /* F^(07)(x_52) / ( 7!) */ + ( real_type )+1.07415285098235836016129785080461404220113e-14, /* F^(08)(x_52) / ( 8!) */ + ( real_type )-1.12985279906554024749641447210842455988405e-15, /* F^(09)(x_52) / ( 9!) */ + ( real_type )+2.23508233703396838113626866971839738636073e-16, /* F^(10)(x_52) / (10!) */ + ( real_type )-3.85758958608803910438496104500738877433943e-17, /* F^(11)(x_52) / (11!) */ + ( real_type )+5.58313352534089465203662036863142633502402e-18, /* F^(12)(x_52) / (12!) */ + ( real_type )-6.85277652750656271019613652077219602426393e-19, /* F^(13)(x_52) / (13!) */ + ( real_type )+7.21685216911632366045785877486141510687588e-20, /* F^(14)(x_52) / (14!) */ + ( real_type )-6.58761770300433908528058943163885153833595e-21, /* F^(15)(x_52) / (15!) */ + ( real_type )+5.25826900253328773084377822337373829901225e-22, /* F^(16)(x_52) / (16!) */ + ( real_type )-3.69875065490346811848044135516156854530176e-23, /* F^(17)(x_52) / (17!) 
*/ + ( real_type )+2.3085485619495732161764026474985713682334e-24, /* F^(18)(x_52) / (18!) */ + ( real_type )-1.28629751526907744857867713920293569872123e-25, /* F^(19)(x_52) / (19!) */ + ( real_type )+6.43327361990716685746912101186430643135351e-27, /* F^(20)(x_52) / (20!) */ + ( real_type )-2.90230970041801686572137326878779442891734e-28, /* F^(21)(x_52) / (21!) */ + ( real_type )+1.18633016255965681085887889320527521497078e-29, /* F^(22)(x_52) / (22!) */ + ( real_type )-4.41132766598851821186266692663622950305233e-31, /* F^(23)(x_52) / (23!) */ + ( real_type )+1.4977247207304951930171979726270059703538e-32, /* F^(24)(x_52) / (24!) */ + + /* ===== n = 53, xn = 11.16, yn = 0 ==== */ + ( real_type )+0.0449935010144609170081544834090713795542632, /* F^(00)(x_53) / ( 0!) */ + ( real_type )-0.00406549632270670383243486129740156189561465, /* F^(01)(x_53) / ( 1!) */ + ( real_type )+0.000184439503659571692771200650189070645187411, /* F^(02)(x_53) / ( 2!) */ + ( real_type )-0.00000560186795456736269812855536092195303900336, /* F^(03)(x_53) / ( 3!) */ + ( real_type )+0.000000128152301695132115531150403744979535354137, /* F^(04)(x_53) / ( 4!) */ + ( real_type )-0.00000000235543231651803632713805987480497659818229, /* F^(05)(x_53) / ( 5!) */ + ( real_type )+0.0000000000361780838021100136786620248743521128024442, /* F^(06)(x_53) / ( 6!) */ + ( real_type )-0.00000000000045302849353427988303432929184037125759031, /* F^(07)(x_53) / ( 7!) */ + ( real_type )-3.54542007406460624301465420315385777423025e-15, /* F^(08)(x_53) / ( 8!) */ + ( real_type )+2.37501199200209381292301232642631169229583e-15, /* F^(09)(x_53) / ( 9!) */ + ( real_type )-5.22123964834241770141690835236065857930204e-16, /* F^(10)(x_53) / (10!) */ + ( real_type )+9.23686452893042789718820792667289306924569e-17, /* F^(11)(x_53) / (11!) */ + ( real_type )-1.36551917519143386952318631849534640842274e-17, /* F^(12)(x_53) / (12!) */ + ( real_type )+1.7120213617965999024448680659338731645019e-18, /* F^(13)(x_53) / (13!) 
*/ + ( real_type )-1.84205660985914919446122074182983237266424e-19, /* F^(14)(x_53) / (14!) */ + ( real_type )+1.7182756756292733217729310563142703502973e-20, /* F^(15)(x_53) / (15!) */ + ( real_type )-1.40189854512824846607284478808854463577923e-21, /* F^(16)(x_53) / (16!) */ + ( real_type )+1.00818782705915509633560706511242379205626e-22, /* F^(17)(x_53) / (17!) */ + ( real_type )-6.43494286807099240269040635142657915398034e-24, /* F^(18)(x_53) / (18!) */ + ( real_type )+3.66755521749955621549086580291771144016666e-25, /* F^(19)(x_53) / (19!) */ + ( real_type )-1.87676915112946298690310088514105380995941e-26, /* F^(20)(x_53) / (20!) */ + ( real_type )+8.66531293646498004945670615262370843519909e-28, /* F^(21)(x_53) / (21!) */ + ( real_type )-3.62601874583111793904672331300038023067839e-29, /* F^(22)(x_53) / (22!) */ + ( real_type )+1.38071670594766568527588480301476675781329e-30, /* F^(23)(x_53) / (23!) */ + ( real_type )-4.80186172808042919311650230896866552131092e-32, /* F^(24)(x_53) / (24!) */ + + /* ===== n = 54, xn = 11.37, yn = 0 ==== */ + ( real_type )+0.0441536485672899944916871372517739024921971, /* F^(00)(x_54) / ( 0!) */ + ( real_type )-0.00391453584575143719359478025493256869430428, /* F^(01)(x_54) / ( 1!) */ + ( real_type )+0.000174221576415803278083996892781894796209231, /* F^(02)(x_54) / ( 2!) */ + ( real_type )-0.00000519029206052460945068466367741307771225633, /* F^(03)(x_54) / ( 3!) */ + ( real_type )+0.000000116445840613343388440474339739957943353381, /* F^(04)(x_54) / ( 4!) */ + ( real_type )-0.0000000020985864631169540997539963354561161145031, /* F^(05)(x_54) / ( 5!) */ + ( real_type )+0.0000000000315803558554304152555084321213892925069466, /* F^(06)(x_54) / ( 6!) */ + ( real_type )-0.00000000000037772946273386666761089304754870408547659, /* F^(07)(x_54) / ( 7!) */ + ( real_type )-6.79036973674142336013682481126907168282343e-15, /* F^(08)(x_54) / ( 8!) */ + ( real_type )+3.07190037313720696197684951179737554362592e-15, /* F^(09)(x_54) / ( 9!) 
*/ + ( real_type )-6.83363426945104466902708011638330206057241e-16, /* F^(10)(x_54) / (10!) */ + ( real_type )+1.23331787308553170243962002559029889442477e-16, /* F^(11)(x_54) / (11!) */ + ( real_type )-1.86106074915713279378465217993758128634326e-17, /* F^(12)(x_54) / (12!) */ + ( real_type )+2.38219613097116012596517122299647660122358e-18, /* F^(13)(x_54) / (13!) */ + ( real_type )-2.61736974032354056550802558822467693733074e-19, /* F^(14)(x_54) / (14!) */ + ( real_type )+2.49367045274982994686054233632704621398589e-20, /* F^(15)(x_54) / (15!) */ + ( real_type )-2.07845175883990575315026737646999860894905e-21, /* F^(16)(x_54) / (16!) */ + ( real_type )+1.52734745322533383601291137758565973309742e-22, /* F^(17)(x_54) / (17!) */ + ( real_type )-9.96352485201208512115388250188140189572428e-24, /* F^(18)(x_54) / (18!) */ + ( real_type )+5.80522338265941780069427759072181715008367e-25, /* F^(19)(x_54) / (19!) */ + ( real_type )-3.03761316351135098187928582765913193810985e-26, /* F^(20)(x_54) / (20!) */ + ( real_type )+1.43447862795843736051452503819376043372946e-27, /* F^(21)(x_54) / (21!) */ + ( real_type )-6.14102255157761663840872071977303293106194e-29, /* F^(22)(x_54) / (22!) */ + ( real_type )+2.39294414594439294152188902324743205398755e-30, /* F^(23)(x_54) / (23!) */ + ( real_type )-8.51874541718471648284534987047510185240215e-32, /* F^(24)(x_54) / (24!) */ + + /* ===== n = 55, xn = 11.58, yn = 0 ==== */ + ( real_type )+0.043344694031490533589896575933680459971779, /* F^(00)(x_55) / ( 0!) */ + ( real_type )-0.00377186178188592756191947175976382352428882, /* F^(01)(x_55) / ( 1!) */ + ( real_type )+0.000164747510962732410837742501351263385152657, /* F^(02)(x_55) / ( 2!) */ + ( real_type )-0.00000481597057175221577734758315879745204502109, /* F^(03)(x_55) / ( 3!) */ + ( real_type )+0.000000106004098825759315410619063316274004043053, /* F^(04)(x_55) / ( 4!) */ + ( real_type )-0.00000000187385905618535499671448663126617318057107, /* F^(05)(x_55) / ( 5!) 
*/ + ( real_type )+0.0000000000275830901254171172977827804075060580843602, /* F^(06)(x_55) / ( 6!) */ + ( real_type )-0.000000000000288706372369797987724854018360607647692154, /* F^(07)(x_55) / ( 7!) */ + ( real_type )-1.86726736215780962924014273836565426858471e-14, /* F^(08)(x_55) / ( 8!) */ + ( real_type )+6.22958522563411428304639551904658081644383e-15, /* F^(09)(x_55) / ( 9!) */ + ( real_type )-1.40114595882404222883795080847975664823742e-15, /* F^(10)(x_55) / (10!) */ + ( real_type )+2.57865070911339224324896153986803055376251e-16, /* F^(11)(x_55) / (11!) */ + ( real_type )-3.97004052623460958251613998202814161028464e-17, /* F^(12)(x_55) / (12!) */ + ( real_type )+5.18580055773270195518231864035726554002698e-18, /* F^(13)(x_55) / (13!) */ + ( real_type )-5.81553412175946050342472453478759777298899e-19, /* F^(14)(x_55) / (14!) */ + ( real_type )+5.65632180085210457954640062913964397019752e-20, /* F^(15)(x_55) / (15!) */ + ( real_type )-4.81384606899058264990317485969076807654758e-21, /* F^(16)(x_55) / (16!) */ + ( real_type )+3.61273834927266188811170104921105629640448e-22, /* F^(17)(x_55) / (17!) */ + ( real_type )-2.40740944715276215087875807161275743692606e-23, /* F^(18)(x_55) / (18!) */ + ( real_type )+1.43314074360744703327036061609515971207309e-24, /* F^(19)(x_55) / (19!) */ + ( real_type )-7.66360181900291289920843922635045047051912e-26, /* F^(20)(x_55) / (20!) */ + ( real_type )+3.6993532152713526174462138501103077469417e-27, /* F^(21)(x_55) / (21!) */ + ( real_type )-1.61922652798764408021797440449866299824186e-28, /* F^(22)(x_55) / (22!) */ + ( real_type )+6.45270871696044669009179302581145017758055e-30, /* F^(23)(x_55) / (23!) */ + ( real_type )-2.34984233605416768438417325898649653806951e-31, /* F^(24)(x_55) / (24!) */ + + /* ===== n = 56, xn = 11.79, yn = 0 ==== */ + ( real_type )+0.0425649568093194227216046680664783565680491, /* F^(00)(x_56) / ( 0!) */ + ( real_type )-0.00363687634605791161742953871741234404364303, /* F^(01)(x_56) / ( 1!) 
*/ + ( real_type )+0.000155950582629029615026231048066637285147217, /* F^(02)(x_56) / ( 2!) */ + ( real_type )-0.00000447491486961954925295846223151971008629353, /* F^(03)(x_56) / ( 3!) */ + ( real_type )+0.000000096670443931522468332759614651847301894022, /* F^(04)(x_56) / ( 4!) */ + ( real_type )-0.00000000167719498933967474920854047564863936649508, /* F^(05)(x_56) / ( 5!) */ + ( real_type )+0.0000000000243976335447419394828918088267415511190234, /* F^(06)(x_56) / ( 6!) */ + ( real_type )-0.00000000000033072491467216061933776838729574682333099, /* F^(07)(x_56) / ( 7!) */ + ( real_type )+1.29280142988371645112520490629136079200248e-14, /* F^(08)(x_56) / ( 8!) */ + ( real_type )-2.74256469212884109011032194027268988546208e-15, /* F^(09)(x_56) / ( 9!) */ + ( real_type )+6.1793896463787569649437269621867940852781e-16, /* F^(10)(x_56) / (10!) */ + ( real_type )-1.15882953600556198573091891345511969776717e-16, /* F^(11)(x_56) / (11!) */ + ( real_type )+1.81947605890251740563726471811500232711825e-17, /* F^(12)(x_56) / (12!) */ + ( real_type )-2.4242572586364111582958780726620332087404e-18, /* F^(13)(x_56) / (13!) */ + ( real_type )+2.77358369245548648847766426642209415721118e-19, /* F^(14)(x_56) / (14!) */ + ( real_type )-2.75266485560550944860686745361673539977229e-20, /* F^(15)(x_56) / (15!) */ + ( real_type )+2.39089172512828092135094269084959618228674e-21, /* F^(16)(x_56) / (16!) */ + ( real_type )-1.83162211352547613167316166220729329971469e-22, /* F^(17)(x_56) / (17!) */ + ( real_type )+1.2461391240550853012220690937688888174459e-23, /* F^(18)(x_56) / (18!) */ + ( real_type )-7.57548840391418119032159633241206550627715e-25, /* F^(19)(x_56) / (19!) */ + ( real_type )+4.13761971071386305871693737487293060335493e-26, /* F^(20)(x_56) / (20!) */ + ( real_type )-2.04048138289227963335795513322980507101037e-27, /* F^(21)(x_56) / (21!) */ + ( real_type )+9.12640763073970968882952526000804737330831e-29, /* F^(22)(x_56) / (22!) 
*/ + ( real_type )-3.71722598174672402986236260247634591326318e-30, /* F^(23)(x_56) / (23!) */ + ( real_type )+1.38389356154005977449394507443013361887334e-31, /* F^(24)(x_56) / (24!) */ + + /* ===== n = 57, xn = 12.00, yn = 0 ==== */ + ( real_type )+0.0418128764539882603179291175888089559911153, /* F^(00)(x_57) / ( 0!) */ + ( real_type )-0.00350903489571824763029882213141494378676609, /* F^(01)(x_57) / ( 1!) */ + ( real_type )+0.000147771147315355622828373994085184725038914, /* F^(02)(x_57) / ( 2!) */ + ( real_type )-0.00000416362665003192417579485851438773490753895, /* F^(03)(x_57) / ( 3!) */ + ( real_type )+0.0000000883088369080686458433713513655721513536717, /* F^(04)(x_57) / ( 4!) */ + ( real_type )-0.00000000150395043110741649373932702319456714874573, /* F^(05)(x_57) / ( 5!) */ + ( real_type )+0.0000000000214243217597371531220918891789097507896899, /* F^(06)(x_57) / ( 6!) */ + ( real_type )-0.000000000000262590038052098850689040800591393932734309, /* F^(07)(x_57) / ( 7!) */ + ( real_type )+2.8269706992819211419086521016399087499645e-15, /* F^(08)(x_57) / ( 8!) */ + ( real_type )-2.71578675276715399203388924877694668588979e-17, /* F^(09)(x_57) / ( 9!) */ + ( real_type )+2.35731097125789265528773971198208157325511e-19, /* F^(10)(x_57) / (10!) */ + ( real_type )-1.86754401226970569109749430746365623167503e-21, /* F^(11)(x_57) / (11!) */ + ( real_type )+1.36170612295604617374940371047909914843341e-23, /* F^(12)(x_57) / (12!) */ + ( real_type )-9.20244623145964488181923428266731184113172e-26, /* F^(13)(x_57) / (13!) */ + ( real_type )+5.79870401504383263849268741106762156853552e-28, /* F^(14)(x_57) / (14!) */ + ( real_type )-3.42461278612059651332368614556376123179656e-30, /* F^(15)(x_57) / (15!) */ + ( real_type )+1.9041614581939639869268106820163992119702e-32, /* F^(16)(x_57) / (16!) */ + ( real_type )-1.00076649486078867463577147018912307606502e-34, /* F^(17)(x_57) / (17!) 
*/ + ( real_type )+4.98918632251433948185051260600624049391598e-37, /* F^(18)(x_57) / (18!) */ + ( real_type )-2.36681289019109428882546176387272608866815e-39, /* F^(19)(x_57) / (19!) */ + ( real_type )+1.07143965031935657018826372532519733749315e-41, /* F^(20)(x_57) / (20!) */ + ( real_type )-4.6404142987610312840446978890645696443403e-44, /* F^(21)(x_57) / (21!) */ + ( real_type )+1.92728754365256421469894023453510847856372e-46, /* F^(22)(x_57) / (22!) */ + ( real_type )-7.69246977971710332979041639642750883429995e-49, /* F^(23)(x_57) / (23!) */ + ( real_type )+2.95643290382482343483113965698798248344002e-51, /* F^(24)(x_57) / (24!) */ + + /* ===== n = 58, xn = 12.21, yn = 0 ==== */ + ( real_type )+0.0410870020829319836299372632542900813461959, /* F^(00)(x_58) / ( 0!) */ + ( real_type )-0.00338784034107571856034938414447444703955907, /* F^(01)(x_58) / ( 1!) */ + ( real_type )+0.000140155777733130126739178728826335267688473, /* F^(02)(x_58) / ( 2!) */ + ( real_type )-0.00000387903150721473792479793476863325295214414, /* F^(03)(x_58) / ( 3!) */ + ( real_type )+0.0000000808029651140020604284592544512108247580423, /* F^(04)(x_58) / ( 4!) */ + ( real_type )-0.00000000135110840917251012292025344313335319693369, /* F^(05)(x_58) / ( 5!) */ + ( real_type )+0.0000000000187306534936096201425312018566413235024703, /* F^(06)(x_58) / ( 6!) */ + ( real_type )-0.000000000000143934797288541535020142462805808058685105, /* F^(07)(x_58) / ( 7!) */ + ( real_type )-2.86965003717084941993572954275049132611658e-14, /* F^(08)(x_58) / ( 8!) */ + ( real_type )+9.09607957088584299963747168643323364533613e-15, /* F^(09)(x_58) / ( 9!) */ + ( real_type )-2.15758837859031310507647617089801641240847e-15, /* F^(10)(x_58) / (10!) */ + ( real_type )+4.20424491212065674368567899444385637350966e-16, /* F^(11)(x_58) / (11!) */ + ( real_type )-6.85758321624119852080924200713135689201526e-17, /* F^(12)(x_58) / (12!) 
*/ + ( real_type )+9.4948121772617928716363088536826134413283e-18, /* F^(13)(x_58) / (13!) */ + ( real_type )-1.129199897361635005815835814962729612971e-18, /* F^(14)(x_58) / (14!) */ + ( real_type )+1.16532659490814238841902605059568169701393e-19, /* F^(15)(x_58) / (15!) */ + ( real_type )-1.05284774390943653727706847346901484567579e-20, /* F^(16)(x_58) / (16!) */ + ( real_type )+8.39273076934820846268455518507287793424566e-22, /* F^(17)(x_58) / (17!) */ + ( real_type )-5.94360727762133580145063182003337251408261e-23, /* F^(18)(x_58) / (18!) */ + ( real_type )+3.76243461877671452243229598094226433615396e-24, /* F^(19)(x_58) / (19!) */ + ( real_type )-2.14065462836837981839123351330205599090795e-25, /* F^(20)(x_58) / (20!) */ + ( real_type )+1.10010441557008064034577272828607361518375e-26, /* F^(21)(x_58) / (21!) */ + ( real_type )-5.12954321057544711139150945162044631505833e-28, /* F^(22)(x_58) / (22!) */ + ( real_type )+2.17897723041523922851131198792125153463775e-29, /* F^(23)(x_58) / (23!) */ + ( real_type )-8.4639661927014846363105681696342093910476e-31, /* F^(24)(x_58) / (24!) */ + + /* ===== n = 59, xn = 12.42, yn = 0 ==== */ + ( real_type )+0.040385982894919183194249326844771251876668, /* F^(00)(x_59) / ( 0!) */ + ( real_type )-0.00327283823167651317297436620104486790450334, /* F^(01)(x_59) / ( 1!) */ + ( real_type )+0.000133056517689278922699219156577812425103344, /* F^(02)(x_59) / ( 2!) */ + ( real_type )-0.00000361842074553157230385925054716216944245369, /* F^(03)(x_59) / ( 3!) */ + ( real_type )+0.0000000740527450439771127918614585985829617556219, /* F^(04)(x_59) / ( 4!) */ + ( real_type )-0.00000000121662858569949270664153033299598195194921, /* F^(05)(x_59) / ( 5!) */ + ( real_type )+0.0000000000167365937367463138961844146979484233610755, /* F^(06)(x_59) / ( 6!) */ + ( real_type )-0.000000000000208762478676125370176574403648413483249823, /* F^(07)(x_59) / ( 7!) */ + ( real_type )+6.31586789820938311205687202519041479068077e-15, /* F^(08)(x_59) / ( 8!) 
*/ + ( real_type )-1.29270167267266615228054827890362941781134e-15, /* F^(09)(x_59) / ( 9!) */ + ( real_type )+3.07099048267925675436412676711361896012986e-16, /* F^(10)(x_59) / (10!) */ + ( real_type )-6.09127813946519277825775347140299592715441e-17, /* F^(11)(x_59) / (11!) */ + ( real_type )+1.01205940047465318141363282967557518136506e-17, /* F^(12)(x_59) / (12!) */ + ( real_type )-1.42760196070506542048676039807772021918689e-18, /* F^(13)(x_59) / (13!) */ + ( real_type )+1.72998080201311924347240672522795972604995e-19, /* F^(14)(x_59) / (14!) */ + ( real_type )-1.81941928064072960248151086597124985478109e-20, /* F^(15)(x_59) / (15!) */ + ( real_type )+1.67545172727129657185566062774989894466155e-21, /* F^(16)(x_59) / (16!) */ + ( real_type )-1.3615042931284549701237198779781953302547e-22, /* F^(17)(x_59) / (17!) */ + ( real_type )+9.83071397746782629268395114970686344014432e-24, /* F^(18)(x_59) / (18!) */ + ( real_type )-6.34592260279912200943275109914970063825506e-25, /* F^(19)(x_59) / (19!) */ + ( real_type )+3.68244893285766967088467015517885194755645e-26, /* F^(20)(x_59) / (20!) */ + ( real_type )-1.93047304792413975541819855128514322358905e-27, /* F^(21)(x_59) / (21!) */ + ( real_type )+9.18386786001064544658915831428406555042137e-29, /* F^(22)(x_59) / (22!) */ + ( real_type )-3.98103699660091748707730748995897042114109e-30, /* F^(23)(x_59) / (23!) */ + ( real_type )+1.57832236239414542586394934224622611822372e-31, /* F^(24)(x_59) / (24!) */ + + /* ===== n = 60, xn = 12.63, yn = 0 ==== */ + ( real_type )+0.0397085596585740386496577961382040023237114, /* F^(00)(x_60) / ( 0!) */ + ( real_type )-0.00316361242713362284115938291059963727287192, /* F^(01)(x_60) / ( 1!) */ + ( real_type )+0.00012643023682007259932007674260294186474639, /* F^(02)(x_60) / ( 2!) */ + ( real_type )-0.00000337940092453809701724809752828262078853736, /* F^(03)(x_60) / ( 3!) */ + ( real_type )+0.0000000679696624694089942002275946935561307991749, /* F^(04)(x_60) / ( 4!) 
*/ + ( real_type )-0.0000000010971141099039916530648802763511195468234, /* F^(05)(x_60) / ( 5!) */ + ( real_type )+0.0000000000146861666467948031357857074552709690920816, /* F^(06)(x_60) / ( 6!) */ + ( real_type )-0.000000000000108453950307438388955662118219984303070758, /* F^(07)(x_60) / ( 7!) */ + ( real_type )-2.27524741007060064467282080827706015980905e-14, /* F^(08)(x_60) / ( 8!) */ + ( real_type )+7.43102263307291462224495573069494779225095e-15, /* F^(09)(x_60) / ( 9!) */ + ( real_type )-1.82674992742971715779695090393880480273904e-15, /* F^(10)(x_60) / (10!) */ + ( real_type )+3.69117911770590331952085477150388252837239e-16, /* F^(11)(x_60) / (11!) */ + ( real_type )-6.24510259471819421039630774266591850652679e-17, /* F^(12)(x_60) / (12!) */ + ( real_type )+8.97154128538579775847458780105162582328883e-18, /* F^(13)(x_60) / (13!) */ + ( real_type )-1.10735517299046651742619772977157434002875e-18, /* F^(14)(x_60) / (14!) */ + ( real_type )+1.18638399225585209564532310410849275327917e-19, /* F^(15)(x_60) / (15!) */ + ( real_type )-1.11309892797502164726848277688971336320251e-20, /* F^(16)(x_60) / (16!) */ + ( real_type )+9.21709826282888201286357464459208998830786e-22, /* F^(17)(x_60) / (17!) */ + ( real_type )-6.78264590245562398030622550668001314883608e-23, /* F^(18)(x_60) / (18!) */ + ( real_type )+4.46287629046521333628504232499747320275961e-24, /* F^(19)(x_60) / (19!) */ + ( real_type )-2.64016802812393414937018301805998541657474e-25, /* F^(20)(x_60) / (20!) */ + ( real_type )+1.41124955720330511644324403889382264317621e-26, /* F^(21)(x_60) / (21!) */ + ( real_type )-6.84673029988308623188414518757373072177671e-28, /* F^(22)(x_60) / (22!) */ + ( real_type )+3.02723038184318304992217106651042882556506e-29, /* F^(23)(x_60) / (23!) */ + ( real_type )-1.22437014928144739216840681333886174920173e-30, /* F^(24)(x_60) / (24!) */ + + /* ===== n = 61, xn = 12.84, yn = 0 ==== */ + ( real_type )+0.0390535570577986355292801429237067764008847, /* F^(00)(x_61) / ( 0!) 
*/ + ( real_type )-0.00305978127398593425408663664201734439065531, /* F^(01)(x_61) / ( 1!) */ + ( real_type )+0.000120238072484048120741649841172197156266956, /* F^(02)(x_61) / ( 2!) */ + ( real_type )-0.00000315985477475279428400338027405747059171869, /* F^(03)(x_61) / ( 3!) */ + ( real_type )+0.0000000624787673730941960706977171675491761061134, /* F^(04)(x_61) / ( 4!) */ + ( real_type )-0.000000000991617090354567925845596977591827829649762, /* F^(05)(x_61) / ( 5!) */ + ( real_type )+0.0000000000132609766922707930497553570777497356837923, /* F^(06)(x_61) / ( 6!) */ + ( real_type )-0.000000000000205279327156336921017508600439978107680136, /* F^(07)(x_61) / ( 7!) */ + ( real_type )+2.31810464355917217168936396521126822074178e-14, /* F^(08)(x_61) / ( 8!) */ + ( real_type )-6.71687709978957699769637601804941071000168e-15, /* F^(09)(x_61) / ( 9!) */ + ( real_type )+1.67366341858031196496829365804144190102475e-15, /* F^(10)(x_61) / (10!) */ + ( real_type )-3.44159902256629746347460577326145312084591e-16, /* F^(11)(x_61) / (11!) */ + ( real_type )+5.92720343370393570317383722583155769314169e-17, /* F^(12)(x_61) / (12!) */ + ( real_type )-8.66861197944437140941213398895406358199623e-18, /* F^(13)(x_61) / (13!) */ + ( real_type )+1.08942694151837545754114669201728756233795e-18, /* F^(14)(x_61) / (14!) */ + ( real_type )-1.18856434573678437268243500454874426738075e-19, /* F^(15)(x_61) / (15!) */ + ( real_type )+1.13573311033666095131242696070230057216563e-20, /* F^(16)(x_61) / (16!) */ + ( real_type )-9.57948196381427563358060389487799005391631e-22, /* F^(17)(x_61) / (17!) */ + ( real_type )+7.18147732986697944191805641440932525287685e-23, /* F^(18)(x_61) / (18!) */ + ( real_type )-4.81459021555637508582200872961536066979596e-24, /* F^(19)(x_61) / (19!) */ + ( real_type )+2.90248747183443589736075521776611658766652e-25, /* F^(20)(x_61) / (20!) */ + ( real_type )-1.58125870886696919461076613327883066703951e-26, /* F^(21)(x_61) / (21!) 
*/ + ( real_type )+7.82006359347315265728606690337892618526142e-28, /* F^(22)(x_61) / (22!) */ + ( real_type )-3.52508658775848978740080071143555968996075e-29, /* F^(23)(x_61) / (23!) */ + ( real_type )+1.45380226987515448145438094831163523776761e-30, /* F^(24)(x_61) / (24!) */ + + /* ===== n = 62, xn = 13.05, yn = 0 ==== */ + ( real_type )+0.0384198767947746146529233779408053168285224, /* F^(00)(x_62) / ( 0!) */ + ( real_type )-0.00296099422148465570344894625064624701316656, /* F^(01)(x_62) / ( 1!) */ + ( real_type )+0.000114444942828340007117038921271147979080735, /* F^(02)(x_62) / ( 2!) */ + ( real_type )-0.00000295790312659756692836833111611817037136908, /* F^(03)(x_62) / ( 3!) */ + ( real_type )+0.0000000575131851976526583194644809327871053184833, /* F^(04)(x_62) / ( 4!) */ + ( real_type )-0.000000000897810853387014934677047684796869459635476, /* F^(05)(x_62) / ( 5!) */ + ( real_type )+0.0000000000120087365172502847995480952620880496211175, /* F^(06)(x_62) / ( 6!) */ + ( real_type )-0.000000000000290223316618381539166214582646232585172933, /* F^(07)(x_62) / ( 7!) */ + ( real_type )+6.47701324769161019677610581522490734471337e-14, /* F^(08)(x_62) / ( 8!) */ + ( real_type )-1.99788336287284428685125137690478997826579e-14, /* F^(09)(x_62) / ( 9!) */ + ( real_type )+5.07159346693105001016549135878033299795288e-15, /* F^(10)(x_62) / (10!) */ + ( real_type )-1.06115301949048832887689330656331932796128e-15, /* F^(11)(x_62) / (11!) */ + ( real_type )+1.85969242510523800423874841730246762962946e-16, /* F^(12)(x_62) / (12!) */ + ( real_type )-2.76799833043184646486178794511061978748191e-17, /* F^(13)(x_62) / (13!) */ + ( real_type )+3.54072752893591571930425690862466243283963e-18, /* F^(14)(x_62) / (14!) */ + ( real_type )-3.93232624818160343690480422826907307921419e-19, /* F^(15)(x_62) / (15!) */ + ( real_type )+3.82552505831626658012154735733249905894561e-20, /* F^(16)(x_62) / (16!) 
*/ + ( real_type )-3.28549932713632325867931666179689704418723e-21, /* F^(17)(x_62) / (17!) */ + ( real_type )+2.50827798840474102945413863422821875666076e-22, /* F^(18)(x_62) / (18!) */ + ( real_type )-1.71270643081804317048678616370716653775485e-23, /* F^(19)(x_62) / (19!) */ + ( real_type )+1.05175898673375601805012693539954323144071e-24, /* F^(20)(x_62) / (20!) */ + ( real_type )-5.83758247087242097510226048378838155671621e-26, /* F^(21)(x_62) / (21!) */ + ( real_type )+2.94162963540146143776286479808868662366919e-27, /* F^(22)(x_62) / (22!) */ + ( real_type )-1.35132556880677755033777028118540035781512e-28, /* F^(23)(x_62) / (23!) */ + ( real_type )+5.68034214796877487630433562182677995853554e-30, /* F^(24)(x_62) / (24!) */ + + /* ===== n = 63, xn = 13.26, yn = 0 ==== */ + ( real_type )+0.0378064913642431632452704510295471834595252, /* F^(00)(x_63) / ( 0!) */ + ( real_type )-0.00286692881992380320430116109980608844388608, /* F^(01)(x_63) / ( 1!) */ + ( real_type )+0.000109019123688900774563809668675913257380098, /* F^(02)(x_63) / ( 2!) */ + ( real_type )-0.00000277187579329662975866685620071330505034947, /* F^(03)(x_63) / ( 3!) */ + ( real_type )+0.0000000530148018489849983924035752764531773465139, /* F^(04)(x_63) / ( 4!) */ + ( real_type )-0.000000000813979148569376120505457717367617002254876, /* F^(05)(x_63) / ( 5!) */ + ( real_type )+0.0000000000107207555945607216008197174563139166958613, /* F^(06)(x_63) / ( 6!) */ + ( real_type )-0.000000000000266444194829095258462527393370082623803295, /* F^(07)(x_63) / ( 7!) */ + ( real_type )+6.25735910201762866108616238025044972056188e-14, /* F^(08)(x_63) / ( 8!) */ + ( real_type )-1.96695776145098392719860052256186461677555e-14, /* F^(09)(x_63) / ( 9!) */ + ( real_type )+5.07856184799122354460028202419793175995612e-15, /* F^(10)(x_63) / (10!) */ + ( real_type )-1.08083983316765507029354203151233314576764e-15, /* F^(11)(x_63) / (11!) 
*/ + ( real_type )+1.92689748965238323412636244829885423477515e-16, /* F^(12)(x_63) / (12!) */ + ( real_type )-2.91787524156906042797051325421738647908888e-17, /* F^(13)(x_63) / (13!) */ + ( real_type )+3.79775624570201112816070494483060714272044e-18, /* F^(14)(x_63) / (14!) */ + ( real_type )-4.29209281167537923546894249865534442258377e-19, /* F^(15)(x_63) / (15!) */ + ( real_type )+4.24959899710788094305295337153286462441825e-20, /* F^(16)(x_63) / (16!) */ + ( real_type )-3.71491657305605811690434131902861796097307e-21, /* F^(17)(x_63) / (17!) */ + ( real_type )+2.88714573816489965043930057161119655124659e-22, /* F^(18)(x_63) / (18!) */ + ( real_type )-2.00713724375683730823387785584176650742277e-23, /* F^(19)(x_63) / (19!) */ + ( real_type )+1.2550713895922998728675385671569181439391e-24, /* F^(20)(x_63) / (20!) */ + ( real_type )-7.09416842968300704362282317043020583658538e-26, /* F^(21)(x_63) / (21!) */ + ( real_type )+3.64109692512863766304652693863004896991988e-27, /* F^(22)(x_63) / (22!) */ + ( real_type )-1.70388738807295463420543938710718744864062e-28, /* F^(23)(x_63) / (23!) */ + ( real_type )+7.29716730668097099833161809231348345283148e-30, /* F^(24)(x_63) / (24!) */ + + /* ===== n = 64, xn = 13.47, yn = 0 ==== */ + ( real_type )+0.0372124384238227623469476082943136111737884, /* F^(00)(x_64) / ( 0!) */ + ( real_type )-0.00277728805248701389452965094719609916604401, /* F^(01)(x_64) / ( 1!) */ + ( real_type )+0.000103931878527448235261817598183457471122412, /* F^(02)(x_64) / ( 2!) */ + ( real_type )-0.00000260028543158028949745959187149162245430237, /* F^(03)(x_64) / ( 3!) */ + ( real_type )+0.0000000489331566494424412666595023867793965927098, /* F^(04)(x_64) / ( 4!) */ + ( real_type )-0.000000000739083377898480656630700294664929372524146, /* F^(05)(x_64) / ( 5!) */ + ( real_type )+0.00000000000953026162026245842440544043395319262146321, /* F^(06)(x_64) / ( 6!) 
*/ + ( real_type )-0.000000000000213353980899392356356774510356525280729119, /* F^(07)(x_64) / ( 7!) */ + ( real_type )+4.72874443107905007763250883493998840708253e-14, /* F^(08)(x_64) / ( 8!) */ + ( real_type )-1.50732554112102802864157013664720856501113e-14, /* F^(09)(x_64) / ( 9!) */ + ( real_type )+3.95676245912555138448152218305702055179697e-15, /* F^(10)(x_64) / (10!) */ + ( real_type )-8.56278385606647730016618875221994696554762e-16, /* F^(11)(x_64) / (11!) */ + ( real_type )+1.55243317859179585165385749686693897102901e-16, /* F^(12)(x_64) / (12!) */ + ( real_type )-2.39093835324859001496972094454270766305264e-17, /* F^(13)(x_64) / (13!) */ + ( real_type )+3.16536418064895724874014292149586890832934e-18, /* F^(14)(x_64) / (14!) */ + ( real_type )-3.63922704639219049185797773249237185542568e-19, /* F^(15)(x_64) / (15!) */ + ( real_type )+3.66590259424964606242491909511778292147622e-20, /* F^(16)(x_64) / (16!) */ + ( real_type )-3.26080948079403415696360093175365116399645e-21, /* F^(17)(x_64) / (17!) */ + ( real_type )+2.57893234099839527414358641461710185104959e-22, /* F^(18)(x_64) / (18!) */ + ( real_type )-1.82471795790927382899217107980927602808959e-23, /* F^(19)(x_64) / (19!) */ + ( real_type )+1.16141703635471084618357404267428499730448e-24, /* F^(20)(x_64) / (20!) */ + ( real_type )-6.68308725878190377712950381356126595708688e-26, /* F^(21)(x_64) / (21!) */ + ( real_type )+3.49236570083655027068491456593052414215212e-27, /* F^(22)(x_64) / (22!) */ + ( real_type )-1.66416883421304875272417924928334365265546e-28, /* F^(23)(x_64) / (23!) */ + ( real_type )+7.25835606779771005011824512710783908467816e-30, /* F^(24)(x_64) / (24!) */ + + /* ===== n = 65, xn = 13.68, yn = 0 ==== */ + ( real_type )+0.03663681569465044069256321751749903523137, /* F^(00)(x_65) / ( 0!) */ + ( real_type )-0.00269179795885415947225036015905269636405221, /* F^(01)(x_65) / ( 1!) */ + ( real_type )+0.000099157134308502232757863000710505770495402, /* F^(02)(x_65) / ( 2!) 
*/ + ( real_type )-0.00000244180480814111525763163707010197404690866, /* F^(03)(x_65) / ( 3!) */ + ( real_type )+0.0000000452241194923847928524778221804181905691152, /* F^(04)(x_65) / ( 4!) */ + ( real_type )-0.00000000067241359726209931604119460114712749719869, /* F^(05)(x_65) / ( 5!) */ + ( real_type )+0.00000000000870140706749593022470286435791922456780578, /* F^(06)(x_65) / ( 6!) */ + ( real_type )-0.000000000000285834828970198372006067753686406369638124, /* F^(07)(x_65) / ( 7!) */ + ( real_type )+8.33864319650701032019446229437747243986354e-14, /* F^(08)(x_65) / ( 8!) */ + ( real_type )-2.72925465784457512032061498584019079099423e-14, /* F^(09)(x_65) / ( 9!) */ + ( real_type )+7.28423588143002636471739494962346355912842e-15, /* F^(10)(x_65) / (10!) */ + ( real_type )-1.60247541598326221365900719824984573385237e-15, /* F^(11)(x_65) / (11!) */ + ( real_type )+2.95366773522392304546975030985698497613872e-16, /* F^(12)(x_65) / (12!) */ + ( real_type )-4.62523253652123949875905481455203833438975e-17, /* F^(13)(x_65) / (13!) */ + ( real_type )+6.22659221009222672905788972298360720815098e-18, /* F^(14)(x_65) / (14!) */ + ( real_type )-7.28020099007407506571675955125960508111822e-19, /* F^(15)(x_65) / (15!) */ + ( real_type )+7.45880793348995874007424366465109693384243e-20, /* F^(16)(x_65) / (16!) */ + ( real_type )-6.74863476944184890826593460830573922921198e-21, /* F^(17)(x_65) / (17!) */ + ( real_type )+5.42976573282304777933065869245069882614173e-22, /* F^(18)(x_65) / (18!) */ + ( real_type )-3.90874326082809482125404224456872054908224e-23, /* F^(19)(x_65) / (19!) */ + ( real_type )+2.53151471180808991786689887324141318150568e-24, /* F^(20)(x_65) / (20!) */ + ( real_type )-1.48242216038247159170928949363100623249906e-25, /* F^(21)(x_65) / (21!) */ + ( real_type )+7.88441889894649614846877963126572482322064e-27, /* F^(22)(x_65) / (22!) */ + ( real_type )-3.82433896195288226742448583718097879952681e-28, /* F^(23)(x_65) / (23!) 
*/ + ( real_type )+1.69809205772298512067294143605827790238566e-29, /* F^(24)(x_65) / (24!) */ + + /* ===== n = 66, xn = 13.89, yn = 0 ==== */ + ( real_type )+0.0360787763348041155456745796410421736938601, /* F^(00)(x_66) / ( 0!) */ + ( real_type )-0.00261020551455638641094049804838379707924656, /* F^(01)(x_66) / ( 1!) */ + ( real_type )+0.0000946711968844146537879220342418378618949077, /* F^(02)(x_66) / ( 2!) */ + ( real_type )-0.00000229524665730754982302831043636994413825627, /* F^(03)(x_66) / ( 3!) */ + ( real_type )+0.0000000418478328589931553903429826222490193380221, /* F^(04)(x_66) / ( 4!) */ + ( real_type )-0.000000000612236852898233251743913130750030805733472, /* F^(05)(x_66) / ( 5!) */ + ( real_type )+0.00000000000762796579227945566937181237093524911216648, /* F^(06)(x_66) / ( 6!) */ + ( real_type )-0.000000000000161187830255719634891315165350327788808154, /* F^(07)(x_66) / ( 7!) */ + ( real_type )+3.59360338955028668950468662034126316676511e-14, /* F^(08)(x_66) / ( 8!) */ + ( real_type )-1.1831438403580795188521955132487647071677e-14, /* F^(09)(x_66) / ( 9!) */ + ( real_type )+3.20803660852554531438012411042493889358855e-15, /* F^(10)(x_66) / (10!) */ + ( real_type )-7.17217861404036866512509815216081640513342e-16, /* F^(11)(x_66) / (11!) */ + ( real_type )+1.3435991320808994665019418544941042533749e-16, /* F^(12)(x_66) / (12!) */ + ( real_type )-2.13861240359468604428204076166274912149162e-17, /* F^(13)(x_66) / (13!) */ + ( real_type )+2.92672640421448854140293432200583141083662e-18, /* F^(14)(x_66) / (14!) */ + ( real_type )-3.47897892507727931642772024952736463345905e-19, /* F^(15)(x_66) / (15!) */ + ( real_type )+3.62408950836219080732049154806132387617764e-20, /* F^(16)(x_66) / (16!) */ + ( real_type )-3.33435350745702275563737092920326766189896e-21, /* F^(17)(x_66) / (17!) */ + ( real_type )+2.72828068930900519147770915592514368183143e-22, /* F^(18)(x_66) / (18!) 
*/ + ( real_type )-1.99758025452034750972337380961109785664057e-23, /* F^(19)(x_66) / (19!) */ + ( real_type )+1.31599573763232011716524757471741406525633e-24, /* F^(20)(x_66) / (20!) */ + ( real_type )-7.839738926291661993064540622803425557775e-26, /* F^(21)(x_66) / (21!) */ + ( real_type )+4.24233317859313180317429719071579141283095e-27, /* F^(22)(x_66) / (22!) */ + ( real_type )-2.09385956216690257675738769711830477277575e-28, /* F^(23)(x_66) / (23!) */ + ( real_type )+9.46150517857292166551825672596280130238301e-30, /* F^(24)(x_66) / (24!) */ + + /* ===== n = 67, xn = 14.11, yn = 0 ==== */ + ( real_type )+0.0355375247350140356986075299224587181901788, /* F^(00)(x_67) / ( 0!) */ + ( real_type )-0.00253227673513275668030182198235803132281976, /* F^(01)(x_67) / ( 1!) */ + ( real_type )+0.00009045250132400223836796411847325519635946, /* F^(02)(x_67) / ( 2!) */ + ( real_type )-0.00000215954819966561936481355218621725243142597, /* F^(03)(x_67) / ( 3!) */ + ( real_type )+0.0000000387702354717442352378724690733567175843153, /* F^(04)(x_67) / ( 4!) */ + ( real_type )-0.000000000558185928487421674386573283289438072434623, /* F^(05)(x_67) / ( 5!) */ + ( real_type )+0.00000000000662846211111376866216931897412372055357555, /* F^(06)(x_67) / ( 6!) */ + ( real_type )-1.89883399247861695456026923474566800498802e-14, /* F^(07)(x_67) / ( 7!) */ + ( real_type )-2.12214883487459473520510442723339870307781e-14, /* F^(08)(x_67) / ( 8!) */ + ( real_type )+7.44958568523103419992055937105285352673979e-15, /* F^(09)(x_67) / ( 9!) */ + ( real_type )-2.05440846715217319016734564891863034243363e-15, /* F^(10)(x_67) / (10!) */ + ( real_type )+4.66661380073808984853261750169557762488056e-16, /* F^(11)(x_67) / (11!) */ + ( real_type )-8.88280161869814953497518086402019708338364e-17, /* F^(12)(x_67) / (12!) */ + ( real_type )+1.43675040919718730910252023916959873407085e-17, /* F^(13)(x_67) / (13!) */ + ( real_type )-1.99820920221752274009687748390687723509514e-18, /* F^(14)(x_67) / (14!) 
*/ + ( real_type )+2.41413478662412011159674803996683351783159e-19, /* F^(15)(x_67) / (15!) */ + ( real_type )-2.55623960904814504990342238154620595422152e-20, /* F^(16)(x_67) / (16!) */ + ( real_type )+2.39083724142684909781148621092262998233984e-21, /* F^(17)(x_67) / (17!) */ + ( real_type )-1.98887148763933350475012654950230814727835e-22, /* F^(18)(x_67) / (18!) */ + ( real_type )+1.48062674839238682619305006577659704989872e-23, /* F^(19)(x_67) / (19!) */ + ( real_type )-9.91892772875700838398703803943379205526868e-25, /* F^(20)(x_67) / (20!) */ + ( real_type )+6.00934023203151066404787152489235966911007e-26, /* F^(21)(x_67) / (21!) */ + ( real_type )-3.3074391099038411192534614854923218369086e-27, /* F^(22)(x_67) / (22!) */ + ( real_type )+1.66052101083628580036576715750532481150517e-28, /* F^(23)(x_67) / (23!) */ + ( real_type )-7.63335723359485082454621722158138729864082e-30, /* F^(24)(x_67) / (24!) */ + + /* ===== n = 68, xn = 14.32, yn = 0 ==== */ + ( real_type )+0.0350123126922469469101154386321616300405276, /* F^(00)(x_68) / ( 0!) */ + ( real_type )-0.00245779497801773053188413202872296399591873, /* F^(01)(x_68) / ( 1!) */ + ( real_type )+0.0000864813912665956524661588153859335582679475, /* F^(02)(x_68) / ( 2!) */ + ( real_type )-0.00000203375594556227663980524071710867898303691, /* F^(03)(x_68) / ( 3!) */ + ( real_type )+0.0000000359614419155704959200430263648499274516107, /* F^(04)(x_68) / ( 4!) */ + ( real_type )-0.000000000510195619428882952970317218694937989687981, /* F^(05)(x_68) / ( 5!) */ + ( real_type )+0.00000000000619803861253877894313502694973585911412423, /* F^(06)(x_68) / ( 6!) */ + ( real_type )-0.000000000000150906640356072302072353225834017993071755, /* F^(07)(x_68) / ( 7!) */ + ( real_type )+3.98410501879052700775671167311338856854115e-14, /* F^(08)(x_68) / ( 8!) */ + ( real_type )-1.36171051075562839113473957327845825859338e-14, /* F^(09)(x_68) / ( 9!) 
*/ + ( real_type )+3.81025653213188058069157046226147766172383e-15, /* F^(10)(x_68) / (10!) */ + ( real_type )-8.79092889983048679152494650585296573824973e-16, /* F^(11)(x_68) / (11!) */ + ( real_type )+1.6997946921310311056603907316873933949181e-16, /* F^(12)(x_68) / (12!) */ + ( real_type )-2.79305627842775128601530423797633220103089e-17, /* F^(13)(x_68) / (13!) */ + ( real_type )+3.94666033587359806506317432081561857208673e-18, /* F^(14)(x_68) / (14!) */ + ( real_type )-4.84484607054482436782858337273983701587676e-19, /* F^(15)(x_68) / (15!) */ + ( real_type )+5.21302261622996092782654733040494709979448e-20, /* F^(16)(x_68) / (16!) */ + ( real_type )-4.95505228383474564311588285241250107423219e-21, /* F^(17)(x_68) / (17!) */ + ( real_type )+4.1894445329985415632929801705117745107938e-22, /* F^(18)(x_68) / (18!) */ + ( real_type )-3.17021601782447318373250320606571237897617e-23, /* F^(19)(x_68) / (19!) */ + ( real_type )+2.15895871452176628678021781275691857452218e-24, /* F^(20)(x_68) / (20!) */ + ( real_type )-1.32980001952640617192338238737089062194482e-25, /* F^(21)(x_68) / (21!) */ + ( real_type )+7.44176096830474488558872474363431866642371e-27, /* F^(22)(x_68) / (22!) */ + ( real_type )-3.79924906587634124455243684579890625218652e-28, /* F^(23)(x_68) / (23!) */ + ( real_type )+1.77617019229472464591643252810231654404498e-29, /* F^(24)(x_68) / (24!) */ + + /* ===== n = 69, xn = 14.53, yn = 0 ==== */ + ( real_type )+0.03450243592202935649461976396376297559226, /* F^(00)(x_69) / ( 0!) */ + ( real_type )-0.00238655941895807561474069129215845286452837, /* F^(01)(x_69) / ( 1!) */ + ( real_type )+0.0000827399240492381483542807844820309469642648, /* F^(02)(x_69) / ( 2!) */ + ( real_type )-0.00000191701236828080688904092984794482295677137, /* F^(03)(x_69) / ( 3!) */ + ( real_type )+0.0000000333940438618052039906139600740871566325675, /* F^(04)(x_69) / ( 4!) */ + ( real_type )-0.000000000467146764313275477675435377112648174692423, /* F^(05)(x_69) / ( 5!) 
*/ + ( real_type )+0.00000000000595070256024633785401832776063897887052581, /* F^(06)(x_69) / ( 6!) */ + ( real_type )-0.000000000000350366959475725282049187227313662755324669, /* F^(07)(x_69) / ( 7!) */ + ( real_type )+0.000000000000132482522806019919588738756777627121763573, /* F^(08)(x_69) / ( 8!) */ + ( real_type )-4.64367183439982425898979686076030308661766e-14, /* F^(09)(x_69) / ( 9!) */ + ( real_type )+1.31966830915996781233673072404374587014614e-14, /* F^(10)(x_69) / (10!) */ + ( real_type )-3.09182667979809851523825116941127479433582e-15, /* F^(11)(x_69) / (11!) */ + ( real_type )+6.07127114641589084487285219027268594793035e-16, /* F^(12)(x_69) / (12!) */ + ( real_type )-1.01321514417978772639558846983347510737666e-16, /* F^(13)(x_69) / (13!) */ + ( real_type )+1.45421043771592835614263452203153947265439e-17, /* F^(14)(x_69) / (14!) */ + ( real_type )-1.81338621685553453711201096455058694203998e-18, /* F^(15)(x_69) / (15!) */ + ( real_type )+1.9822146257170740308320061360883061021759e-19, /* F^(16)(x_69) / (16!) */ + ( real_type )-1.91424977371244917555674004839658210806868e-20, /* F^(17)(x_69) / (17!) */ + ( real_type )+1.64450543885880190153672865925738479464744e-21, /* F^(18)(x_69) / (18!) */ + ( real_type )-1.26455031722232944955945110041019707696542e-22, /* F^(19)(x_69) / (19!) */ + ( real_type )+8.75186403065197064089006749264701996988861e-24, /* F^(20)(x_69) / (20!) */ + ( real_type )-5.47889292832992583402969796296747642716854e-25, /* F^(21)(x_69) / (21!) */ + ( real_type )+3.11655266749273736656379531065826957731204e-26, /* F^(22)(x_69) / (22!) */ + ( real_type )-1.61745264068100089919331302361710524413866e-27, /* F^(23)(x_69) / (23!) */ + ( real_type )+7.68771004657654214460911328258827846471534e-29, /* F^(24)(x_69) / (24!) */ + + /* ===== n = 70, xn = 14.74, yn = 0 ==== */ + ( real_type )+0.0340072308749504430827663193995038626461181, /* F^(00)(x_70) / ( 0!) 
*/ + ( real_type )-0.00231838368274987595143798070892087114592442, /* F^(01)(x_70) / ( 1!) */ + ( real_type )+0.0000792116985764955620254653968563172194807884, /* F^(02)(x_70) / ( 2!) */ + ( real_type )-0.00000180854520812789784370203833385548406266755, /* F^(03)(x_70) / ( 3!) */ + ( real_type )+0.0000000310432058640755957772146103028290941576984, /* F^(04)(x_70) / ( 4!) */ + ( real_type )-0.000000000427401698246955113516482462046361643474186, /* F^(05)(x_70) / ( 5!) */ + ( real_type )+0.00000000000499500943175486781967207840113357245526819, /* F^(06)(x_70) / ( 6!) */ + ( real_type )-9.70224055338455307145953912735756675081502e-14, /* F^(07)(x_70) / ( 7!) */ + ( real_type )+2.23822217206284649438763851530912466557448e-14, /* F^(08)(x_70) / ( 8!) */ + ( real_type )-7.84482634000401155270042007920200643529188e-15, /* F^(09)(x_70) / ( 9!) */ + ( real_type )+2.26242107182668628973144024030869485839508e-15, /* F^(10)(x_70) / (10!) */ + ( real_type )-5.38123297142639863505179548469449980907911e-16, /* F^(11)(x_70) / (11!) */ + ( real_type )+1.07285603059185519539573343609955557434203e-16, /* F^(12)(x_70) / (12!) */ + ( real_type )-1.81799690456323668171206952386083667095628e-17, /* F^(13)(x_70) / (13!) */ + ( real_type )+2.6496183123183214353372550077114167842517e-18, /* F^(14)(x_70) / (14!) */ + ( real_type )-3.35541666580478021501198336208335516563945e-19, /* F^(15)(x_70) / (15!) */ + ( real_type )+3.72514323383760840808969601669149432467386e-20, /* F^(16)(x_70) / (16!) */ + ( real_type )-3.65395932459697163628888340031640751236916e-21, /* F^(17)(x_70) / (17!) */ + ( real_type )+3.18867659875634272633643301932891649932361e-22, /* F^(18)(x_70) / (18!) */ + ( real_type )-2.4909166846573913142531881328894518914893e-23, /* F^(19)(x_70) / (19!) */ + ( real_type )+1.75149975188554242902237050091142471237116e-24, /* F^(20)(x_70) / (20!) */ + ( real_type )-1.11410961218213784507865902640851894526675e-25, /* F^(21)(x_70) / (21!) 
*/ + ( real_type )+6.43983881891994169442981566213014236623824e-27, /* F^(22)(x_70) / (22!) */ + ( real_type )-3.39655021765367335003902461389301605401718e-28, /* F^(23)(x_70) / (23!) */ + ( real_type )+1.64078105343367554166897202072838745065862e-29, /* F^(24)(x_70) / (24!) */ + + /* ===== n = 71, xn = 14.95, yn = 0 ==== */ + ( real_type )+0.0335260718267648448243884603329401316619297, /* F^(00)(x_71) / ( 0!) */ + ( real_type )-0.00225309461065420813242641383057879737056137, /* F^(01)(x_71) / ( 1!) */ + ( real_type )+0.0000758817030858682607610641667200850258640883, /* F^(02)(x_71) / ( 2!) */ + ( real_type )-0.00000170765935873224141992486277504974852915013, /* F^(03)(x_71) / ( 3!) */ + ( real_type )+0.0000000288890680008170827016877558494574450477795, /* F^(04)(x_71) / ( 4!) */ + ( real_type )-0.0000000003920562452795954591980128225435094500439, /* F^(05)(x_71) / ( 5!) */ + ( real_type )+0.0000000000045775299891818274938990808846593724772058, /* F^(06)(x_71) / ( 6!) */ + ( real_type )-0.000000000000125686516739534387480997719375255083882662, /* F^(07)(x_71) / ( 7!) */ + ( real_type )+3.82734317396004734310529423578217898938965e-14, /* F^(08)(x_71) / ( 8!) */ + ( real_type )-1.37376856878364382018912413759302487043448e-14, /* F^(09)(x_71) / ( 9!) */ + ( real_type )+4.02179291404181561810612973788486196708037e-15, /* F^(10)(x_71) / (10!) */ + ( real_type )-9.70933088108731003612281770152891448442444e-16, /* F^(11)(x_71) / (11!) */ + ( real_type )+1.96489958923769328481485420815694133452388e-16, /* F^(12)(x_71) / (12!) */ + ( real_type )-3.37999611407733787699212466638789226923223e-17, /* F^(13)(x_71) / (13!) */ + ( real_type )+5.00108019962751733821846871690664594941626e-18, /* F^(14)(x_71) / (14!) */ + ( real_type )-6.43010713852775697583246520446610069680707e-19, /* F^(15)(x_71) / (15!) */ + ( real_type )+7.24836929062472962759463877488381538494988e-20, /* F^(16)(x_71) / (16!) 
*/ + ( real_type )-7.21973905032746637821096727504747729677615e-21, /* F^(17)(x_71) / (17!) */ + ( real_type )+6.3982933856519331797586354693615960334908e-22, /* F^(18)(x_71) / (18!) */ + ( real_type )-5.07627049098353510063958505492621945519129e-23, /* F^(19)(x_71) / (19!) */ + ( real_type )+3.62546811995474866449413398957134158082591e-24, /* F^(20)(x_71) / (20!) */ + ( real_type )-2.34254296790262545463242998083182851877854e-25, /* F^(21)(x_71) / (21!) */ + ( real_type )+1.37555534681537050964325484645549708789086e-26, /* F^(22)(x_71) / (22!) */ + ( real_type )-7.37094151011743313443885130067956589213859e-28, /* F^(23)(x_71) / (23!) */ + ( real_type )+3.61789940766732332566210291280275611206031e-29, /* F^(24)(x_71) / (24!) */ + + /* ===== n = 72, xn = 15.16, yn = 0 ==== */ + ( real_type )+0.0330583682149807959933751016989241274063893, /* F^(00)(x_72) / ( 0!) */ + ( real_type )-0.0021905311488914439558845248898179567108538, /* F^(01)(x_72) / ( 1!) */ + ( real_type )+0.0000727361788447552736876668872255121921872352, /* F^(02)(x_72) / ( 2!) */ + ( real_type )-0.0000016137262318222231379695696279458584288129, /* F^(03)(x_72) / ( 3!) */ + ( real_type )+0.0000000269124259737949862787046561436079810471431, /* F^(04)(x_72) / ( 4!) */ + ( real_type )-0.000000000360332965462718790134053667297869681493711, /* F^(05)(x_72) / ( 5!) */ + ( real_type )+0.00000000000441133144105864894389009908305060528794399, /* F^(06)(x_72) / ( 6!) */ + ( real_type )-0.000000000000278000138330715338111613900040691303544572, /* F^(07)(x_72) / ( 7!) */ + ( real_type )+0.000000000000111990832118093928709580022084401141948992, /* F^(08)(x_72) / ( 8!) */ + ( real_type )-4.10566722826193641974305888236496302954219e-14, /* F^(09)(x_72) / ( 9!) */ + ( real_type )+1.21977858182813568943652299671815874419973e-14, /* F^(10)(x_72) / (10!) */ + ( real_type )-2.98821630128582322626662526071521705081943e-15, /* F^(11)(x_72) / (11!) 
*/ + ( real_type )+6.13696923971700004938465010720799741716299e-16, /* F^(12)(x_72) / (12!) */ + ( real_type )-1.07140063299167676634103526926544880082447e-16, /* F^(13)(x_72) / (13!) */ + ( real_type )+1.60899014887474782680712061647845067483145e-17, /* F^(14)(x_72) / (14!) */ + ( real_type )-2.09987707936326292128848489017166363414242e-18, /* F^(15)(x_72) / (15!) */ + ( real_type )+2.40289497109733915277006995937467267041809e-19, /* F^(16)(x_72) / (16!) */ + ( real_type )-2.42978587273208847347255197161204558773117e-20, /* F^(17)(x_72) / (17!) */ + ( real_type )+2.18623276925196079706489850264712510171866e-21, /* F^(18)(x_72) / (18!) */ + ( real_type )-1.76115263967586258652749262926015167418672e-22, /* F^(19)(x_72) / (19!) */ + ( real_type )+1.27723587508981260208776068883221507983729e-23, /* F^(20)(x_72) / (20!) */ + ( real_type )-8.38078486571059349349143712178859135922526e-25, /* F^(21)(x_72) / (21!) */ + ( real_type )+4.99805699218573527734382163337994380112377e-26, /* F^(22)(x_72) / (22!) */ + ( real_type )-2.7202485485843836611080943464009217822591e-27, /* F^(23)(x_72) / (23!) */ + ( real_type )+1.35625600347283138414007621503125074889427e-28, /* F^(24)(x_72) / (24!) */ + + /* ===== n = 73, xn = 15.37, yn = 0 ==== */ + ( real_type )+0.0326035621978688818527138577945325741429771, /* F^(00)(x_73) / ( 0!) */ + ( real_type )-0.00213054334502246159592895558019061066831152, /* F^(01)(x_73) / ( 1!) */ + ( real_type )+0.0000697624996592111379074012152859197268677997, /* F^(02)(x_73) / ( 2!) */ + ( real_type )-0.00000152617687457622058233789526412941229824221, /* F^(03)(x_73) / ( 3!) */ + ( real_type )+0.0000000250952820257841965376420728444833204892908, /* F^(04)(x_73) / ( 4!) */ + ( real_type )-0.000000000330451357018684352547920166798151017345363, /* F^(05)(x_73) / ( 5!) */ + ( real_type )+0.00000000000330328816221018399694080595776508755464261, /* F^(06)(x_73) / ( 6!) 
*/ + ( real_type )+0.00000000000017587358509228800794271871270453554510203, /* F^(07)(x_73) / ( 7!) */ + ( real_type )-9.92124255194149858559663819281529728816029e-14, /* F^(08)(x_73) / ( 8!) */ + ( real_type )+3.71050402691033027922090777524962735818393e-14, /* F^(09)(x_73) / ( 9!) */ + ( real_type )-1.11844455838987829029531974302391562446617e-14, /* F^(10)(x_73) / (10!) */ + ( real_type )+2.77978123878829777522575344600524491737539e-15, /* F^(11)(x_73) / (11!) */ + ( real_type )-5.79223343493749034438834009510648884513274e-16, /* F^(12)(x_73) / (12!) */ + ( real_type )+1.02604731239754953506208279233965229671344e-16, /* F^(13)(x_73) / (13!) */ + ( real_type )-1.56358884681371795057589342883906939924642e-17, /* F^(14)(x_73) / (14!) */ + ( real_type )+2.07084451676489373183715549848609072115283e-18, /* F^(15)(x_73) / (15!) */ + ( real_type )-2.40493889872138926109664383530561559752147e-19, /* F^(16)(x_73) / (16!) */ + ( real_type )+2.46822392930272279231686197850236847434807e-20, /* F^(17)(x_73) / (17!) */ + ( real_type )-2.25419967020250961900037475843311647521813e-21, /* F^(18)(x_73) / (18!) */ + ( real_type )+1.84333818017153020083463703796154286602037e-22, /* F^(19)(x_73) / (19!) */ + ( real_type )-1.35713882081068872606541447407847814496435e-23, /* F^(20)(x_73) / (20!) */ + ( real_type )+9.04100305442395432107999336066273273865274e-25, /* F^(21)(x_73) / (21!) */ + ( real_type )-5.47451966384839978487709274578220664355467e-26, /* F^(22)(x_73) / (22!) */ + ( real_type )+3.02552651841260195450099269894585918864346e-27, /* F^(23)(x_73) / (23!) */ + ( real_type )-1.5318520659744341565025856575010582906853e-28, /* F^(24)(x_73) / (24!) */ + + /* ===== n = 74, xn = 15.58, yn = 0 ==== */ + ( real_type )+0.032161126414470307789361137738387026075656, /* F^(00)(x_74) / ( 0!) */ + ( real_type )-0.00207299144033792693010322129654330253806798, /* F^(01)(x_74) / ( 1!) */ + ( real_type )+0.0000669490648708018884072779966611949178289467, /* F^(02)(x_74) / ( 2!) 
*/ + ( real_type )-0.00000144449729294797038149327308482571481160602, /* F^(03)(x_74) / ( 3!) */ + ( real_type )+0.0000000234240429661441036363013995285257221456545, /* F^(04)(x_74) / ( 4!) */ + ( real_type )-0.000000000303808743055978028293650935182130105987388, /* F^(05)(x_74) / ( 5!) */ + ( real_type )+0.00000000000267843471702308479987432235787682332102859, /* F^(06)(x_74) / ( 6!) */ + ( real_type )+0.000000000000363575388696951067906359059233028999301317, /* F^(07)(x_74) / ( 7!) */ + ( real_type )-0.000000000000188961105498947641120137339587877867134347, /* F^(08)(x_74) / ( 8!) */ + ( real_type )+7.15646467570282498843536320736874936415639e-14, /* F^(09)(x_74) / ( 9!) */ + ( real_type )-2.18781237375723083617505732049430354162268e-14, /* F^(10)(x_74) / (10!) */ + ( real_type )+5.5153995628386681654727546628808756312497e-15, /* F^(11)(x_74) / (11!) */ + ( real_type )-1.16576665507634424369478316244742035741507e-15, /* F^(12)(x_74) / (12!) */ + ( real_type )+2.09488766071693123147645362034821780607524e-16, /* F^(13)(x_74) / (13!) */ + ( real_type )-3.23871439063630723587069502697980361013784e-17, /* F^(14)(x_74) / (14!) */ + ( real_type )+4.35194779590998801131414940834006624096205e-18, /* F^(15)(x_74) / (15!) */ + ( real_type )-5.12809552621147115460274742396318362136288e-19, /* F^(16)(x_74) / (16!) */ + ( real_type )+5.34050954537302577525941306901853863849116e-20, /* F^(17)(x_74) / (17!) */ + ( real_type )-4.94956730541927224016893862190736881883223e-21, /* F^(18)(x_74) / (18!) */ + ( real_type )+4.10759550139362806089112233091218325335694e-22, /* F^(19)(x_74) / (19!) */ + ( real_type )-3.06934893515347653018502039543979242899947e-23, /* F^(20)(x_74) / (20!) */ + ( real_type )+2.07543890092419026142660161447895549837815e-24, /* F^(21)(x_74) / (21!) */ + ( real_type )-1.27568424267959206284273663028899926403598e-25, /* F^(22)(x_74) / (22!) */ + ( real_type )+7.15706478518999164769779069906396139711324e-27, /* F^(23)(x_74) / (23!) 
*/ + ( real_type )-3.67892647124350321990036577789983406630162e-28, /* F^(24)(x_74) / (24!) */ + + /* ===== n = 75, xn = 15.79, yn = 0 ==== */ + ( real_type )+0.0317305619265100815660500263462697296874182, /* F^(00)(x_75) / ( 0!) */ + ( real_type )-0.0020177450476867447643582137574023443267306, /* F^(01)(x_75) / ( 1!) */ + ( real_type )+0.0000642852026929435384965030860853510706932433, /* F^(02)(x_75) / ( 2!) */ + ( real_type )-0.00000136822052466649693793776373557278142533087, /* F^(03)(x_75) / ( 3!) */ + ( real_type )+0.0000000218851338463157054189103149387793767820289, /* F^(04)(x_75) / ( 4!) */ + ( real_type )-0.000000000279969102016214308791858585319614339724333, /* F^(05)(x_75) / ( 5!) */ + ( real_type )+0.00000000000241877774521815369714378513971196993896231, /* F^(06)(x_75) / ( 6!) */ + ( real_type )+0.000000000000345726099862296954043759498496686525366063, /* F^(07)(x_75) / ( 7!) */ + ( real_type )-0.000000000000181386651050274552078269192685355987432572, /* F^(08)(x_75) / ( 8!) */ + ( real_type )+6.96489874306814920106236540782903083355312e-14, /* F^(09)(x_75) / ( 9!) */ + ( real_type )-2.15913356365947139350579035115080524990708e-14, /* F^(10)(x_75) / (10!) */ + ( real_type )+5.51985003467604965395369204153457844665638e-15, /* F^(11)(x_75) / (11!) */ + ( real_type )-1.18323164123708556208658191986882960304211e-15, /* F^(12)(x_75) / (12!) */ + ( real_type )+2.15652276434893745641642209499173583494898e-16, /* F^(13)(x_75) / (13!) */ + ( real_type )-3.38165114811443835190819841539199042197767e-17, /* F^(14)(x_75) / (14!) */ + ( real_type )+4.60925496970905342443022118481414116821814e-18, /* F^(15)(x_75) / (15!) */ + ( real_type )-5.50963093353865029149311148152430564454856e-19, /* F^(16)(x_75) / (16!) */ + ( real_type )+5.82099572863340014395676717665141491409747e-20, /* F^(17)(x_75) / (17!) */ + ( real_type )-5.47342589509192293473915797744907253115779e-21, /* F^(18)(x_75) / (18!) 
*/ + ( real_type )+4.60878823363561850531697515492092476284668e-22, /* F^(19)(x_75) / (19!) */ + ( real_type )-3.49447950299938490773614069878835241405022e-23, /* F^(20)(x_75) / (20!) */ + ( real_type )+2.39780489958295035939427852628270406787578e-24, /* F^(21)(x_75) / (21!) */ + ( real_type )-1.49570411170516202754702763554913792798252e-25, /* F^(22)(x_75) / (22!) */ + ( real_type )+8.51662358908955997536402373211468390020474e-27, /* F^(23)(x_75) / (23!) */ + ( real_type )-4.44340078760473505768289465029013296881066e-28, /* F^(24)(x_75) / (24!) */ + + /* ===== n = 76, xn = 16.00, yn = 0 ==== */ + ( real_type )+0.03131139632518461178355335195230901508146, /* F^(00)(x_76) / ( 0!) */ + ( real_type )-0.00196468240590757707370726247388848260672024, /* F^(01)(x_76) / ( 1!) */ + ( real_type )+0.0000617610846683106978814238149533533130319532, /* F^(02)(x_76) / ( 2!) */ + ( real_type )-0.0000012969226087072509442555116243137122558063, /* F^(03)(x_76) / ( 3!) */ + ( real_type )+0.0000000204666895682228101183642922377031352812121, /* F^(04)(x_76) / ( 4!) */ + ( real_type )-0.000000000258910473180178066396061577985768043825563, /* F^(05)(x_76) / ( 5!) */ + ( real_type )+0.00000000000273498095768261325911815556842564694258827, /* F^(06)(x_76) / ( 6!) */ + ( real_type )-2.4814277260903445437288140021048045325925e-14, /* F^(07)(x_76) / ( 7!) */ + ( real_type )+1.9740221222577066900944693719524166926486e-16, /* F^(08)(x_76) / ( 8!) */ + ( real_type )-1.39878365430617344420082697513237141968246e-18, /* F^(09)(x_76) / ( 9!) */ + ( real_type )+8.93918665404068212327143827515514482091959e-21, /* F^(10)(x_76) / (10!) */ + ( real_type )-5.20433228765879264836415684172756225862381e-23, /* F^(11)(x_76) / (11!) */ + ( real_type )+2.78331025554031487183684048340107012952432e-25, /* F^(12)(x_76) / (12!) */ + ( real_type )-1.37695663686993989706683319923562514854918e-27, /* F^(13)(x_76) / (13!) */ + ( real_type )+6.33905409334066829373411075424098739858515e-30, /* F^(14)(x_76) / (14!) 
*/ + ( real_type )-2.72961778021395560187984823583212880850103e-32, /* F^(15)(x_76) / (15!) */ + ( real_type )+1.10431551652513099363294276956313407196522e-34, /* F^(16)(x_76) / (16!) */ + ( real_type )-4.21409784127940059777185355750408372534358e-37, /* F^(17)(x_76) / (17!) */ + ( real_type )+1.52211678659644041252728388082704071634818e-39, /* F^(18)(x_76) / (18!) */ + ( real_type )-5.22002167184819299189670727232359303598567e-42, /* F^(19)(x_76) / (19!) */ + ( real_type )+1.70446846435290150024513499892977806819648e-44, /* F^(20)(x_76) / (20!) */ + ( real_type )-5.31241301771183240108219329368926943861819e-47, /* F^(21)(x_76) / (21!) */ + ( real_type )+1.58406748520807136006457984552964311647452e-49, /* F^(22)(x_76) / (22!) */ + ( real_type )-4.52834731432512092017748346064743652469683e-52, /* F^(23)(x_76) / (23!) */ + ( real_type )+1.24342280289210656039755537422423799235982e-54, /* F^(24)(x_76) / (24!) */ + + /* ===== n = 77, xn = 16.21, yn = 0 ==== */ + ( real_type )+0.0309031819875807833185597950940297846685743, /* F^(00)(x_77) / ( 0!) */ + ( real_type )-0.00191368970261901088162870468675453857740155, /* F^(01)(x_77) / ( 1!) */ + ( real_type )+0.0000593676484900112551646167339077728555034582, /* F^(02)(x_77) / ( 2!) */ + ( real_type )-0.00000123021707652470116645284569974948171447007, /* F^(03)(x_77) / ( 3!) */ + ( real_type )+0.0000000191562662250218223766421938201857759714516, /* F^(04)(x_77) / ( 4!) */ + ( real_type )-0.00000000023831108974480144427117603592758788586586, /* F^(05)(x_77) / ( 5!) */ + ( real_type )+0.00000000000177194148045589100694582594073656291874575, /* F^(06)(x_77) / ( 6!) */ + ( real_type )+0.00000000000044875146769378485031997308540299392868372, /* F^(07)(x_77) / ( 7!) */ + ( real_type )-0.000000000000235238499172101033708139709442530017574771, /* F^(08)(x_77) / ( 8!) */ + ( real_type )+9.27715048852948334609316331320665378432655e-14, /* F^(09)(x_77) / ( 9!) 
*/ + ( real_type )-2.95547462055868234988720157289333305926766e-14, /* F^(10)(x_77) / (10!) */ + ( real_type )+7.7656337294812360366067890675663170142714e-15, /* F^(11)(x_77) / (11!) */ + ( real_type )-1.7110863169574877873914409551712894898665e-15, /* F^(12)(x_77) / (12!) */ + ( real_type )+3.2059732101187225889210286410652473703655e-16, /* F^(13)(x_77) / (13!) */ + ( real_type )-5.16880539250328087169414378902632390664141e-17, /* F^(14)(x_77) / (14!) */ + ( real_type )+7.24436221973319575082177457381001453787922e-18, /* F^(15)(x_77) / (15!) */ + ( real_type )-8.90539485487712135055693078679705007002042e-19, /* F^(16)(x_77) / (16!) */ + ( real_type )+9.67705268236321794427469463857290899633912e-20, /* F^(17)(x_77) / (17!) */ + ( real_type )-9.35997837068133482262682909019955722075005e-21, /* F^(18)(x_77) / (18!) */ + ( real_type )+8.10825680410301230433202829837940150090779e-22, /* F^(19)(x_77) / (19!) */ + ( real_type )-6.32564029462345860176344018424954589374978e-23, /* F^(20)(x_77) / (20!) */ + ( real_type )+4.4665682566059112008899689620047805428172e-24, /* F^(21)(x_77) / (21!) */ + ( real_type )-2.86748812400304216259637312819328825931691e-25, /* F^(22)(x_77) / (22!) */ + ( real_type )+1.68065154615908767294541881832674129630965e-26, /* F^(23)(x_77) / (23!) */ + ( real_type )-9.0269139408771915842482362876770262051121e-28, /* F^(24)(x_77) / (24!) */ + + /* ===== n = 78, xn = 16.42, yn = 0 ==== */ + ( real_type )+0.0305054944690889621935730957812503043992676, /* F^(00)(x_78) / ( 0!) */ + ( real_type )-0.00186466045850051619656380388268680946162988, /* F^(01)(x_78) / ( 1!) */ + ( real_type )+0.0000570965299860708461779311777935913100310069, /* F^(02)(x_78) / ( 2!) */ + ( real_type )-0.00000116775438478593808171828612155975113191984, /* F^(03)(x_78) / ( 3!) */ + ( real_type )+0.0000000179474439129206529704655364125516216229811, /* F^(04)(x_78) / ( 4!) */ + ( real_type )-0.000000000222185991991735046222460699163809874397933, /* F^(05)(x_78) / ( 5!) 
*/ + ( real_type )+0.00000000000327994921731825631953258458673877689073777, /* F^(06)(x_78) / ( 6!) */ + ( real_type )-0.000000000000686906941273472650534326048105207077106533, /* F^(07)(x_78) / ( 7!) */ + ( real_type )+0.000000000000337849089429562310550662436698855098907502, /* F^(08)(x_78) / ( 8!) */ + ( real_type )-0.000000000000134863563244313098042124339500189661140697, /* F^(09)(x_78) / ( 9!) */ + ( real_type )+4.35412576480549056907142644158728400769293e-14, /* F^(10)(x_78) / (10!) */ + ( real_type )-1.15951558208168193414070070392717500548597e-14, /* F^(11)(x_78) / (11!) */ + ( real_type )+2.58953288687734418752936003009764656756143e-15, /* F^(12)(x_78) / (12!) */ + ( real_type )-4.9179399812484458350680306942194217427153e-16, /* F^(13)(x_78) / (13!) */ + ( real_type )+8.03732682617408831932920011461750903979529e-17, /* F^(14)(x_78) / (14!) */ + ( real_type )-1.14194268939987329233220048801378314886644e-17, /* F^(15)(x_78) / (15!) */ + ( real_type )+1.42313118878675948459108254798181936298424e-18, /* F^(16)(x_78) / (16!) */ + ( real_type )-1.56786145133799090575210774216391237041489e-19, /* F^(17)(x_78) / (17!) */ + ( real_type )+1.53758021099018046969454144312457524177236e-20, /* F^(18)(x_78) / (18!) */ + ( real_type )-1.35056240611145681518100771745430764154356e-21, /* F^(19)(x_78) / (19!) */ + ( real_type )+1.06842018051808595805681732164163068211957e-22, /* F^(20)(x_78) / (20!) */ + ( real_type )-7.65047746660899710275726341184197078515402e-24, /* F^(21)(x_78) / (21!) */ + ( real_type )+4.98104041283350720267701680120955999076882e-25, /* F^(22)(x_78) / (22!) */ + ( real_type )-2.96092389495694497316880411824105226345912e-26, /* F^(23)(x_78) / (23!) */ + ( real_type )+1.61304921432213544492275408991611766863934e-27, /* F^(24)(x_78) / (24!) */ + + /* ===== n = 79, xn = 16.63, yn = 0 ==== */ + ( real_type )+0.0301179310195570542608243395223315653783517, /* F^(00)(x_79) / ( 0!) 
*/ + ( real_type )-0.00181749496631886878114492625337248471598465, /* F^(01)(x_79) / ( 1!) */ + ( real_type )+0.0000549399996099607985045650824606912094909124, /* F^(02)(x_79) / ( 2!) */ + ( real_type )-0.00000110921282711531113160064068002025771714086, /* F^(03)(x_79) / ( 3!) */ + ( real_type )+0.0000000168284375352188442824946243129264337476014, /* F^(04)(x_79) / ( 4!) */ + ( real_type )-0.000000000206422451921797801902645376843383579534442, /* F^(05)(x_79) / ( 5!) */ + ( real_type )+0.00000000000374687770016519842431647733186714814364379, /* F^(06)(x_79) / ( 6!) */ + ( real_type )-0.00000000000113929593018226210391094909065972043182885, /* F^(07)(x_79) / ( 7!) */ + ( real_type )+0.00000000000057540693638628407709637029294071127907623, /* F^(08)(x_79) / ( 8!) */ + ( real_type )-0.00000000000023277812094071851580236595484754911993667, /* F^(09)(x_79) / ( 9!) */ + ( real_type )+7.61506718320542770666717976075635305576686e-14, /* F^(10)(x_79) / (10!) */ + ( real_type )-2.05492247665435290505309492800060590466504e-14, /* F^(11)(x_79) / (11!) */ + ( real_type )+4.65060091199470205465440235433644602356468e-15, /* F^(12)(x_79) / (12!) */ + ( real_type )-8.9508166654830184457367628919088754604495e-16, /* F^(13)(x_79) / (13!) */ + ( real_type )+1.48253906835186510774594067434928649229242e-16, /* F^(14)(x_79) / (14!) */ + ( real_type )-2.13489968980600633581873304368671098675221e-17, /* F^(15)(x_79) / (15!) */ + ( real_type )+2.6967494810015394120283376813486352981083e-18, /* F^(16)(x_79) / (16!) */ + ( real_type )-3.01154945248603219283877625884068379233187e-19, /* F^(17)(x_79) / (17!) */ + ( real_type )+2.99385797832142197636556621659744131256402e-20, /* F^(18)(x_79) / (18!) */ + ( real_type )-2.66590040412756998532046080676808183936744e-21, /* F^(19)(x_79) / (19!) */ + ( real_type )+2.13812091558183659228629153904352827089047e-22, /* F^(20)(x_79) / (20!) */ + ( real_type )-1.55226197752631210382636393934956468767578e-23, /* F^(21)(x_79) / (21!) 
*/ + ( real_type )+1.02472788468113033454161698487313128929794e-24, /* F^(22)(x_79) / (22!) */ + ( real_type )-6.17666128071672548737686628681956579413513e-26, /* F^(23)(x_79) / (23!) */ + ( real_type )+3.41223232959533226157957502158812265079865e-27, /* F^(24)(x_79) / (24!) */ + + /* ===== n = 80, xn = 16.84, yn = 0 ==== */ + ( real_type )+0.0297401092122126450370115395178546539000625, /* F^(00)(x_80) / ( 0!) */ + ( real_type )-0.00177209977979430353191880099686532554757218, /* F^(01)(x_80) / ( 1!) */ + ( real_type )+0.0000528909079509695534731954943362427969100832, /* F^(02)(x_80) / ( 2!) */ + ( real_type )-0.00000105429983934938735277997554898909396902844, /* F^(03)(x_80) / ( 3!) */ + ( real_type )+0.0000000157907778662067686264917158658574859018825, /* F^(04)(x_80) / ( 4!) */ + ( real_type )-0.000000000189998653796108439039589888468770410071129, /* F^(05)(x_80) / ( 5!) */ + ( real_type )+0.00000000000232343076949993080978410100635151274619343, /* F^(06)(x_80) / ( 6!) */ + ( real_type )-0.000000000000304694828254137689007782941223390524091641, /* F^(07)(x_80) / ( 7!) */ + ( real_type )+0.000000000000149993240288714685174874041889371427647644, /* F^(08)(x_80) / ( 8!) */ + ( real_type )-6.14349404487136056922040629143054032624415e-14, /* F^(09)(x_80) / ( 9!) */ + ( real_type )+2.03605563657087246261393212184264318775757e-14, /* F^(10)(x_80) / (10!) */ + ( real_type )-5.56646511541908279066834234723729948132741e-15, /* F^(11)(x_80) / (11!) */ + ( real_type )+1.27638932983298143059362016632044373191611e-15, /* F^(12)(x_80) / (12!) */ + ( real_type )-2.48913743868249031889237674645216937400649e-16, /* F^(13)(x_80) / (13!) */ + ( real_type )+4.17759967456452643145169738504003906775398e-17, /* F^(14)(x_80) / (14!) */ + ( real_type )-6.09614383582269146646181023114435122545806e-18, /* F^(15)(x_80) / (15!) */ + ( real_type )+7.80365857285681397826256773482399707490367e-19, /* F^(16)(x_80) / (16!) 
*/ + ( real_type )-8.83183039615292614554348896835857735590077e-20, /* F^(17)(x_80) / (17!) */ + ( real_type )+8.89853270386215665421486378688941324093266e-21, /* F^(18)(x_80) / (18!) */ + ( real_type )-8.03121617971148426485393971662093504860915e-22, /* F^(19)(x_80) / (19!) */ + ( real_type )+6.52895750123435067249487941085616788797116e-23, /* F^(20)(x_80) / (20!) */ + ( real_type )-4.80479734261303973912081601492778258927423e-24, /* F^(21)(x_80) / (21!) */ + ( real_type )+3.2154493897061971692413457218313845204943e-25, /* F^(22)(x_80) / (22!) */ + ( real_type )-1.96487479386358202842171339856409080155166e-26, /* F^(23)(x_80) / (23!) */ + ( real_type )+1.10050717477051435483843724876013578892241e-27, /* F^(24)(x_80) / (24!) */ + + /* ===== n = 81, xn = 17.05, yn = 0 ==== */ + ( real_type )+0.0293716656754489088365959105310074286753141, /* F^(00)(x_81) / ( 0!) */ + ( real_type )-0.00172838724688898054913394257884054402577088, /* F^(01)(x_81) / ( 1!) */ + ( real_type )+0.0000509426357500095389199983763204726226905699, /* F^(02)(x_81) / ( 2!) */ + ( real_type )-0.00000100275014687454271902259571385507088971908, /* F^(03)(x_81) / ( 3!) */ + ( real_type )+0.0000000148312796313029952807379331249395308957787, /* F^(04)(x_81) / ( 4!) */ + ( real_type )-0.000000000177984854265650425252928290397497318087979, /* F^(05)(x_81) / ( 5!) */ + ( real_type )+0.00000000000382523445356534725006763203575858160334509, /* F^(06)(x_81) / ( 6!) */ + ( real_type )-0.00000000000145168046784234732452140941107221421299618, /* F^(07)(x_81) / ( 7!) */ + ( real_type )+0.000000000000756515941364826535982867075564844654781407, /* F^(08)(x_81) / ( 8!) */ + ( real_type )-0.000000000000314052532671966462895621469577240879257905, /* F^(09)(x_81) / ( 9!) */ + ( real_type )+0.000000000000105427296185441215340564162686957944320139, /* F^(10)(x_81) / (10!) */ + ( real_type )-2.9196819618970172571342442689621530835368e-14, /* F^(11)(x_81) / (11!) 
*/ + ( real_type )+6.78192095258415067794477363653863506476552e-15, /* F^(12)(x_81) / (12!) */ + ( real_type )-1.33984060473823011401205714894393786142256e-15, /* F^(13)(x_81) / (13!) */ + ( real_type )+2.27817571224420626978758065745754861334495e-16, /* F^(14)(x_81) / (14!) */ + ( real_type )-3.36816551907751200389756010070643628438293e-17, /* F^(15)(x_81) / (15!) */ + ( real_type )+4.36853920981093946635729969011057541114978e-18, /* F^(16)(x_81) / (16!) */ + ( real_type )-5.00968762862756787955426150286548910369868e-19, /* F^(17)(x_81) / (17!) */ + ( real_type )+5.11473015491659252642159679886818910888643e-20, /* F^(18)(x_81) / (18!) */ + ( real_type )-4.67791962126554342535896542512340814773527e-21, /* F^(19)(x_81) / (19!) */ + ( real_type )+3.85394383089702621510444614462736718586924e-22, /* F^(20)(x_81) / (20!) */ + ( real_type )-2.8744183430209182015547959431846958017398e-23, /* F^(21)(x_81) / (21!) */ + ( real_type )+1.94963577104371363362335252590588986335263e-24, /* F^(22)(x_81) / (22!) */ + ( real_type )-1.20755639790030975171709814841919326297749e-25, /* F^(23)(x_81) / (23!) */ + ( real_type )+6.85567591585208618898704776728522893355784e-27, /* F^(24)(x_81) / (24!) */ + + /* ===== n = 82, xn = 17.26, yn = 0 ==== */ + ( real_type )+0.0290122549185626204232460038847629393025906, /* F^(00)(x_82) / ( 0!) */ + ( real_type )-0.00168627508300417947043779771066457848940295, /* F^(01)(x_82) / ( 1!) */ + ( real_type )+0.0000490890466495023748323337501653397775108388, /* F^(02)(x_82) / ( 2!) */ + ( real_type )-0.000000954315935638296194678910232092680971619599, /* F^(03)(x_82) / ( 3!) */ + ( real_type )+0.0000000139363910691630572196800128828560796895891, /* F^(04)(x_82) / ( 4!) */ + ( real_type )-0.000000000160570848015056599386252598711672933712071, /* F^(05)(x_82) / ( 5!) */ + ( real_type )-0.00000000000085101729204985875780321712879734873836623, /* F^(06)(x_82) / ( 6!) 
*/ + ( real_type )+0.00000000000169196138311500088076497671941657574565859, /* F^(07)(x_82) / ( 7!) */ + ( real_type )-0.000000000000908969456551479039002087951001317618822228, /* F^(08)(x_82) / ( 8!) */ + ( real_type )+0.000000000000382226866100048088576328098797156838225399, /* F^(09)(x_82) / ( 9!) */ + ( real_type )-0.000000000000129948922696241389494648883901353421372571, /* F^(10)(x_82) / (10!) */ + ( real_type )+3.64480344486968989206545810774951181265857e-14, /* F^(11)(x_82) / (11!) */ + ( real_type )-8.57492548907960046212725092115751705580637e-15, /* F^(12)(x_82) / (12!) */ + ( real_type )+1.71589297573404272140467894213431476512828e-15, /* F^(13)(x_82) / (13!) */ + ( real_type )-2.95531854260821528289974421391700183044826e-16, /* F^(14)(x_82) / (14!) */ + ( real_type )+4.42599935915370317660549826377157619969306e-17, /* F^(15)(x_82) / (15!) */ + ( real_type )-5.81535261072934736756506415184766884894586e-18, /* F^(16)(x_82) / (16!) */ + ( real_type )+6.75606233447195684815048728572741632287468e-19, /* F^(17)(x_82) / (17!) */ + ( real_type )-6.98828241810515923279628240523171395366476e-20, /* F^(18)(x_82) / (18!) */ + ( real_type )+6.47570386887353018792273802605441226704071e-21, /* F^(19)(x_82) / (19!) */ + ( real_type )-5.40565274949859567190352283713200609510751e-22, /* F^(20)(x_82) / (20!) */ + ( real_type )+4.08529637207600162789965965225009703515852e-23, /* F^(21)(x_82) / (21!) */ + ( real_type )-2.80789239508427749589826788798850453036881e-24, /* F^(22)(x_82) / (22!) */ + ( real_type )+1.76242483445642736046427089596632686030167e-25, /* F^(23)(x_82) / (23!) */ + ( real_type )-1.01403456299844866920494583755091669919842e-26, /* F^(24)(x_82) / (24!) */ + + /* ===== n = 83, xn = 17.47, yn = 0 ==== */ + ( real_type )+0.0286615482434347706561739985173263709460918, /* F^(00)(x_83) / ( 0!) */ + ( real_type )-0.00164568598108878426785994816290920922219211, /* F^(01)(x_83) / ( 1!) 
*/ + ( real_type )+0.0000473244499004134493576956357702889833332968, /* F^(02)(x_83) / ( 2!) */ + ( real_type )-0.00000090877832273466631069662592359055902630468, /* F^(03)(x_83) / ( 3!) */ + ( real_type )+0.0000000131110994207720809519127239886030754524064, /* F^(04)(x_83) / ( 4!) */ + ( real_type )-0.000000000152370419817539848025777692631070808039279, /* F^(05)(x_83) / ( 5!) */ + ( real_type )+0.00000000000223626193125532094145012829072760615211112, /* F^(06)(x_83) / ( 6!) */ + ( real_type )-0.000000000000558394482823967780953712471858412440002333, /* F^(07)(x_83) / ( 7!) */ + ( real_type )+0.000000000000294929464612088808623598510996888693521079, /* F^(08)(x_83) / ( 8!) */ + ( real_type )-0.000000000000125523580707610860028680787128783075305432, /* F^(09)(x_83) / ( 9!) */ + ( real_type )+4.32117893949370229646401755931863666523521e-14, /* F^(10)(x_83) / (10!) */ + ( real_type )-1.22730050245973296008086852771087027978475e-14, /* F^(11)(x_83) / (11!) */ + ( real_type )+2.92397596699399537129959800221060253512911e-15, /* F^(12)(x_83) / (12!) */ + ( real_type )-5.9254298541662658560240120046789603342162e-16, /* F^(13)(x_83) / (13!) */ + ( real_type )+1.03357018164309440714625365681930815608656e-16, /* F^(14)(x_83) / (14!) */ + ( real_type )-1.56773635323239550460167455929083019436342e-17, /* F^(15)(x_83) / (15!) */ + ( real_type )+2.08633462296741813085949777894186952828974e-18, /* F^(16)(x_83) / (16!) */ + ( real_type )-2.4550946116574325011094888469546293531462e-19, /* F^(17)(x_83) / (17!) */ + ( real_type )+2.57236372163565703441611669580113255658403e-20, /* F^(18)(x_83) / (18!) */ + ( real_type )-2.4146664762080273451687389289683408545558e-21, /* F^(19)(x_83) / (19!) */ + ( real_type )+2.0419621918018644050983833642063829333608e-22, /* F^(20)(x_83) / (20!) */ + ( real_type )-1.5634135723934662697112596144159847599817e-23, /* F^(21)(x_83) / (21!) */ + ( real_type )+1.08868727710649476261851978233606196373191e-24, /* F^(22)(x_83) / (22!) 
*/ + ( real_type )-6.92352935775823193943130146069572976388001e-26, /* F^(23)(x_83) / (23!) */ + ( real_type )+4.03632439846646300355716251255867380918003e-27, /* F^(24)(x_83) / (24!) */ + + /* ===== n = 84, xn = 17.68, yn = 0 ==== */ + ( real_type )+0.028319232734848067795097432772015190860891, /* F^(00)(x_84) / ( 0!) */ + ( real_type )-0.00160654725357366032120866411585324476628934, /* F^(01)(x_84) / ( 1!) */ + ( real_type )+0.0000456435589114874406114706882421548127100472, /* F^(02)(x_84) / ( 2!) */ + ( real_type )-0.000000865928483157957571776388409109504766592824, /* F^(03)(x_84) / ( 3!) */ + ( real_type )+0.0000000123427449670170360543364702501923327235931, /* F^(04)(x_84) / ( 4!) */ + ( real_type )-0.000000000143166374389158778176085381557303125231624, /* F^(05)(x_84) / ( 5!) */ + ( real_type )+0.00000000000351307286576785781986764586591218368053672, /* F^(06)(x_84) / ( 6!) */ + ( real_type )-0.00000000000156183092874829353048730051052972800569541, /* F^(07)(x_84) / ( 7!) */ + ( real_type )+0.000000000000847433731195112005807560670035451583136764, /* F^(08)(x_84) / ( 8!) */ + ( real_type )-0.000000000000365209077616348806640076620508371197251689, /* F^(09)(x_84) / ( 9!) */ + ( real_type )+0.000000000000127285498224459829987807290925401007894018, /* F^(10)(x_84) / (10!) */ + ( real_type )-3.66020271039125146769880797061467730000024e-14, /* F^(11)(x_84) / (11!) */ + ( real_type )+8.82925755647234869566468261813540753028989e-15, /* F^(12)(x_84) / (12!) */ + ( real_type )-1.81169562715260917668007554542018229799505e-15, /* F^(13)(x_84) / (13!) */ + ( real_type )+3.19992175734512989415197978996707147539561e-16, /* F^(14)(x_84) / (14!) */ + ( real_type )-4.91502415284628026297239325005247454208008e-17, /* F^(15)(x_84) / (15!) */ + ( real_type )+6.62383130385133641070201614649044395159643e-18, /* F^(16)(x_84) / (16!) */ + ( real_type )-7.89379496004006272280939523868299194123437e-19, /* F^(17)(x_84) / (17!) 
*/ + ( real_type )+8.37649139319211856192084548117393008728486e-20, /* F^(18)(x_84) / (18!) */ + ( real_type )-7.96377815094819714381654093471599183373484e-21, /* F^(19)(x_84) / (19!) */ + ( real_type )+6.82122248628071782045503573997534005875327e-22, /* F^(20)(x_84) / (20!) */ + ( real_type )-5.29007008701829113848461758739999393087317e-23, /* F^(21)(x_84) / (21!) */ + ( real_type )+3.73150875759727023555780870707955982281388e-24, /* F^(22)(x_84) / (22!) */ + ( real_type )-2.40393985398337056904289572015407722731762e-25, /* F^(23)(x_84) / (23!) */ + ( real_type )+1.4197702555015259064716989218721164816986e-26, /* F^(24)(x_84) / (24!) */ + + /* ===== n = 85, xn = 17.89, yn = 0 ==== */ + ( real_type )+0.0279850103229300360218746106322339780429843, /* F^(00)(x_85) / ( 0!) */ + ( real_type )-0.00156879050486447849532153072629904053168648, /* F^(01)(x_85) / ( 1!) */ + ( real_type )+0.000044041461006632124055086903746258665225117, /* F^(02)(x_85) / ( 2!) */ + ( real_type )-0.000000825578316285110439474950078941988977585139, /* F^(03)(x_85) / ( 3!) */ + ( real_type )+0.0000000116274637824612228403252281287238101042879, /* F^(04)(x_85) / ( 4!) */ + ( real_type )-0.000000000134066036558068640554583485953023797658687, /* F^(05)(x_85) / ( 5!) */ + ( real_type )+0.00000000000408798262102541644507966129278676921151548, /* F^(06)(x_85) / ( 6!) */ + ( real_type )-0.00000000000207383811395937903288478151795937702441702, /* F^(07)(x_85) / ( 7!) */ + ( real_type )+0.00000000000114146218078567653677915417878008598417359, /* F^(08)(x_85) / ( 8!) */ + ( real_type )-0.000000000000497949026571119570458318356351196595078141, /* F^(09)(x_85) / ( 9!) */ + ( real_type )+0.000000000000175676753201485155458309804472786672148731, /* F^(10)(x_85) / (10!) */ + ( real_type )-5.11387498373378230050823323899158005601582e-14, /* F^(11)(x_85) / (11!) */ + ( real_type )+1.24881091228611939181807633546592467012668e-14, /* F^(12)(x_85) / (12!) 
*/ + ( real_type )-2.59419957304911523649337121793437835941356e-15, /* F^(13)(x_85) / (13!) */ + ( real_type )+4.63896886703409362153877151087410437559077e-16, /* F^(14)(x_85) / (14!) */ + ( real_type )-7.21423354660082019575264632812165138069598e-17, /* F^(15)(x_85) / (15!) */ + ( real_type )+9.84407538457448091107481053303527029747963e-18, /* F^(16)(x_85) / (16!) */ + ( real_type )-1.18787711068637365684225704893415676582388e-18, /* F^(17)(x_85) / (17!) */ + ( real_type )+1.27640031102326277844042628828040971182793e-19, /* F^(18)(x_85) / (18!) */ + ( real_type )-1.22885960901847652785931892456736968560141e-20, /* F^(19)(x_85) / (19!) */ + ( real_type )+1.06591648409486685469769262717429610244758e-21, /* F^(20)(x_85) / (20!) */ + ( real_type )-8.37182094060339455646831803232320907123064e-23, /* F^(21)(x_85) / (21!) */ + ( real_type )+5.98081569586445369446831846192095623219574e-24, /* F^(22)(x_85) / (22!) */ + ( real_type )-3.9024480084615253182650609213433762273434e-25, /* F^(23)(x_85) / (23!) */ + ( real_type )+2.33447657989725422256540370816675930386801e-26, /* F^(24)(x_85) / (24!) */ + + /* ===== n = 86, xn = 18.11, yn = 0 ==== */ + ( real_type )+0.0276585969117343482506150690312596643118447, /* F^(00)(x_86) / ( 0!) */ + ( real_type )-0.00153235133017005299182705184311512932635817, /* F^(01)(x_86) / ( 1!) */ + ( real_type )+0.0000425135856722525288106033728368653814326842, /* F^(02)(x_86) / ( 2!) */ + ( real_type )-0.000000787553621513053045319013898038778517518631, /* F^(03)(x_86) / ( 3!) */ + ( real_type )+0.0000000109587930453348030398805776993707074807841, /* F^(04)(x_86) / ( 4!) */ + ( real_type )-0.000000000121874128034400689066277250463439222306692, /* F^(05)(x_86) / ( 5!) */ + ( real_type )+0.000000000000822475063045567802051267902683035214989751, /* F^(06)(x_86) / ( 6!) */ + ( real_type )+0.000000000000221274580210369209245177992425648657155589, /* F^(07)(x_86) / ( 7!) 
*/ + ( real_type )-0.000000000000128866591951229987686262431941582715452921, /* F^(08)(x_86) / ( 8!) */ + ( real_type )+5.69260305435003086661970980627441197728893e-14, /* F^(09)(x_86) / ( 9!) */ + ( real_type )-2.03268450661524655443247099203779973762983e-14, /* F^(10)(x_86) / (10!) */ + ( real_type )+5.98893018244784457950025592532040422154978e-15, /* F^(11)(x_86) / (11!) */ + ( real_type )-1.4803230830758736271496086506064063086628e-15, /* F^(12)(x_86) / (12!) */ + ( real_type )+3.11272916717050612297995082568350552057164e-16, /* F^(13)(x_86) / (13!) */ + ( real_type )-5.63449700479306244795551942292466233328842e-17, /* F^(14)(x_86) / (14!) */ + ( real_type )+8.87028204956424118470907215913022364176789e-18, /* F^(15)(x_86) / (15!) */ + ( real_type )-1.22533171449673248985407091274704633241145e-18, /* F^(16)(x_86) / (16!) */ + ( real_type )+1.49692460336193198704498321367394667663312e-19, /* F^(17)(x_86) / (17!) */ + ( real_type )-1.62848336481026618047269390375699398356415e-20, /* F^(18)(x_86) / (18!) */ + ( real_type )+1.58739584977829628542053053490115369833535e-21, /* F^(19)(x_86) / (19!) */ + ( real_type )-1.39415615440955590384538609850771560824484e-22, /* F^(20)(x_86) / (20!) */ + ( real_type )+1.10874676344192242833799942079220789305513e-23, /* F^(21)(x_86) / (21!) */ + ( real_type )-8.02077191922640459870991270564291083212453e-25, /* F^(22)(x_86) / (22!) */ + ( real_type )+5.29975154296413522928437849390627244895023e-26, /* F^(23)(x_86) / (23!) */ + ( real_type )-3.2106286186816059433937700995864276256055e-27, /* F^(24)(x_86) / (24!) */ + + /* ===== n = 87, xn = 18.32, yn = 0 ==== */ + ( real_type )+0.0273397215685935480035125872749087135987964, /* F^(00)(x_87) / ( 0!) */ + ( real_type )-0.00149716903900566642637423680144849314471187, /* F^(01)(x_87) / ( 1!) */ + ( real_type )+0.0000410556781761699063257613029611447231034888, /* F^(02)(x_87) / ( 2!) */ + ( real_type )-0.000000751697482494678807930512006168692953326387, /* F^(03)(x_87) / ( 3!) 
*/ + ( real_type )+0.0000000103383473184569806682230525268361826153178, /* F^(04)(x_87) / ( 4!) */ + ( real_type )-0.000000000114449789361282544458942483302554201253039, /* F^(05)(x_87) / ( 5!) */ + ( real_type )+0.00000000000158715464207542477484910757406575867292813, /* F^(06)(x_87) / ( 6!) */ + ( real_type )-0.000000000000407960220117836835186145735465356976799966, /* F^(07)(x_87) / ( 7!) */ + ( real_type )+0.000000000000226418035246150697481501207538279980568924, /* F^(08)(x_87) / ( 8!) */ + ( real_type )-0.000000000000101136544175283395712032833770486632300719, /* F^(09)(x_87) / ( 9!) */ + ( real_type )+3.65447618347731816390519783691883742145053e-14, /* F^(10)(x_87) / (10!) */ + ( real_type )-1.08964051192414333101692464185881351798075e-14, /* F^(11)(x_87) / (11!) */ + ( real_type )+2.72575015427934027297427854400168081426483e-15, /* F^(12)(x_87) / (12!) */ + ( real_type )-5.80073754126696600290455042194163067743266e-16, /* F^(13)(x_87) / (13!) */ + ( real_type )+1.06273833377586867446488469043161119618953e-16, /* F^(14)(x_87) / (14!) */ + ( real_type )-1.69338250588870822129039258629143073381539e-17, /* F^(15)(x_87) / (15!) */ + ( real_type )+2.36774572295001125406074294794434978475196e-18, /* F^(16)(x_87) / (16!) */ + ( real_type )-2.92794243062893983091608659009086755358324e-19, /* F^(17)(x_87) / (17!) */ + ( real_type )+3.22436947831676495621687786748222385137703e-20, /* F^(18)(x_87) / (18!) */ + ( real_type )-3.18173062972521346845443864960826923036456e-21, /* F^(19)(x_87) / (19!) */ + ( real_type )+2.82894359042422772559466972905046600403534e-22, /* F^(20)(x_87) / (20!) */ + ( real_type )-2.27770838640706917317767892646349894471824e-23, /* F^(21)(x_87) / (21!) */ + ( real_type )+1.66821947489407990226836153943230679159786e-24, /* F^(22)(x_87) / (22!) */ + ( real_type )-1.11604668068954978838604930361913758105789e-25, /* F^(23)(x_87) / (23!) */ + ( real_type )+6.84582056636892997080562550842561979342218e-27, /* F^(24)(x_87) / (24!) 
*/ + + /* ===== n = 88, xn = 18.53, yn = 0 ==== */ + ( real_type )+0.0270281257693319151623337823932836698764564, /* F^(00)(x_88) / ( 0!) */ + ( real_type )-0.00146318640050889697202736153885080647486236, /* F^(01)(x_88) / ( 1!) */ + ( real_type )+0.0000396637726796149174320725024842152123145938, /* F^(02)(x_88) / ( 2!) */ + ( real_type )-0.000000717861735554739947201137569206509072870896, /* F^(03)(x_88) / ( 3!) */ + ( real_type )+0.00000000975945577439031499848731270941825077420724, /* F^(04)(x_88) / ( 4!) */ + ( real_type )-0.000000000107306057685794816641426228565628081768519, /* F^(05)(x_88) / ( 5!) */ + ( real_type )+0.00000000000200526421846953212994456138370530112978346, /* F^(06)(x_88) / ( 6!) */ + ( real_type )-0.000000000000786359804288417656009640581890559222259799, /* F^(07)(x_88) / ( 7!) */ + ( real_type )+0.000000000000446308866920991558308910304466548706678654, /* F^(08)(x_88) / ( 8!) */ + ( real_type )-0.000000000000201732445251467242279589339455323633143667, /* F^(09)(x_88) / ( 9!) */ + ( real_type )+7.37553822321812175128636220946188028905858e-14, /* F^(10)(x_88) / (10!) */ + ( real_type )-2.22519381468029594191594576755626313461929e-14, /* F^(11)(x_88) / (11!) */ + ( real_type )+5.63251942065454673156902397625203656386334e-15, /* F^(12)(x_88) / (12!) */ + ( real_type )-1.2129645597454349696718836078874284472836e-15, /* F^(13)(x_88) / (13!) */ + ( real_type )+2.24882589292021575535121402385832687458016e-16, /* F^(14)(x_88) / (14!) */ + ( real_type )-3.62631603473991346551092980488076347412862e-17, /* F^(15)(x_88) / (15!) */ + ( real_type )+5.13148896484975407822827125046116636004444e-18, /* F^(16)(x_88) / (16!) */ + ( real_type )-6.42222404922446562031052027270642268611613e-19, /* F^(17)(x_88) / (17!) */ + ( real_type )+7.15812517815956705052646127706609601422934e-20, /* F^(18)(x_88) / (18!) */ + ( real_type )-7.14935098402202624212989336324824773963307e-21, /* F^(19)(x_88) / (19!) 
*/ + ( real_type )+6.43418498577409912716538419214794475516955e-22, /* F^(20)(x_88) / (20!) */ + ( real_type )-5.2438579320784669647863689992958873186754e-23, /* F^(21)(x_88) / (21!) */ + ( real_type )+3.88782935506399785977890845551357905269524e-24, /* F^(22)(x_88) / (22!) */ + ( real_type )-2.63302769679368782104944561468279752168306e-25, /* F^(23)(x_88) / (23!) */ + ( real_type )+1.63506743069695031356967665231689442398943e-26, /* F^(24)(x_88) / (24!) */ + + /* ===== n = 89, xn = 18.74, yn = 0 ==== */ + ( real_type )+0.0267235626949087363281474632612449247565235, /* F^(00)(x_89) / ( 0!) */ + ( real_type )-0.00143034940921157294085931116137922505021846, /* F^(01)(x_89) / ( 1!) */ + ( real_type )+0.0000383341704224728038912933775528873415381644, /* F^(02)(x_89) / ( 2!) */ + ( real_type )-0.000000685909741407894441594422514009819611221158, /* F^(03)(x_89) / ( 3!) */ + ( real_type )+0.00000000921571446633327865619198930610128038695535, /* F^(04)(x_89) / ( 4!) */ + ( real_type )-0.0000000000956761192722043728389539473752315689534906, /* F^(05)(x_89) / ( 5!) */ + ( real_type )-0.00000000000280414184550205509556043696906383890290111, /* F^(06)(x_89) / ( 6!) */ + ( real_type )+0.00000000000279537889300546818779429332771406189938939, /* F^(07)(x_89) / ( 7!) */ + ( real_type )-0.00000000000162424941407544350050075165453501566156168, /* F^(08)(x_89) / ( 8!) */ + ( real_type )+0.000000000000742811912332644428448693853180104809966258, /* F^(09)(x_89) / ( 9!) */ + ( real_type )-0.000000000000274749547163316874238378488092431789382745, /* F^(10)(x_89) / (10!) */ + ( real_type )+8.38621106191463948347953671484515565750261e-14, /* F^(11)(x_89) / (11!) */ + ( real_type )-2.14768596350660722360374787895861870878896e-14, /* F^(12)(x_89) / (12!) */ + ( real_type )+4.67952684829432561295877739733364200785481e-15, /* F^(13)(x_89) / (13!) */ + ( real_type )-8.77831526102614357268363816713721033752758e-16, /* F^(14)(x_89) / (14!) 
*/ + ( real_type )+1.43231455838858402093772916253089060102726e-16, /* F^(15)(x_89) / (15!) */ + ( real_type )-2.05092427407772527172927052762409869856136e-17, /* F^(16)(x_89) / (16!) */ + ( real_type )+2.59741506534622380955851877118659777792209e-18, /* F^(17)(x_89) / (17!) */ + ( real_type )-2.92968703195363268357450313590181105776592e-19, /* F^(18)(x_89) / (18!) */ + ( real_type )+2.96122298980141182514929072579309463535851e-20, /* F^(19)(x_89) / (19!) */ + ( real_type )-2.69710135276254279441223058771405126063628e-21, /* F^(20)(x_89) / (20!) */ + ( real_type )+2.22469617658515766394027470865145202842302e-22, /* F^(21)(x_89) / (21!) */ + ( real_type )-1.66939863699804834738948728490477061744821e-23, /* F^(22)(x_89) / (22!) */ + ( real_type )+1.14434907891729764018473203093674430570523e-24, /* F^(23)(x_89) / (23!) */ + ( real_type )-7.19293837174644331999549524022314556139464e-26, /* F^(24)(x_89) / (24!) */ + + /* ===== n = 90, xn = 18.95, yn = 0 ==== */ + ( real_type )+0.026425796575435644735625484197407954040757, /* F^(00)(x_90) / ( 0!) */ + ( real_type )-0.00139860706914014250093290284943945535148289, /* F^(01)(x_90) / ( 1!) */ + ( real_type )+0.0000370634199256318581093973588777640813588693, /* F^(02)(x_90) / ( 2!) */ + ( real_type )-0.000000655719490013030492569830444040536878223333, /* F^(03)(x_90) / ( 3!) */ + ( real_type )+0.00000000871068119970958769048381990591428640383214, /* F^(04)(x_90) / ( 4!) */ + ( real_type )-0.0000000000891690708781325528211812964535210212035777, /* F^(05)(x_90) / ( 5!) */ + ( real_type )-0.00000000000292316679172139398406629121809954854385794, /* F^(06)(x_90) / ( 6!) */ + ( real_type )+0.00000000000286725836620219441843797015332663858812168, /* F^(07)(x_90) / ( 7!) */ + ( real_type )-0.00000000000168466891771306928531948689628368543521632, /* F^(08)(x_90) / ( 8!) */ + ( real_type )+0.000000000000779299638410623369837627859485278598069599, /* F^(09)(x_90) / ( 9!) 
*/ + ( real_type )-0.000000000000291569838481154526609543049657554037337429, /* F^(10)(x_90) / (10!) */ + ( real_type )+9.00256394434410064287349081883380211888413e-14, /* F^(11)(x_90) / (11!) */ + ( real_type )-2.33228140128179439996478400755279431294144e-14, /* F^(12)(x_90) / (12!) */ + ( real_type )+5.14087352776643470371856068777662257802492e-15, /* F^(13)(x_90) / (13!) */ + ( real_type )-9.75632266533230256050077581472478946007456e-16, /* F^(14)(x_90) / (14!) */ + ( real_type )+1.61052966626563670606978221984236843227428e-16, /* F^(15)(x_90) / (15!) */ + ( real_type )-2.33319354912092208813520180700576599192135e-17, /* F^(16)(x_90) / (16!) */ + ( real_type )+2.98970911635996157177470365563118036253395e-18, /* F^(17)(x_90) / (17!) */ + ( real_type )-3.41201563616366398692438819262220461158582e-19, /* F^(18)(x_90) / (18!) */ + ( real_type )+3.48962702535713793573360808976441133273658e-20, /* F^(19)(x_90) / (19!) */ + ( real_type )-3.21617255991298136266760446106135364404859e-21, /* F^(20)(x_90) / (20!) */ + ( real_type )+2.68449854356792072744181012828248097267732e-22, /* F^(21)(x_90) / (21!) */ + ( real_type )-2.03854017305557412521950027113028315674188e-23, /* F^(22)(x_90) / (22!) */ + ( real_type )+1.41416805630577155225359539851987102933893e-24, /* F^(23)(x_90) / (23!) */ + ( real_type )-8.99598658254769613242703761469231999000116e-26, /* F^(24)(x_90) / (24!) */ + + /* ===== n = 91, xn = 19.16, yn = 0 ==== */ + ( real_type )+0.0261346020778743874048668010927461155846186, /* F^(00)(x_91) / ( 0!) */ + ( real_type )-0.00136791119434481215672626407862719593000758, /* F^(01)(x_91) / ( 1!) */ + ( real_type )+0.0000358482963657420083877774314236579984261137, /* F^(02)(x_91) / ( 2!) */ + ( real_type )-0.00000062717579946636001437679821672857124533017, /* F^(03)(x_91) / ( 3!) */ + ( real_type )+0.00000000824197821966421655328662498565392453164864, /* F^(04)(x_91) / ( 4!) 
*/ + ( real_type )-0.0000000000884001031265867249140939201736755621311687, /* F^(05)(x_91) / ( 5!) */ + ( real_type )+0.00000000000250912369401620049531413393256948159829401, /* F^(06)(x_91) / ( 6!) */ + ( real_type )-0.00000000000136066028941146915846961911371030130455119, /* F^(07)(x_91) / ( 7!) */ + ( real_type )+0.000000000000803404386098241337554993318262004174496712, /* F^(08)(x_91) / ( 8!) */ + ( real_type )-0.000000000000375838373427430237373530465313493726270554, /* F^(09)(x_91) / ( 9!) */ + ( real_type )+0.000000000000142220096843554936138143685883742838074453, /* F^(10)(x_91) / (10!) */ + ( real_type )-4.4414112519827640043090583199280391959306e-14, /* F^(11)(x_91) / (11!) */ + ( real_type )+1.16382193440565060238007511049831479483391e-14, /* F^(12)(x_91) / (12!) */ + ( real_type )-2.59482372869106644891534836396118275100972e-15, /* F^(13)(x_91) / (13!) */ + ( real_type )+4.98123594010949811782948309420763566521555e-16, /* F^(14)(x_91) / (14!) */ + ( real_type )-8.31791540262212074004068088689419471461346e-17, /* F^(15)(x_91) / (15!) */ + ( real_type )+1.21900721682280615110988025094123142426544e-17, /* F^(16)(x_91) / (16!) */ + ( real_type )-1.58018977374936299396914044840160272201932e-18, /* F^(17)(x_91) / (17!) */ + ( real_type )+1.82444730129662571223622093483602655766964e-19, /* F^(18)(x_91) / (18!) */ + ( real_type )-1.88779424617827526920337986115885268711837e-20, /* F^(19)(x_91) / (19!) */ + ( real_type )+1.76029640156822591308572074709294362813689e-21, /* F^(20)(x_91) / (20!) */ + ( real_type )-1.48660662330144535088235536205877021594128e-22, /* F^(21)(x_91) / (21!) */ + ( real_type )+1.14223210010752954554786324071334027191472e-23, /* F^(22)(x_91) / (22!) */ + ( real_type )-8.01778149549463258672440601694260391740796e-25, /* F^(23)(x_91) / (23!) */ + ( real_type )+5.16102733998194354717961681400900264466447e-26, /* F^(24)(x_91) / (24!) 
*/ + + /* ===== n = 92, xn = 19.37, yn = 0 ==== */ + ( real_type )+0.0258497637340652429455729256209668852097065, /* F^(00)(x_92) / ( 0!) */ + ( real_type )-0.00133821622484308548779235965695902182101004, /* F^(01)(x_92) / ( 1!) */ + ( real_type )+0.000034685784079364178615886309730333244168244, /* F^(02)(x_92) / ( 2!) */ + ( real_type )-0.000000600168481832812554270007964610166683795762, /* F^(03)(x_92) / ( 3!) */ + ( real_type )+0.00000000779847902523171012186092779869097584869321, /* F^(04)(x_92) / ( 4!) */ + ( real_type )-0.0000000000801683898080406920274546999599746853244881, /* F^(05)(x_92) / ( 5!) */ + ( real_type )-0.00000000000038670422959509965433781031635045473224192, /* F^(06)(x_92) / ( 6!) */ + ( real_type )+0.000000000000851071373182510983550457587162979891321736, /* F^(07)(x_92) / ( 7!) */ + ( real_type )-0.000000000000513395788758421376651379225689367822451512, /* F^(08)(x_92) / ( 8!) */ + ( real_type )+0.000000000000242895848930202210783400879692734519655548, /* F^(09)(x_92) / ( 9!) */ + ( real_type )-9.29493019497566910165839605953840948796612e-14, /* F^(10)(x_92) / (10!) */ + ( real_type )+2.93552335837695326345299515927155143871211e-14, /* F^(11)(x_92) / (11!) */ + ( real_type )-7.77936927233211654891440788969561029535646e-15, /* F^(12)(x_92) / (12!) */ + ( real_type )+1.75417550443927152643493702094772673305768e-15, /* F^(13)(x_92) / (13!) */ + ( real_type )-3.40583640298403895192030155044762198418748e-16, /* F^(14)(x_92) / (14!) */ + ( real_type )+5.75223919877370323170302204878623715934416e-17, /* F^(15)(x_92) / (15!) */ + ( real_type )-8.52665884338373742642405184662238051135127e-18, /* F^(16)(x_92) / (16!) */ + ( real_type )+1.11801224326344112793837194877910667487372e-18, /* F^(17)(x_92) / (17!) */ + ( real_type )-1.30571380878182865454205983668395018892187e-19, /* F^(18)(x_92) / (18!) */ + ( real_type )+1.36667572907465926517947581035204005293292e-20, /* F^(19)(x_92) / (19!) 
*/ + ( real_type )-1.28915665845172715311899721756453103090923e-21, /* F^(20)(x_92) / (20!) */ + ( real_type )+1.10138735143579502267235102343288358058546e-22, /* F^(21)(x_92) / (21!) */ + ( real_type )-8.5612602651152630480624362005967290286555e-24, /* F^(22)(x_92) / (22!) */ + ( real_type )+6.07984073395185991668766333510842717944784e-25, /* F^(23)(x_92) / (23!) */ + ( real_type )-3.95953570126149264706601614226906956766217e-26, /* F^(24)(x_92) / (24!) */ + + /* ===== n = 93, xn = 19.58, yn = 0 ==== */ + ( real_type )+0.0255710754060192979083778000275314785319582, /* F^(00)(x_93) / ( 0!) */ + ( real_type )-0.00130947905675559852521852358051369963646505, /* F^(01)(x_93) / ( 1!) */ + ( real_type )+0.000033573063124104172544301898927670367957925, /* F^(02)(x_93) / ( 2!) */ + ( real_type )-0.000000574601672380881426619510930411878873846951, /* F^(03)(x_93) / ( 3!) */ + ( real_type )+0.00000000738435749799199757968962894888055540690933, /* F^(04)(x_93) / ( 4!) */ + ( real_type )-0.0000000000742022966056258507802518048111803385109874, /* F^(05)(x_93) / ( 5!) */ + ( real_type )-0.00000000000133714665227127531550736158046666401682976, /* F^(06)(x_93) / ( 6!) */ + ( real_type )+0.00000000000157334585154900732886367663851873912367207, /* F^(07)(x_93) / ( 7!) */ + ( real_type )-0.000000000000956669833421160850440238489390760962337955, /* F^(08)(x_93) / ( 8!) */ + ( real_type )+0.000000000000457627656446059675329823863546195739324338, /* F^(09)(x_93) / ( 9!) */ + ( real_type )-0.000000000000177071423034667325663067083987636845489078, /* F^(10)(x_93) / (10!) */ + ( real_type )+5.65472612628523074031716596112519628240052e-14, /* F^(11)(x_93) / (11!) */ + ( real_type )-1.51533118055510996303276551911160748886355e-14, /* F^(12)(x_93) / (12!) */ + ( real_type )+3.45530914232253246065351542929750259093509e-15, /* F^(13)(x_93) / (13!) */ + ( real_type )-6.78425270096930305281884087243076697860606e-16, /* F^(14)(x_93) / (14!) 
*/ + ( real_type )+1.15875954317817365979985406707134312934597e-16, /* F^(15)(x_93) / (15!) */ + ( real_type )-1.73711004649555697133273995922741215118647e-17, /* F^(16)(x_93) / (16!) */ + ( real_type )+2.30356826705921084656883493505863830383198e-18, /* F^(17)(x_93) / (17!) */ + ( real_type )-2.72096372970383606529484213156877922659577e-19, /* F^(18)(x_93) / (18!) */ + ( real_type )+2.8805454574453086802981883427173783146637e-20, /* F^(19)(x_93) / (19!) */ + ( real_type )-2.74829808650688525468869710175530561222062e-21, /* F^(20)(x_93) / (20!) */ + ( real_type )+2.37498915463618764514607732454322231394718e-22, /* F^(21)(x_93) / (21!) */ + ( real_type )-1.86739976382915847937558819343931240100724e-23, /* F^(22)(x_93) / (22!) */ + ( real_type )+1.34148132956439941036569944490046045397679e-24, /* F^(23)(x_93) / (23!) */ + ( real_type )-8.83780534946721688016289451483283019911939e-26, /* F^(24)(x_93) / (24!) */ + + /* ===== n = 94, xn = 19.79, yn = 0 ==== */ + ( real_type )+0.0252983397856513709121225761435040371463168, /* F^(00)(x_94) / ( 0!) */ + ( real_type )-0.00128165888472782047865654035918555960932361, /* F^(01)(x_94) / ( 1!) */ + ( real_type )+0.000032507492902220880283120027731696247770022, /* F^(02)(x_94) / ( 2!) */ + ( real_type )-0.000000550385103166724419279601764454523686345349, /* F^(03)(x_94) / ( 3!) */ + ( real_type )+0.00000000700006848655338444965693616897135789213882, /* F^(04)(x_94) / ( 4!) */ + ( real_type )-0.0000000000745116248510788976538601138246805492376021, /* F^(05)(x_94) / ( 5!) */ + ( real_type )+0.00000000000414067454708421394097938731274075003710474, /* F^(06)(x_94) / ( 6!) */ + ( real_type )-0.00000000000283768023847171014965876462215397649076936, /* F^(07)(x_94) / ( 7!) */ + ( real_type )+0.0000000000017363960458794502188933056372318580317699, /* F^(08)(x_94) / ( 8!) */ + ( real_type )-0.00000000000083969515618492903249189026832779490926892, /* F^(09)(x_94) / ( 9!) 
*/ + ( real_type )+0.000000000000328483846041881864650245283629188660968791, /* F^(10)(x_94) / (10!) */ + ( real_type )-0.000000000000106058725806869248173675463123847599394408, /* F^(11)(x_94) / (11!) */ + ( real_type )+2.87358916043604067740628765995734090337016e-14, /* F^(12)(x_94) / (12!) */ + ( real_type )-6.62520643249992856447473362737104492795688e-15, /* F^(13)(x_94) / (13!) */ + ( real_type )+1.3152948484498388464270968700938396243084e-15, /* F^(14)(x_94) / (14!) */ + ( real_type )-2.27162344528736915435442139746813586297875e-16, /* F^(15)(x_94) / (15!) */ + ( real_type )+3.44354446551954250986962425463615746405287e-17, /* F^(16)(x_94) / (16!) */ + ( real_type )-4.6177278842562750535857298303797462923481e-18, /* F^(17)(x_94) / (17!) */ + ( real_type )+5.51585123795386489317681213648329109919692e-19, /* F^(18)(x_94) / (18!) */ + ( real_type )-5.90528468362136702002546246424350462184454e-20, /* F^(19)(x_94) / (19!) */ + ( real_type )+5.69796981226867125752787373664572898673375e-21, /* F^(20)(x_94) / (20!) */ + ( real_type )-4.97991751979619797822632728692799015501915e-22, /* F^(21)(x_94) / (21!) */ + ( real_type )+3.96019122931085222013384972398636044910892e-23, /* F^(22)(x_94) / (22!) */ + ( real_type )-2.87737244779787519335285454502999901846571e-24, /* F^(23)(x_94) / (23!) */ + ( real_type )+1.91735639103316168428747130240908382007016e-25, /* F^(24)(x_94) / (24!) */ + + /* ===== n = 95, xn = 20.00, yn = 0 ==== */ + ( real_type )+0.0250313679264036719469949523478235318685783, /* F^(00)(x_95) / ( 0!) */ + ( real_type )-0.00125471705614687787979809391294127474313247, /* F^(01)(x_95) / ( 1!) */ + ( real_type )+0.0000314865982669428244834629555009814970355676, /* F^(02)(x_95) / ( 2!) */ + ( real_type )-0.000000527430503426122171158256344220570921136715, /* F^(03)(x_95) / ( 3!) */ + ( real_type )+0.00000000663466410935440775135104801053159302647305, /* F^(04)(x_95) / ( 4!) 
*/ + ( real_type )-0.0000000000668525064446089789965499324391304196225676, /* F^(05)(x_95) / ( 5!) */ + ( real_type )+0.00000000000056207261229433498115494703757165484204121, /* F^(06)(x_95) / ( 6!) */ + ( real_type )-4.05583014089264450369872156100790083250098e-15, /* F^(07)(x_95) / ( 7!) */ + ( real_type )+2.56411046010502203699731293279074754862805e-17, /* F^(08)(x_95) / ( 8!) */ + ( real_type )-1.44279615047502331780799788448442115135487e-19, /* F^(09)(x_95) / ( 9!) */ + ( real_type )+7.31613572222665223490739095138011640238271e-22, /* F^(10)(x_95) / (10!) */ + ( real_type )-3.37702379674498002867443070935572382176358e-24, /* F^(11)(x_95) / (11!) */ + ( real_type )+1.43076553803413924465096728791005057445911e-26, /* F^(12)(x_95) / (12!) */ + ( real_type )-5.60290873145504521576832165960528174933747e-29, /* F^(13)(x_95) / (13!) */ + ( real_type )+2.04008886531966337902640059555123079816606e-31, /* F^(14)(x_95) / (14!) */ + ( real_type )-6.94222802793728542892698463099457050968224e-34, /* F^(15)(x_95) / (15!) */ + ( real_type )+2.21768453445428383602577050766356014263607e-36, /* F^(16)(x_95) / (16!) */ + ( real_type )-6.67657795486257985966613293809783506717921e-39, /* F^(17)(x_95) / (17!) */ + ( real_type )+1.90094299923723007482624151378735585348079e-41, /* F^(18)(x_95) / (18!) */ + ( real_type )-5.13440573688668543269733799743447230999221e-44, /* F^(19)(x_95) / (19!) */ + ( real_type )+1.31924159946606288417755066415114800832222e-46, /* F^(20)(x_95) / (20!) */ + ( real_type )-3.23266804937791619076790610363083272781569e-49, /* F^(21)(x_95) / (21!) */ + ( real_type )+7.5716216593195426888960840955926283668573e-52, /* F^(22)(x_95) / (22!) */ + ( real_type )-1.6986678247582504269869547762489045128291e-54, /* F^(23)(x_95) / (23!) */ + ( real_type )+3.65716084795303857554208748720308563038309e-57, /* F^(24)(x_95) / (24!) 
*/ +}; + +#elif !defined( _GPUCODE ) + + #error "precomputed values are given for SIXTRL_CERRF_DAWSON_N_XN = 96" \ + "and SIXTRL_CERRF_DAWSON_N_TAYLOR_COEFF_PER_XN = 25 " \ + "-> provide your own data tables for other configurations" + +#endif /* !defined( _GPUCODE ) && ( N = 96 ) && ( NT_MAX = 25 ) */ From c411c9bb93bd288cb05d1c9793a9e3d10d72dd2f Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:11:51 +0200 Subject: [PATCH 61/77] common: adds precomputed and tabulated value for the Abrarov & Quine 2011 based algorithm --- .../common/be_beamfields/abq2011_coeff.h | 28 ++ .../be_beamfields/abq2011_coeff_nf24_tm12.c | 50 +++ .../abq2011_coeff_poles_nf24_tm12_nt6.c | 342 ++++++++++++++++++ 3 files changed, 420 insertions(+) create mode 100644 sixtracklib/common/be_beamfields/abq2011_coeff.h create mode 100644 sixtracklib/common/be_beamfields/abq2011_coeff_nf24_tm12.c create mode 100644 sixtracklib/common/be_beamfields/abq2011_coeff_poles_nf24_tm12_nt6.c diff --git a/sixtracklib/common/be_beamfields/abq2011_coeff.h b/sixtracklib/common/be_beamfields/abq2011_coeff.h new file mode 100644 index 000000000..3579e3eee --- /dev/null +++ b/sixtracklib/common/be_beamfields/abq2011_coeff.h @@ -0,0 +1,28 @@ +#ifndef SIXTRACKLIB_COMMON_BE_BEAMFIELDS_ABQ2011_COEFF_H__ +#define SIXTRACKLIB_COMMON_BE_BEAMFIELDS_ABQ2011_COEFF_H__ + +#if !defined( SIXTRL_NO_SYSTEM_INCLUDES ) + #include + #include +#endif /* !defined( SIXTRL_NO_SYSTEM_INCLUDES ) */ + +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/definitions.h" + #include "sixtracklib/common/be_beamfields/definitions.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) && defined( __cplusplus ) +extern "C" { +#endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ + +SIXTRL_EXTERN SIXTRL_REAL_T const + NS(CERRF_ABQ2011_FOURIER_COEFF)[ SIXTRL_CERRF_ABQ2011_N_FOURIER ]; + +SIXTRL_EXTERN SIXTRL_REAL_T const + NS(CERRF_ABQ2011_ROOT_TAYLOR_COEFF)[ 
SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF ]; + +#if !defined( _GPUCODE ) && defined( __cplusplus ) +} +#endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ + +#endif /* SIXTRACKLIB_COMMON_BE_BEAMFIELDS_ABQ2011_COEFF_H__ */ diff --git a/sixtracklib/common/be_beamfields/abq2011_coeff_nf24_tm12.c b/sixtracklib/common/be_beamfields/abq2011_coeff_nf24_tm12.c new file mode 100644 index 000000000..2e3ba6999 --- /dev/null +++ b/sixtracklib/common/be_beamfields/abq2011_coeff_nf24_tm12.c @@ -0,0 +1,50 @@ +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/be_beamfields/definitions.h" + #include "sixtracklib/common/be_beamfields/abq2011_coeff.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) && \ + defined( SIXTRL_CERRF_ABQ2011_N_FOURIER ) && \ + ( SIXTRL_CERRF_ABQ2011_N_FOURIER == 24 ) && \ + defined( SIXTRL_CERRF_ABQ2011_TM ) && ( SIXTRL_CERRF_ABQ2011_TM == 12 ) + +typedef SIXTRL_REAL_T real_type; + +real_type const NS(CERRF_ABQ2011_FOURIER_COEFF)[ SIXTRL_CERRF_ABQ2011_N_FOURIER ] = { + ( real_type )0.295408975150919337883027913890, /* a_n[ 0 ] */ + ( real_type )0.275840233292177084395258287749, /* a_n[ 1 ] */ + ( real_type )0.224573955224615866231619198223, /* a_n[ 2 ] */ + ( real_type )0.159414938273911722757388079389, /* a_n[ 3 ] */ + ( real_type )0.0986657664154541891084237249422, /* a_n[ 4 ] */ + ( real_type )0.0532441407876394120414705837561, /* a_n[ 5 ] */ + ( real_type )0.0250521500053936483557475638078, /* a_n[ 6 ] */ + ( real_type )0.0102774656705395362477551802420, /* a_n[ 7 ] */ + ( real_type )0.00367616433284484706364335443079, /* a_n[ 8 ] */ + ( real_type )0.00114649364124223317199757239908, /* a_n[ 9 ] */ + ( real_type )0.000311757015046197600406683642851, /* a_n[ 10 ] */ + ( real_type )0.0000739143342960301487751427184143, /* a_n[ 11 ] */ + ( real_type )0.0000152794934280083634658979605774, /* a_n[ 12 ] */ + ( real_type )0.00000275395660822107093261423133381, /* a_n[ 13 ] */ + ( real_type 
)4.32785878190124505246159684324E-7, /* a_n[ 14 ] */ + ( real_type )5.93003040874588104132914772669E-8, /* a_n[ 15 ] */ + ( real_type )7.08449030774820424708618991843E-9, /* a_n[ 16 ] */ + ( real_type )7.37952063581678039121116498488E-10, /* a_n[ 17 ] */ + ( real_type )6.70217160600200763046136003593E-11, /* a_n[ 18 ] */ + ( real_type )5.30726516347079017807414252726E-12, /* a_n[ 19 ] */ + ( real_type )3.66432411346763916925386157070E-13, /* a_n[ 20 ] */ + ( real_type )2.20589494494103134281934595834E-14, /* a_n[ 21 ] */ + ( real_type )1.15782686262855878932236226031E-15, /* a_n[ 22 ] */ + ( real_type )5.29871142946730483659082681849E-17, /* a_n[ 23 ] */ +}; + +#elif !defined( _GPUCODE ) && \ + ( !defined( SIXTRL_CERRF_ABQ2011_N_FOURIER ) || \ + ( SIXTRL_CERRF_ABQ2011_N_FOURIER != 24 ) || \ + !defined( SIXTRL_CERRF_ABQ2011_TM ) || \ + ( SIXTRL_CERRF_ABQ2011_TM != 12 ) ) + + #error "precomputed fourier coefficients only provided for " \ + "SIXTRL_CERRF_ABQ2011_N_FOURIER == 24 and SIXTRL_CERRF_ABQ2011_TM == 12" \ + "-> provide your own tabulated data for other configurations" + +#endif /* !defined( _GPUCODE ) */ diff --git a/sixtracklib/common/be_beamfields/abq2011_coeff_poles_nf24_tm12_nt6.c b/sixtracklib/common/be_beamfields/abq2011_coeff_poles_nf24_tm12_nt6.c new file mode 100644 index 000000000..549077949 --- /dev/null +++ b/sixtracklib/common/be_beamfields/abq2011_coeff_poles_nf24_tm12_nt6.c @@ -0,0 +1,342 @@ +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/be_beamfields/definitions.h" + #include "sixtracklib/common/be_beamfields/abq2011_coeff.h" +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) && \ + defined( SIXTRL_CERRF_ABQ2011_N_FOURIER ) && \ + ( SIXTRL_CERRF_ABQ2011_N_FOURIER == 24 ) && \ + ( SIXTRL_CERRF_ABQ2011_N_FOURIER == 24 ) && \ + defined( SIXTRL_CERRF_ABQ2011_TM ) && ( SIXTRL_CERRF_ABQ2011_TM == 12 ) && \ + defined( SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF ) && \ + ( SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF == 
288 ) + +typedef SIXTRL_REAL_T real_type; + +real_type const NS(CERRF_ABQ2011_ROOT_TAYLOR_COEFF)[ SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF ] = { + /* ======= pole nn = 0 ====== */ + ( real_type )+1.0, /* real part, component ii = 0 */ + ( real_type )+0.0, /* imag part, component ii = 0 */ + ( real_type )+0.0, /* real part, component ii = 1 */ + ( real_type )+1.1283791670955125738961589031215, /* imag part, component ii = 1 */ + ( real_type )-1.0, /* real part, component ii = 2 */ + ( real_type )+0.0, /* imag part, component ii = 2 */ + ( real_type )+0.0, /* real part, component ii = 3 */ + ( real_type )-0.75225277806367504926410593541436, /* imag part, component ii = 3 */ + ( real_type )+0.5, /* real part, component ii = 4 */ + ( real_type )+0.0, /* imag part, component ii = 4 */ + ( real_type )+0.0, /* real part, component ii = 5 */ + ( real_type )+0.30090111122547001970564237416575, /* imag part, component ii = 5 */ + /* ======= pole nn = 1 ====== */ + ( real_type )+0.93375711808097597031678906145732, /* real part, component ii = 0 */ + ( real_type )+0.28227388511512776948036687916347, /* imag part, component ii = 0 */ + ( real_type )-0.48891408373339520033788238567145, /* real part, component ii = 1 */ + ( real_type )+0.98058090646585679246427864137302, /* imag part, component ii = 1 */ + ( real_type )-0.80575971027319102090828326394266, /* real part, component ii = 2 */ + ( real_type )-0.53898936611542409305995484807186, /* imag part, component ii = 2 */ + ( real_type )+0.46657432173075775373057596178821, /* real part, component ii = 3 */ + ( real_type )-0.55964921359105809694982544087676, /* imag part, component ii = 3 */ + ( real_type )+0.34180541924063762781081431174069, /* real part, component ii = 4 */ + ( real_type )+0.3427525938079192626116722191677, /* imag part, component ii = 4 */ + ( real_type )-0.22242350849375531934902438134807, /* real part, component ii = 5 */ + ( real_type )+0.18796671774622971841554848610581, /* imag part, component ii = 5 */ + /* 
======= pole nn = 2 ====== */ + ( real_type )+0.76021371764309095658088511096778, /* real part, component ii = 0 */ + ( real_type )+0.49380190394896794454680308387448, /* imag part, component ii = 0 */ + ( real_type )-0.79609394350190664528589828493657, /* real part, component ii = 1 */ + ( real_type )+0.61127102250393577202621591738052, /* imag part, component ii = 1 */ + ( real_type )-0.34337990356427131768412822573628, /* real part, component ii = 2 */ + ( real_type )-0.81386266289074891136720483222675, /* imag part, component ii = 2 */ + ( real_type )+0.65059149371548070022184863113389, /* real part, component ii = 3 */ + ( real_type )-0.12342235247277904610313013351651, /* imag part, component ii = 3 */ + ( real_type )+0.0013654970200886334941510301728008, /* real part, component ii = 4 */ + ( real_type )+0.43924322776347884757473075802491, /* imag part, component ii = 4 */ + ( real_type )-0.26052258651331289377564770306822, /* real part, component ii = 5 */ + ( real_type )-0.042625945509609278561220458493055, /* imag part, component ii = 5 */ + /* ======= pole nn = 3 ====== */ + ( real_type )+0.53964148581629717588566532326912, /* real part, component ii = 0 */ + ( real_type )+0.59780598866963161507104375663249, /* imag part, component ii = 0 */ + ( real_type )-0.84766686370637990747772074744551, /* real part, component ii = 1 */ + ( real_type )+0.18934771595726364581938916985108, /* imag part, component ii = 1 */ + ( real_type )+0.12611451211156873692475598022312, /* real part, component ii = 2 */ + ( real_type )-0.74651933702596819864107536767402, /* imag part, component ii = 2 */ + ( real_type )+0.49907783834412571386136023355349, /* real part, component ii = 3 */ + ( real_type )+0.26464480018907500619073693090441, /* imag part, component ii = 3 */ + ( real_type )-0.2590446648697068388245213767399, /* real part, component ii = 4 */ + ( real_type )+0.26933389850239200381296738575726, /* imag part, component ii = 4 */ + ( real_type 
)-0.11824985372702018565304948760496, /* real part, component ii = 5 */ + ( real_type )-0.19047165976541137590932087255505, /* imag part, component ii = 5 */ + /* ======= pole nn = 4 ====== */ + ( real_type )+0.33399718598613178541580257101556, /* real part, component ii = 0 */ + ( real_type )+0.60198345081269674225182633547039, /* imag part, component ii = 0 */ + ( real_type )-0.69952207054246363927481892276438, /* real part, component ii = 1 */ + ( real_type )-0.13241202400835458179124782676693, /* imag part, component ii = 1 */ + ( real_type )+0.39854061329390984159311298877162, /* real part, component ii = 2 */ + ( real_type )-0.46332190352216271517664217317236, /* imag part, component ii = 2 */ + ( real_type )+0.18811421083246068241280801744611, /* real part, component ii = 3 */ + ( real_type )+0.41173439119500646221605042095309, /* imag part, component ii = 3 */ + ( real_type )-0.29776667711147158469441740055891, /* real part, component ii = 4 */ + ( real_type )+0.016077328659664965595852112639495, /* imag part, component ii = 4 */ + ( real_type )+0.049482529706648149074087999890876, /* real part, component ii = 5 */ + ( real_type )-0.17142821215887619729887867959749, /* imag part, component ii = 5 */ + /* ======= pole nn = 5 ====== */ + ( real_type )+0.18023873770401830576172373319967, /* real part, component ii = 0 */ + ( real_type )+0.54283491474428325274547121825489, /* imag part, component ii = 0 */ + ( real_type )-0.47186391188603465563784229200975, /* real part, component ii = 1 */ + ( real_type )-0.2927593164650557618486229314101, /* imag part, component ii = 1 */ + ( real_type )+0.43742967857736002430500383201776, /* real part, component ii = 2 */ + ( real_type )-0.159613865629038011909226796683, /* imag part, component ii = 2 */ + ( real_type )-0.067153465598415454913748421569707, /* real part, component ii = 3 */ + ( real_type )+0.33446225199649668022814999373229, /* imag part, component ii = 3 */ + ( real_type )-0.17476299883303899099667609200056, 
/* real part, component ii = 4 */ + ( real_type )-0.13909809922200018718958845032981, /* imag part, component ii = 4 */ + ( real_type )+0.11836707844823233243597803379156, /* real part, component ii = 5 */ + ( real_type )-0.060953306357908685017330434139453, /* imag part, component ii = 5 */ + /* ======= pole nn = 6 ====== */ + ( real_type )+0.084804972471113777302191522641103, /* real part, component ii = 0 */ + ( real_type )+0.46054632922146286382179698685087, /* imag part, component ii = 0 */ + ( real_type )-0.26642267850313569685111656389261, /* real part, component ii = 1 */ + ( real_type )-0.31846979742438147997234732548487, /* imag part, component ii = 1 */ + ( real_type )+0.33369079229646944121968803592685, /* real part, component ii = 2 */ + ( real_type )+0.039704858767870393041133492489307, /* imag part, component ii = 2 */ + ( real_type )-0.17182506154762485790208472321895, /* real part, component ii = 3 */ + ( real_type )+0.17073436741060034782501439483468, /* imag part, component ii = 3 */ + ( real_type )-0.031894308383076639938700257011888, /* real part, component ii = 4 */ + ( real_type )-0.15394688797704586215013637936476, /* imag part, component ii = 4 */ + ( real_type )+0.088769809600570128960259510168993, /* real part, component ii = 5 */ + ( real_type )+0.028433935498099490227223620100417, /* imag part, component ii = 5 */ + /* ======= pole nn = 7 ====== */ + ( real_type )+0.034790634459528376653392819006388, /* real part, component ii = 0 */ + ( real_type )+0.38227751724449322418096873789343, /* imag part, component ii = 0 */ + ( real_type )-0.12751433523707929738959936672643, /* real part, component ii = 1 */ + ( real_type )-0.27274111268030707423319341816842, /* imag part, component ii = 1 */ + ( real_type )+0.19889158984525170552262295294651, /* real part, component ii = 2 */ + ( real_type )+0.11754667704704935449668489824441, /* imag part, component ii = 2 */ + ( real_type )-0.15798235998808377725048076453576, /* real part, component ii = 3 
*/ + ( real_type )+0.038217050706076073978298029293292, /* imag part, component ii = 3 */ + ( real_type )+0.045313103025182256785838266674491, /* real part, component ii = 4 */ + ( real_type )-0.093791540197713864831181307830144, /* imag part, component ii = 4 */ + ( real_type )+0.029976704627670507707641046693031, /* real part, component ii = 5 */ + ( real_type )+0.053465969570171824731493520484189, /* imag part, component ii = 5 */ + /* ======= pole nn = 8 ====== */ + ( real_type )+0.012444321744005097562234654220266, /* real part, component ii = 0 */ + ( real_type )+0.31923996806575228561577188610564, /* imag part, component ii = 0 */ + ( real_type )-0.052126653026498850823909942205847, /* real part, component ii = 1 */ + ( real_type )-0.20885008411463086079807767642065, /* imag part, component ii = 1 */ + ( real_type )+0.096729485058843536811135865322049, /* real part, component ii = 2 */ + ( real_type )+0.11817462523833750741987488245719, /* imag part, component ii = 2 */ + ( real_type )-0.10030873782517255463742834493933, /* real part, component ii = 3 */ + ( real_type )-0.025769514807796351542632760423039, /* imag part, component ii = 3 */ + ( real_type )+0.056678322084720468739163647346852, /* real part, component ii = 4 */ + ( real_type )-0.032101539816919950137056924642196, /* imag part, component ii = 4 */ + ( real_type )-0.0073592249443720339537349911282626, /* real part, component ii = 5 */ + ( real_type )+0.037201129031853461035176594053539, /* imag part, component ii = 5 */ + /* ======= pole nn = 9 ====== */ + ( real_type )+0.003881038619955637411407040680584, /* real part, component ii = 0 */ + ( real_type )+0.27209031085455034661059909971213, /* imag part, component ii = 0 */ + ( real_type )-0.018288963625126350020805781556396, /* real part, component ii = 1 */ + ( real_type )-0.15381621544491524528935233915374, /* imag part, component ii = 1 */ + ( real_type )+0.039211316704895283468586906787111, /* real part, component ii = 2 */ + ( real_type 
)+0.090330608478997621872435696751784, /* imag part, component ii = 2 */ + ( real_type )-0.049400349832089980557644238245125, /* real part, component ii = 3 */ + ( real_type )-0.039346844366013911033454378322106, /* imag part, component ii = 3 */ + + ( real_type )+0.03859275769152473033950548389918, /* real part, component ii = 4 */ + ( real_type )+0.0011891047113300322739369782820103, /* imag part, component ii = 4 */ + ( real_type )-0.016612677280803531999687829818964, /* real part, component ii = 5 */ + ( real_type )+0.014618032958766532062795805087643, /* imag part, component ii = 5 */ + /* ======= pole nn = 10 ====== */ + ( real_type )+0.0010553403629220348894046152644339, /* real part, component ii = 0 */ + ( real_type )+0.2373265348988182880038096308366, /* imag part, component ii = 0 */ + ( real_type )-0.0055257492186544183844855586567031, /* real part, component ii = 1 */ + ( real_type )-0.11425966380456945538033770312825, /* imag part, component ii = 1 */ + ( real_type )+0.013411037262831515835004193899036, /* real part, component ii = 2 */ + ( real_type )+0.061804565442910883656215489601897, /* imag part, component ii = 2 */ + ( real_type )-0.019722842821969531756171249897052, /* real part, component ii = 3 */ + ( real_type )-0.031696206771263939653361057577715, /* imag part, component ii = 3 */ + ( real_type )+0.019111622250836603455288276587677, /* real part, component ii = 4 */ + ( real_type )+0.010587954919905330162492167657816, /* imag part, component ii = 4 */ + ( real_type )-0.01212450689168268799693273266244, /* real part, component ii = 5 */ + ( real_type )+0.0015908022442007448897003787977559, /* imag part, component ii = 5 */ + /* ======= pole nn = 11 ====== */ + ( real_type )+0.00025021018490812133699464974855978, /* real part, component ii = 0 */ + ( real_type )+0.2111310662193366471757139672037, /* imag part, component ii = 0 */ + ( real_type )-0.0014411072110612792039117484966232, /* real part, component ii = 1 */ + ( real_type 
)-0.087648478299775742512331974364018, /* imag part, component ii = 1 */ + ( real_type )+0.0038998806567884865010486766884342, /* real part, component ii = 2 */ + ( real_type )+0.041278431345154913237469604499635, /* imag part, component ii = 2 */ + ( real_type )-0.0065264952278302648139234075330014, /* real part, component ii = 3 */ + ( real_type )-0.020816580206935201937858758598365, /* imag part, component ii = 3 */ + ( real_type )+0.0074475381747659418367127897836248, /* real part, component ii = 4 */ + ( real_type )+0.0093345080757839438562300002140794, /* imag part, component ii = 4 */ + ( real_type )-0.0059683500218317749345182218354011, /* real part, component ii = 5 */ + ( real_type )-0.002425949315670312051431784454621, /* imag part, component ii = 5 */ + /* ======= pole nn = 12 ====== */ + ( real_type )+0.000051723186203812306145465090382394, /* real part, component ii = 0 */ + ( real_type )+0.19068111719759752027915870526796, /* imag part, component ii = 0 */ + ( real_type )-0.00032498636359630737414466531011684, /* real part, component ii = 1 */ + ( real_type )-0.06970562683702093127517986084811, /* imag part, component ii = 1 */ + ( real_type )+0.00096925158618720835823880184883858, /* real part, component ii = 2 */ + ( real_type )+0.028305567987458973213439387190091, /* imag part, component ii = 2 */ + ( real_type )-0.0018133381993664538123873027483908, /* real part, component ii = 3 */ + ( real_type )-0.012812625072044405147288715834784, /* imag part, component ii = 3 */ + ( real_type )+0.0023637591897080934012266740590815, /* real part, component ii = 4 */ + ( real_type )+0.0059732404060380626607434772749493, /* imag part, component ii = 4 */ + ( real_type )-0.0022450521223503419275951871575783, /* real part, component ii = 5 */ + ( real_type )-0.0023811452422761944596998911073548, /* imag part, component ii = 5 */ + /* ======= pole nn = 13 ====== */ + ( real_type )+0.0000093225217914050245645220689669343, /* real part, component ii = 0 */ + ( 
real_type )+0.17416766378502582902663119173603, /* imag part, component ii = 0 */ + ( real_type )-0.000063456392941085698747219843597152, /* real part, component ii = 1 */ + ( real_type )-0.05714251449101158318368194552633, /* imag part, component ii = 1 */ + ( real_type )+0.00020664446091953552366712256389438, /* real part, component ii = 2 */ + ( real_type )+0.020310715258635321661950090870278, /* imag part, component ii = 2 */ + ( real_type )-0.00042655714716637992888163778879052, /* real part, component ii = 3 */ + ( real_type )-0.0079885414500965540039700913561707, /* imag part, component ii = 3 */ + ( real_type )+0.00062254836947204695346528029270674, /* real part, component ii = 4 */ + ( real_type )+0.0034387115674644867988234477242484, /* imag part, component ii = 4 */ + ( real_type )-0.00067688760754977906906419439941818, /* real part, component ii = 5 */ + ( real_type )-0.0014858968524976706371609581554089, /* imag part, component ii = 5 */ + /* ======= pole nn = 14 ====== */ + ( real_type )+0.0000014650397062886179500833912613532, /* real part, component ii = 0 */ + ( real_type )+0.16047101168347768453054076590807, /* imag part, component ii = 0 */ + ( real_type )-0.000010739301949818564645493195385236, /* real part, component ii = 1 */ + ( real_type )-0.04793478621533662946897952188554, /* imag part, component ii = 1 */ + ( real_type )+0.000037896557755649351273865026653075, /* real part, component ii = 2 */ + ( real_type )+0.015219155912937633304360996579126, /* imag part, component ii = 2 */ + ( real_type )-0.000085439224487945971694156887315906, /* real part, component ii = 3 */ + ( real_type )-0.0052308890641597716674843143357707, /* imag part, component ii = 3 */ + ( real_type )+0.0001376272777770237910684459659253, /* real part, component ii = 4 */ + ( real_type )+0.001976526926027240933603775896867, /* imag part, component ii = 4 */ + ( real_type )-0.00016759643777715616229661848766471, /* real part, component ii = 5 */ + ( real_type 
)-0.00080538419386990317836393575029478, /* imag part, component ii = 5 */ + /* ======= pole nn = 15 ====== */ + ( real_type )+0.00000020073968320415217695153659897956, /* real part, component ii = 0 */ + ( real_type )+0.1488793485856626698339770988152, /* imag part, component ii = 0 */ + ( real_type )-0.0000015766057850952672151246133252495, /* real part, component ii = 1 */ + ( real_type )-0.040916502374366970677175935266772, /* imag part, component ii = 1 */ + ( real_type )+0.0000059905767568739225978953780583554, /* real part, component ii = 2 */ + ( real_type )+0.011799380501713088975053758620893, /* imag part, component ii = 2 */ + ( real_type )-0.000014632222751737225312029010479964, /* real part, component ii = 3 */ + ( real_type )-0.0036130376679990937784848106329236, /* imag part, component ii = 3 */ + ( real_type )+0.000025735013810654973744508461675444, /* real part, component ii = 4 */ + ( real_type )+0.0011944926209741751449997413319328, /* imag part, component ii = 4 */ + ( real_type )-0.000034571576063097877760649761817811, /* real part, component ii = 5 */ + ( real_type )-0.00043108955421020549336131906478999, /* imag part, component ii = 5 */ + /* ======= pole nn = 16 ====== */ + ( real_type )+0.000000023981973818259450774325197133491, /* real part, component ii = 0 */ + ( real_type )+0.13891644940561371062569280602016, /* imag part, component ii = 0 */ + ( real_type )-0.00000020091091404273774347582071159638, /* real part, component ii = 1 */ + ( real_type )-0.035404558012365380312943535121619, /* imag part, component ii = 1 */ + ( real_type )+0.00000081759169495864097795259893595499, /* real part, component ii = 2 */ + ( real_type )+0.0093858164013739305331682032307173, /* imag part, component ii = 2 */ + ( real_type )-0.0000021492061128764803387587313707712, /* real part, component ii = 3 */ + ( real_type )-0.0026071051957554622977849535416305, /* imag part, component ii = 3 */ + ( real_type )+0.0000040924909093626972191440408216767, /* real 
part, component ii = 4 */ + ( real_type )+0.00076740015272712817058730248694807, /* imag part, component ii = 4 */ + ( real_type )-0.0000059973518885757342446236557950261, /* real part, component ii = 5 */ + ( real_type )-0.00024294921885580505196871738081088, /* imag part, component ii = 5 */ + /* ======= pole nn = 17 ====== */ + ( real_type )+0.0000000024980692045821258097510404711114, /* real part, component ii = 0 */ + ( real_type )+0.13024740171229320629969008874562, /* imag part, component ii = 0 */ + ( real_type )-0.000000022235761606943296664477868977993, /* real part, component ii = 1 */ + ( real_type )-0.030976293948567907829288627886776, /* imag part, component ii = 1 */ + ( real_type )+0.000000096464179986492842475383385936404, /* real part, component ii = 2 */ + ( real_type )+0.0076153697520735797950354674456489, /* imag part, component ii = 2 */ + ( real_type )-0.00000027139114259882675014310758466137, /* real part, component ii = 3 */ + ( real_type )-0.0019443942758009957577073747085063, /* imag part, component ii = 3 */ + ( real_type )+0.00000055569320739187190352894054194245, /* real part, component ii = 4 */ + ( real_type )+0.00051916558784461541529890726719493, /* imag part, component ii = 4 */ + ( real_type )-0.00000088070850515596665751578453637642, /* real part, component ii = 5 */ + ( real_type )-0.0001464794745155215039282529674027, /* imag part, component ii = 5 */ + /* ======= pole nn = 18 ====== */ + ( real_type )+2.2687772443535217693511914029833e-10, /* real part, component ii = 0 */ + ( real_type )+0.12262715881089526716168626576724, /* imag part, component ii = 0 */ + ( real_type )-0.000000002138272177047815761070063726908, /* real part, component ii = 1 */ + ( real_type )-0.027354576657179796462370682551688, /* imag part, component ii = 1 */ + ( real_type )+0.0000000098494925197479553681012513131119, /* real part, component ii = 2 */ + ( real_type )+0.0062782467914870717677614781281006, /* imag part, component ii = 2 */ + ( real_type 
)-0.000000029517578556929254245921153950795, /* real part, component ii = 3 */ + ( real_type )-0.0014873095594396108088549204722592, /* imag part, component ii = 3 */ + ( real_type )+0.000000064624409699782439037265677151585, /* real part, component ii = 4 */ + ( real_type )+0.00036526719341847904307151664809437, /* imag part, component ii = 4 */ + ( real_type )-0.00000011000711103047638993671089404554, /* real part, component ii = 5 */ + ( real_type )-0.000093588615088669178693067460636049, /* imag part, component ii = 5 */ + /* ======= pole nn = 19 ====== */ + ( real_type )+1.7965822334136598818238788950604e-11, /* real part, component ii = 0 */ + ( real_type )+0.11587097251890988822145747203609, /* imag part, component ii = 0 */ + ( real_type )-1.7873076895863940652596084829635e-10, /* real part, component ii = 1 */ + ( real_type )-0.024348920331909153786372952219037, /* imag part, component ii = 1 */ + ( real_type )+8.7107468965648074865261058066019e-10, /* real part, component ii = 2 */ + ( real_type )+0.005245143773907612104584027369068, /* imag part, component ii = 2 */ + ( real_type )-0.0000000027694392134333165369386960294087, /* real part, component ii = 3 */ + ( real_type )-0.0011609418784759838536321207929917, /* imag part, component ii = 3 */ + ( real_type )+0.0000000064523188160978617317336680723085, /* real part, component ii = 4 */ + ( real_type )+0.00026479990707256154154634089159106, /* imag part, component ii = 4 */ + ( real_type )-0.000000011730243995765755270415153450135, /* real part, component ii = 5 */ + ( real_type )-0.000062489095672205330528357220419356, /* imag part, component ii = 5 */ + /* ======= pole nn = 20 ====== */ + ( real_type )+1.2404240973367851584778897534523e-12, /* real part, component ii = 0 */ + ( real_type )+0.1098362769681518483909285764697, /* imag part, component ii = 0 */ + ( real_type )-1.298969077176331621982980936204e-11, /* real part, component ii = 1 */ + ( real_type )-0.021823635640486277393965150233485, /* 
imag part, component ii = 1 */ + ( real_type )+6.6773437737621157971588213132363e-11, /* real part, component ii = 2 */ + ( real_type )+0.0044320120364682701884582044944389, /* imag part, component ii = 2 */ + ( real_type )-2.2442347443154233770616810425146e-10, /* real part, component ii = 3 */ + ( real_type )-0.00092155007788721109688240744356282, /* imag part, component ii = 3 */ + ( real_type )+5.5415256327054792715433938450672e-10, /* real part, component ii = 4 */ + ( real_type )+0.00019660644393716836366126600875097, /* imag part, component ii = 4 */ + + ( real_type )-0.0000000010708450247198540343349748623994, /* real part, component ii = 5 */ + ( real_type )-0.000043151542126063345363431783857041, /* imag part, component ii = 5 */ + /* ======= pole nn = 21 ====== */ + ( real_type )+7.4672577020182875440016859651146e-14, /* real part, component ii = 0 */ + ( real_type )+0.10441096552127306445543857671136, /* imag part, component ii = 0 */ + ( real_type )-8.2106786786928587328349576061717e-13, /* real part, component ii = 1 */ + ( real_type )-0.019679360729957722054156214767237, /* imag part, component ii = 1 */ + ( real_type )+4.4393837911241883207016186702722e-12, /* real part, component ii = 2 */ + ( real_type )+0.0037819708977395739226231654500022, /* imag part, component ii = 2 */ + ( real_type )-1.5723812843525390463303037280301e-11, /* real part, component ii = 3 */ + ( real_type )-0.00074207349986206600447724806024643, /* imag part, component ii = 3 */ + ( real_type )+4.1003396155623084161527569873175e-11, /* real part, component ii = 4 */ + ( real_type )+0.00014889562477175441870019280993402, /* imag part, component ii = 4 */ + ( real_type )-8.3881652556906059966631908674231e-11, /* real part, component ii = 5 */ + ( real_type )-0.000030609180709397879714204992464072, /* imag part, component ii = 5 */ + /* ======= pole nn = 22 ====== */ + ( real_type )+3.9194031326808708626455909986852e-15, /* real part, component ii = 0 */ + ( real_type 
)+0.099505481546473999566411208266413, /* imag part, component ii = 0 */ + ( real_type )-4.5148282989652500394710017721569e-14, /* real part, component ii = 1 */ + ( real_type )-0.017841695571651428868463002112912, /* imag part, component ii = 1 */ + ( real_type )+2.5611603949854222001811303619656e-13, /* real part, component ii = 2 */ + ( real_type )+0.0032553079685830720387268042850504, /* imag part, component ii = 2 */ + ( real_type )-9.5331613908539489537576380947164e-13, /* real part, component ii = 3 */ + ( real_type )-0.00060502157356591611282409939338385, /* imag part, component ii = 3 */ + ( real_type )+2.6172953777583858662015187181188e-12, /* real part, component ii = 4 */ + ( real_type )+0.00011468306892164779307267070527788, /* imag part, component ii = 4 */ + ( real_type )-5.6484892271287050682397438174311e-12, /* real part, component ii = 5 */ + ( real_type )-0.00002220219423824602272742007102591, /* imag part, component ii = 5 */ + /* ======= pole nn = 23 ====== */ + ( real_type )+1.7936866768385370756626538036594e-16, /* real part, component ii = 0 */ + ( real_type )+0.095047308459488420379731640070237, /* imag part, component ii = 0 */ + ( real_type )-2.160095939939171086934421782295e-15, /* real part, component ii = 1 */ + ( real_type )-0.01625388257043277239408391728447, /* imag part, component ii = 1 */ + ( real_type )+1.2827402609576721298174443554251e-14, /* real part, component ii = 2 */ + ( real_type )+0.0028235911853785800300261290465192, /* imag part, component ii = 2 */ + ( real_type )-5.0052430343726648115144665585684e-14, /* real part, component ii = 3 */ + ( real_type )-0.00049869975686168450450500529427224, /* imag part, component ii = 3 */ + ( real_type )+1.4427879834645452305577800957287e-13, /* real part, component ii = 4 */ + ( real_type )+0.000089636254293407267781422284128181, /* imag part, component ii = 4 */ + ( real_type )-3.2748235779389763984810293617617e-13, /* real part, component ii = 5 */ + ( real_type 
)-0.00001641388904265690541641987229264, /* imag part, component ii = 5 */ +}; + +#else /* !defined( _GPUCODE ) && + ( ( N_FOURIER != 24 ) || ( TM != 12 ) || ( N_TAYLOR != 6 ) ) */ + + #error "precomputed fourier coefficients only provided for " \ + "SIXTRL_CERRF_ABQ2011_N_FOURIER == 24, "\ + "SIXTRL_CERRF_ABQ2011_TM == 12, and " \ + "SIXTRL_CERRF_ABQ2011_NUM_TAYLOR_COEFF == 6 " \ + "-> provide your own tabulated data for other configurations" + +#endif /* !defined( _GPUCODE ) */ From 8413fa79a5d1379bfc7ac8533857d19b42fbe675 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:13:55 +0200 Subject: [PATCH 62/77] common: enables building new faddeeva implementation --- sixtracklib/common/be_beamfields/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sixtracklib/common/be_beamfields/CMakeLists.txt b/sixtracklib/common/be_beamfields/CMakeLists.txt index a3c011a8b..58469023e 100644 --- a/sixtracklib/common/be_beamfields/CMakeLists.txt +++ b/sixtracklib/common/be_beamfields/CMakeLists.txt @@ -1,7 +1,10 @@ -set( SIXTRL_COMMON_BE_BEAMFIELDS_SOURCES be_beamfields.c ) +set( SIXTRL_COMMON_BE_BEAMFIELDS_SOURCES + be_beamfields.c faddeeva.c dawson_approx.c dawson_coeff_xn96_ntmax25.c + abq2011_coeff_nf24_tm12.c abq2011_coeff_poles_nf24_tm12_nt6.c ) set( SIXTRL_COMMON_BE_BEAMFIELDS_HEADERS - be_beamfields.h track.h faddeeva_cern.h gauss_fields.h ) + be_beamfields.h track.h faddeeva.h abq2011_coeff.h + dawson_approx.h dawson_coeff.h gauss_fields.h ) add_library( sixtrack_common_be_beamfields OBJECT ${SIXTRL_COMMON_BE_BEAMFIELDS_HEADERS} From 8e48346adf7867741d3fc0d9e4006be15ac941bc Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:21:23 +0200 Subject: [PATCH 63/77] opencl: adds helpers - adds helper functions for 1D grids to get global and local ids and sizes - adds searching for largest elements in a (shared memory) array bound to wavefront / warps --- sixtracklib/opencl/helpers.h | 115
+++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 sixtracklib/opencl/helpers.h diff --git a/sixtracklib/opencl/helpers.h b/sixtracklib/opencl/helpers.h new file mode 100644 index 000000000..e21505955 --- /dev/null +++ b/sixtracklib/opencl/helpers.h @@ -0,0 +1,115 @@ +#ifndef SIXTRACKLIB_OPENCL_HELPERS_H__ +#define SIXTRACKLIB_OPENCL_HELPERS_H__ + +#if !defined( SIXTRL_NO_INCLUDES ) + #include "sixtracklib/common/definitions.h" + #if defined( _GPUCODE ) && defined( __OPENCL_C_VERSION__ ) + #include "sixtracklib/opencl/opencl.h" + #endif /* defined( _GPUCODE ) && defined( __OPENCL_C_VERSION__ ) */ +#endif /* !defined( SIXTRL_NO_INCLUDES ) */ + +#if !defined( _GPUCODE ) && defined( __cplusplus ) +extern "C" { +#endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ + +SIXTRL_STATIC SIXTRL_FN SIXTRL_UINT64_T NS(Grid_global_id)( void ) SIXTRL_NOEXCEPT; +SIXTRL_STATIC SIXTRL_FN SIXTRL_UINT64_T NS(Grid_gloabl_size)( void ) SIXTRL_NOEXCEPT; + +SIXTRL_STATIC SIXTRL_FN SIXTRL_UINT64_T NS(Grid_local_id)( void ) SIXTRL_NOEXCEPT; +SIXTRL_STATIC SIXTRL_FN SIXTRL_UINT64_T NS(Grid_local_size)( void ) SIXTRL_NOEXCEPT; + +#if defined( _GPUCODE ) && defined( __OPENCL_VERSION__ ) && \ + ( __OPENCL_VERSION__ >= 120 ) + + #if !defined( SIXTRL_SHARED_BUILD_ARRAY ) + #define SIXTRL_SHARED_BUILD_ARRAY( T, values, local_value ) \ + do {\ + unsigned int const local_id = ( unsigned int )get_local_id( 0 ); \ + values[ local_id ] = ( local_value ); \ + barrier( CLK_LOCAL_MEM_FENCE ); \ + } while( false ) + + #endif /* !defined( SIXTRL_SHARED_BUILD_ARRAY ) */ + + #if !defined( SIXTRL_SHARED_FIND_MAX_PER_W ) + #define SIXTRL_SHARED_FIND_MAX_PER_W( T, values, N_w, result ) \ + do {\ + unsigned int const local_id = ( unsigned int )get_local_id( 0 ); \ + unsigned int const n_wavefront = ( N_w ) * ( ( local_id ) / ( N_w ) ); \ + unsigned int const id_in_wavefront = ( local_id ) % ( N_w ); \ + unsigned int n = ( N_w ) >> 1u; \ + for( ; n > 0 ; n >>= 1u ) { \ 
+ unsigned int const cmp_idx = local_id + n;\ + barrier( CLK_LOCAL_MEM_FENCE );\ + if( id_in_wavefront < n ) \ + values[ local_id ] = max( values[ local_id ], values[ cmp_idx ] ); }\ + result = values[ n_wavefront ]; } while( false ) + + #endif /* !defined( SIXTRL_SHARED_FIND_MAX_PER_W ) */ + + #if !defined( SIXTRL_SHARED_OR_PER_W ) + #define SIXTRL_SHARED_OR_PER_W( T, values, flag, N_w, result ) \ + do {\ + unsigned int const local_id = ( unsigned int )get_local_id( 0 ); \ + unsigned int const n_wavefront = ( N_w ) * ( ( local_id ) / ( N_w ) ); \ + atomic_or( &values[ n_wavefront ], ( T )( flag ) ); \ + barrier( CLK_LOCAL_MEM_FENCE ); \ + result = ( values[ n_wavefront ] != 0 ); \ + } while( false ) + + #endif /* !defined( SIXTRL_SHARED_OR_PER_W ) */ + + /* ********************************************************************* */ + + SIXTRL_INLINE SIXTRL_UINT64_T NS(Grid_global_id)() SIXTRL_NOEXCEPT { + return ( SIXTRL_UINT64_T )get_global_id( 0 ); } + + SIXTRL_INLINE SIXTRL_UINT64_T NS(Grid_gloabl_size)() SIXTRL_NOEXCEPT { + return ( SIXTRL_UINT64_T )get_global_size( 0 ); } + + SIXTRL_INLINE SIXTRL_UINT64_T NS(Grid_local_id)() SIXTRL_NOEXCEPT { + return ( SIXTRL_UINT64_T )get_local_id( 0 ); } + + SIXTRL_INLINE SIXTRL_UINT64_T NS(Grid_local_size)() SIXTRL_NOEXCEPT { + return ( SIXTRL_UINT64_T )get_local_size( 0 ); } + +#else /* defined( _GPUCODE ) && defined( __OPENCL_VERSION__ ) && + ( __OPENCL_VERSION__ >= 120 ) */ + + #if !defined( SIXTRL_SHARED_BUILD_ARRAY ) && defined( __OPENCL_VERSION__ ) + #define SIXTRL_SHARED_BUILD_ARRAY( T, values, local_value ) \ + do { } while( false ) + + #endif /* !defined( SIXTRL_SHARED_BUILD_ARRAY ) */ + + #if !defined( SIXTRL_SHARED_FIND_MAX_PER_W ) && defined( __OPENCL_VERSION__ ) + #define SIXTRL_SHARED_FIND_MAX_PER_W( T, values, local_id, N_w, result ) \ + do { result = ( T )0u; } while( false ) + + #endif /* !defined( SIXTRL_SHARED_FIND_MAX_PER_W ) */ + + #if !defined( SIXTRL_SHARED_OR_PER_W ) && defined( __OPENCL_VERSION__ ) +
#define SIXTRL_SHARED_OR_PER_W( T, values, flag, local_id, N_w, result ) \ + do { result = ( flag ); } while( false ) + + #endif /* !defined( SIXTRL_SHARED_OR_PER_W ) */ + + SIXTRL_INLINE SIXTRL_UINT64_T NS(Grid_global_id)() SIXTRL_NOEXCEPT { + return ( SIXTRL_UINT64_T )0u; } + + SIXTRL_INLINE SIXTRL_UINT64_T NS(Grid_gloabl_size)() SIXTRL_NOEXCEPT { + return ( SIXTRL_UINT64_T )0u; } + + SIXTRL_INLINE SIXTRL_UINT64_T NS(Grid_local_id)() SIXTRL_NOEXCEPT { + return ( SIXTRL_UINT64_T )0u; } + + SIXTRL_INLINE SIXTRL_UINT64_T NS(Grid_local_size)() SIXTRL_NOEXCEPT { + return ( SIXTRL_UINT64_T )0u; } + +#endif /* defined( _GPUCODE ) && defined( __OPENCL_VERSION__ ) && + ( __OPENCL_VERSION__ >= 120 ) */ + +#if !defined( _GPUCODE ) && defined( __cplusplus ) +} +#endif /* !defined( _GPUCODE ) && defined( __cplusplus ) */ +#endif /* SIXTRACKLIB_OPENCL_HELPERS_H__ */ From 52e7a2308ba33952c85baadd5c79db787290ba3f Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Fri, 3 Sep 2021 14:24:11 +0200 Subject: [PATCH 64/77] sixtracklib: adds new headers for faddeeva implementation --- sixtracklib/sixtracklib.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sixtracklib/sixtracklib.h b/sixtracklib/sixtracklib.h index f17bb9fe9..44ae58b6e 100644 --- a/sixtracklib/sixtracklib.h +++ b/sixtracklib/sixtracklib.h @@ -21,10 +21,11 @@ #include "sixtracklib/common/buffer/buffer_object.h" #include "sixtracklib/common/buffer/buffer_garbage.h" #include "sixtracklib/common/buffer/buffer_generic.h" +#include "sixtracklib/common/be_beamfields/definitions.h" #include "sixtracklib/common/be_beamfields/be_beamfields.h" -#include "sixtracklib/common/be_beamfields/track.h" -#include "sixtracklib/common/be_beamfields/faddeeva_cern.h" +#include "sixtracklib/common/be_beamfields/faddeeva.h" #include "sixtracklib/common/be_beamfields/gauss_fields.h" +#include "sixtracklib/common/be_beamfields/track.h" #include "sixtracklib/common/be_cavity/be_cavity.h" #include
"sixtracklib/common/be_cavity/track.h" #include "sixtracklib/common/be_drift/be_drift.h" From a02f581441daa243ad809c92f3c539bea70fe827 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sat, 4 Sep 2021 15:28:42 +0200 Subject: [PATCH 65/77] tests/python: adds unit tests for faddeeva implementation --- .../beam_elements/beamfields/CMakeLists.txt | 8 ++ .../beam_elements/beamfields/test_alg680.py | 45 +++++++++ .../beam_elements/beamfields/test_cernlib.py | 94 +++++++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 tests/python/beam_elements/beamfields/CMakeLists.txt create mode 100644 tests/python/beam_elements/beamfields/test_alg680.py create mode 100644 tests/python/beam_elements/beamfields/test_cernlib.py diff --git a/tests/python/beam_elements/beamfields/CMakeLists.txt b/tests/python/beam_elements/beamfields/CMakeLists.txt new file mode 100644 index 000000000..c62e57862 --- /dev/null +++ b/tests/python/beam_elements/beamfields/CMakeLists.txt @@ -0,0 +1,8 @@ +add_test( NAME Python_BeamElements_BeamFields_FaddeevaCERNLib + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_cernlib.py + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python ) + +add_test( NAME Python_BeamElements_BeamFields_FaddeevaAlg680 + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_alg680.py + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python ) + diff --git a/tests/python/beam_elements/beamfields/test_alg680.py b/tests/python/beam_elements/beamfields/test_alg680.py new file mode 100644 index 000000000..98865ed55 --- /dev/null +++ b/tests/python/beam_elements/beamfields/test_alg680.py @@ -0,0 +1,45 @@ +import numpy as np +import ctypes as ct +from scipy.special import wofz as wofz_scipy +from sixtracklib.stcommon import st_cerrf_alg680_q1 + +if __name__ == '__main__': + x_oe = np.logspace( -8, 8, 101, dtype=np.float64 ) + y_oe = np.logspace( -8, 8, 101, dtype=np.float64 ) + + n_re = len( x_oe ) + n_im = len( y_oe ) + + wz_re_cmp = np.arange( n_im * n_re,
dtype=np.float64 ).reshape( n_im, n_re ) + wz_im_cmp = np.arange( n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + for jj, y in enumerate( y_oe ): + for ii, x in enumerate( x_oe ): + wz = wofz_scipy( x + 1.0j * y ) + wz_re_cmp[ jj, ii ] = wz.real + wz_im_cmp[ jj, ii ] = wz.imag + + # -------------------------------------------------------------------------- + # ACM algorithm 680 + + wz_re_alg680 = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + wz_im_alg680 = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + out_re = ct.c_double( 0. ) + out_im = ct.c_double( 0. ) + + for jj, y in enumerate( y_oe ): + for ii, x in enumerate( x_oe ): + st_cerrf_alg680_q1( ct.c_double( x ), ct.c_double( y ), + ct.byref( out_re ), ct.byref( out_im ) ) + wz_re_alg680[ jj, ii ] = np.float64( out_re ) + wz_im_alg680[ jj, ii ] = np.float64( out_im ) + + print( np.fabs( wz_re_cmp - wz_re_alg680 ).max() ) + print( np.fabs( wz_im_cmp - wz_im_alg680 ).max() ) + + assert np.fabs( wz_re_cmp - wz_re_alg680 ).max() < 1e-10 + assert np.fabs( wz_im_cmp - wz_im_alg680 ).max() < 1e-10 diff --git a/tests/python/beam_elements/beamfields/test_cernlib.py b/tests/python/beam_elements/beamfields/test_cernlib.py new file mode 100644 index 000000000..b29935bda --- /dev/null +++ b/tests/python/beam_elements/beamfields/test_cernlib.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import numpy as np +import ctypes as ct +from scipy.special import wofz as wofz_scipy +from sixtracklib.stcommon import \ + st_cerrf_cernlib_c_baseline_q1, \ + st_cerrf_cernlib_c_upstream_q1, \ + st_cerrf_cernlib_c_optimised_q1 + +if __name__ == '__main__': + x_oe = np.logspace( -8, 8, 101, dtype=np.float64 ) + y_oe = np.logspace( -8, 8, 101, dtype=np.float64 ) + + n_re = len( x_oe ) + n_im = len( y_oe ) + + wz_re_cmp = np.arange( n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + wz_im_cmp = np.arange( n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) 
+ + for jj, y in enumerate( y_oe ): + for ii, x in enumerate( x_oe ): + wz = wofz_scipy( x + 1.0j * y ) + wz_re_cmp[ jj, ii ] = wz.real + wz_im_cmp[ jj, ii ] = wz.imag + + # -------------------------------------------------------------------------- + # CERNLib C baseline + + wz_re_cernlib_baseline = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + wz_im_cernlib_baseline = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + out_re = ct.c_double( 0. ) + out_im = ct.c_double( 0. ) + + for jj, y in enumerate( y_oe ): + for ii, x in enumerate( x_oe ): + st_cerrf_cernlib_c_baseline_q1( ct.c_double( x ), ct.c_double( y ), + ct.byref( out_re ), ct.byref( out_im ) ) + wz_re_cernlib_baseline[ jj, ii ] = np.float64( out_re ) + wz_im_cernlib_baseline[ jj, ii ] = np.float64( out_im ) + + assert np.fabs( wz_re_cmp - wz_re_cernlib_baseline ).max() < 1e-10 + assert np.fabs( wz_im_cmp - wz_im_cernlib_baseline ).max() < 1e-10 + + # -------------------------------------------------------------------------- + # CERNLib C upstream + + wz_re_cernlib_upstream = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + wz_im_cernlib_upstream = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + out_re = ct.c_double( 0. ) + out_im = ct.c_double( 0. 
) + + for jj, y in enumerate( y_oe ): + for ii, x in enumerate( x_oe ): + st_cerrf_cernlib_c_upstream_q1( ct.c_double( x ), ct.c_double( y ), + ct.byref( out_re ), ct.byref( out_im ) ) + wz_re_cernlib_upstream[ jj, ii ] = np.float64( out_re ) + wz_im_cernlib_upstream[ jj, ii ] = np.float64( out_im ) + + assert np.fabs( wz_re_cmp - wz_re_cernlib_upstream ).max() < 1e-10 + assert np.fabs( wz_im_cmp - wz_im_cernlib_upstream ).max() < 1e-10 + + # -------------------------------------------------------------------------- + # CERNLib C optimised + + wz_re_cernlib_optimised = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + wz_im_cernlib_optimised = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + out_re = ct.c_double( 0. ) + out_im = ct.c_double( 0. ) + + for jj, y in enumerate( y_oe ): + for ii, x in enumerate( x_oe ): + st_cerrf_cernlib_c_optimised_q1( ct.c_double( x ), ct.c_double( y ), + ct.byref( out_re ), ct.byref( out_im ) ) + wz_re_cernlib_optimised[ jj, ii ] = np.float64( out_re ) + wz_im_cernlib_optimised[ jj, ii ] = np.float64( out_im ) + + assert np.fabs( wz_re_cmp - wz_re_cernlib_optimised ).max() < 1e-10 + assert np.fabs( wz_im_cmp - wz_im_cernlib_optimised ).max() < 1e-10 + + From 680c115673a1ffb8500d58e1935714155563d687 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sat, 4 Sep 2021 15:30:25 +0200 Subject: [PATCH 66/77] tests/python: enable beamfields test --- tests/python/beam_elements/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/python/beam_elements/CMakeLists.txt b/tests/python/beam_elements/CMakeLists.txt index 50402083b..cebb9eb7e 100644 --- a/tests/python/beam_elements/CMakeLists.txt +++ b/tests/python/beam_elements/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory( beamfields ) + add_test( NAME Python_BeamElements_MultiPole COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_multipole.py WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python ) From
9cc99317dc4dcf2e1b51a0c5655c75070c81bd75 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sat, 4 Sep 2021 15:31:26 +0200 Subject: [PATCH 67/77] common: fixes bugs and inconsistencies for Faddeeva implementation --- sixtracklib/common/be_beamfields/faddeeva.c | 2 +- sixtracklib/common/be_beamfields/faddeeva.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sixtracklib/common/be_beamfields/faddeeva.c b/sixtracklib/common/be_beamfields/faddeeva.c index 14f61aa65..7a6da6cdf 100644 --- a/sixtracklib/common/be_beamfields/faddeeva.c +++ b/sixtracklib/common/be_beamfields/faddeeva.c @@ -37,7 +37,7 @@ void NS(cerrf_cernlib_c_upstream_q1_ext)( SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag ) SIXTRL_NOEXCEPT { - NS(cerrf_cernlib_c_upstream_q1_ext)( x, y, out_real, out_imag ); + NS(cerrf_cernlib_c_upstream_q1)( x, y, out_real, out_imag ); } void NS(cerrf_cernlib_c_optimised_q1_ext)( diff --git a/sixtracklib/common/be_beamfields/faddeeva.h b/sixtracklib/common/be_beamfields/faddeeva.h index 7e65ffc9e..39f24f108 100644 --- a/sixtracklib/common/be_beamfields/faddeeva.h +++ b/sixtracklib/common/be_beamfields/faddeeva.h @@ -343,6 +343,7 @@ SIXTRL_INLINE void NS(cerrf_cernlib_c_upstream_q1)( else { Rx[ 0 ] = Ry[ 0 ] = ( real_type )0.0; + n = ( int )SIXTRL_CERRF_CERNLIB_UPSTREAM_K; for( ; n > 0 ; --n ) { @@ -423,8 +424,8 @@ SIXTRL_INLINE void NS(cerrf_cernlib_c_optimised_q1)( int n = 0; bool use_taylor_sum = ( - ( y < ( real_type )SIXTRL_CERRF_CERNLIB_X0 ) && - ( x < ( real_type )SIXTRL_CERRF_CERNLIB_Y0 ) ); + ( y < ( real_type )SIXTRL_CERRF_CERNLIB_Y0 ) && + ( x < ( real_type )SIXTRL_CERRF_CERNLIB_X0 ) ); #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) From bf811dd597a19a0a19bdb7b15f609c30d5bf02bb Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sat, 4 Sep 2021 15:46:07 +0200 Subject: [PATCH 68/77] python: adds bindings for the faddeeva and dawson methods --- 
python/sixtracklib/stcommon.py | 73 +++++++++++++++++++ .../common/be_beamfields/dawson_approx.c | 3 + .../common/be_beamfields/dawson_approx.h | 3 + 3 files changed, 79 insertions(+) diff --git a/python/sixtracklib/stcommon.py b/python/sixtracklib/stcommon.py index 87d8fe8ac..088695ee2 100644 --- a/python/sixtracklib/stcommon.py +++ b/python/sixtracklib/stcommon.py @@ -6693,6 +6693,79 @@ def Math_q_gauss(x, q, sqrt_beta, mu=None): # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # SC related methods: +# Faddeeva / Dawson related methods + +st_cerrf_cernlib_c_baseline_q1 = sixtracklib.st_cerrf_cernlib_c_baseline_q1_ext +st_cerrf_cernlib_c_baseline_q1.restype = None +st_cerrf_cernlib_c_baseline_q1.argtypes = [ + ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_cerrf_cernlib_c_upstream_q1 = sixtracklib.st_cerrf_cernlib_c_upstream_q1_ext +st_cerrf_cernlib_c_upstream_q1.restype = None +st_cerrf_cernlib_c_upstream_q1.argtypes = [ + ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_cerrf_cernlib_c_optimised_q1 = sixtracklib.st_cerrf_cernlib_c_optimised_q1_ext +st_cerrf_cernlib_c_optimised_q1.restype = None +st_cerrf_cernlib_c_optimised_q1.argtypes = [ + ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_cerrf_alg680_q1 = sixtracklib.st_cerrf_alg680_q1_ext +st_cerrf_alg680_q1.restype = None +st_cerrf_alg680_q1.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_cerrf_abq2011_a_m_coeff = sixtracklib.st_cerrf_abq2011_a_m_coeff_ext +st_cerrf_abq2011_a_m_coeff.restype = ct.c_double +st_cerrf_abq2011_a_m_coeff.argtypes = [ ct.c_int ] + +st_cerrf_abq2011_q1 = sixtracklib.st_cerrf_abq2011_q1_ext +st_cerrf_abq2011_q1.restype = None +st_cerrf_abq2011_q1.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_cerrf_abq2011_q1_coeff = sixtracklib.st_cerrf_abq2011_q1_coeff_ext +st_cerrf_abq2011_q1_coeff.restype = None +st_cerrf_abq2011_q1_coeff.argtypes = [ + ct.c_double, ct.c_double, 
st_double_p, st_double_p, st_double_p, st_double_p, ] + +st_cerrf_q1 = sixtracklib.st_cerrf_q1_ext +st_cerrf_q1.restype = None +st_cerrf_q1.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_cerrf = sixtracklib.st_cerrf_ext +st_cerrf.restype = None +st_cerrf.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] + + +st_dawson_n_interval = sixtracklib.st_dawson_n_interval_ext +st_dawson_n_interval.restype = ct.c_int +st_dawson_n_interval.argtypes = [ ct.c_double ] + +st_dawson_xi = sixtracklib.st_dawson_xi_ext +st_dawson_xi.restype = ct.c_double +st_dawson_xi.argtypes = [ ct.c_int ] + +st_dawson_fz_xi = sixtracklib.st_dawson_fz_xi_ext +st_dawson_fz_xi.restype = ct.c_double +st_dawson_fz_xi.argtypes = [ ct.c_int ] + +st_dawson_nt_xi_abs_d10 = sixtracklib.st_dawson_nt_xi_abs_d10_ext +st_dawson_nt_xi_abs_d10.restype = ct.c_double +st_dawson_nt_xi_abs_d10.argtypes = [ ct.c_int ] + +st_dawson_nt_xi_rel_d14 = sixtracklib.st_dawson_nt_xi_rel_d14_ext +st_dawson_nt_xi_rel_d14.restype = ct.c_double +st_dawson_nt_xi_rel_d14.argtypes = [ ct.c_int ] + +st_dawson_cerrf = sixtracklib.st_dawson_cerrf_nocoeff_ext +st_dawson_cerrf.restype = None +st_dawson_cerrf.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_dawson_cerrf_coeff = sixtracklib.st_dawson_cerrf_ext +st_dawson_cerrf_coeff.restype = None +st_dawson_cerrf_coeff.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +# Beam Elements + st_BeamBeam4D_p = ct.c_void_p st_NullBeamBeam4D = ct.cast(0, st_BeamBeam4D_p) diff --git a/sixtracklib/common/be_beamfields/dawson_approx.c b/sixtracklib/common/be_beamfields/dawson_approx.c index 523f891cf..f11ad65e1 100644 --- a/sixtracklib/common/be_beamfields/dawson_approx.c +++ b/sixtracklib/common/be_beamfields/dawson_approx.c @@ -22,6 +22,9 @@ SIXTRL_REAL_T NS(dawson_fz_xi_ext)( int n_interval ) SIXTRL_NOEXCEPT { int NS(dawson_nt_xi_abs_d10_ext)( int n_interval ) SIXTRL_NOEXCEPT { return NS(dawson_nt_xi_abs_d10)( 
n_interval ); } +int NS(dawson_nt_xi_rel_d14_ext)( int n_interval ) SIXTRL_NOEXCEPT { + return NS(dawson_nt_xi_rel_d14)( n_interval ); } + void NS(dawson_cerrf_nocoeff_ext)( SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, diff --git a/sixtracklib/common/be_beamfields/dawson_approx.h b/sixtracklib/common/be_beamfields/dawson_approx.h index d192d9a42..5f3198596 100644 --- a/sixtracklib/common/be_beamfields/dawson_approx.h +++ b/sixtracklib/common/be_beamfields/dawson_approx.h @@ -63,6 +63,9 @@ SIXTRL_EXTERN SIXTRL_HOST_FN SIXTRL_REAL_T NS(dawson_fz_xi_ext)( SIXTRL_EXTERN SIXTRL_HOST_FN int NS(dawson_nt_xi_abs_d10_ext)( int n_interval ) SIXTRL_NOEXCEPT; +SIXTRL_EXTERN SIXTRL_HOST_FN int NS(dawson_nt_xi_rel_d14_ext)( + int n_interval ) SIXTRL_NOEXCEPT; + SIXTRL_EXTERN SIXTRL_HOST_FN void NS(dawson_cerrf_nocoeff_ext)( SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, From 69ee3ec07caf9b9e21d44745086ff100ea83e197 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sat, 4 Sep 2021 17:49:25 +0200 Subject: [PATCH 69/77] common: fixes bugs in Alg680 Faddeeva implementation --- sixtracklib/common/be_beamfields/faddeeva.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sixtracklib/common/be_beamfields/faddeeva.h b/sixtracklib/common/be_beamfields/faddeeva.h index 39f24f108..f3175090c 100644 --- a/sixtracklib/common/be_beamfields/faddeeva.h +++ b/sixtracklib/common/be_beamfields/faddeeva.h @@ -802,8 +802,8 @@ SIXTRL_INLINE void NS(cerrf_alg680_q1)( real_type nn_plus_1; int n; - int N = 0; - int nu = 0; + int N = -1; + int nu = 0; if( !use_cont_fraction ) { @@ -825,12 +825,12 @@ SIXTRL_INLINE void NS(cerrf_alg680_q1)( two_h_n = NS(pow_int_exp)( two_h_n, N ); - if( two_h_n > ( real_type )SIXTRL_CERRF_ALG680_MIN_POW_2H_N ) + if( two_h_n <= ( real_type )SIXTRL_CERRF_ALG680_MIN_POW_2H_N ) { - use_cont_fraction = false; + 
use_cont_fraction = true; two_h_n = ( real_type )0.0; inv_two_h = ( real_type )1.0; - N = 0; + N = -1; } } else @@ -875,7 +875,7 @@ SIXTRL_INLINE void NS(cerrf_alg680_q1)( sy = ( ry * tx ) + ( rx * sy ); } - if( !use_cont_fraction ) + if( use_cont_fraction ) { wz_re = NS(MathConst_two_over_sqrt_pi)() * rx; wz_im = NS(MathConst_two_over_sqrt_pi)() * ry; From c0992c689276a2199647f820e06ae517c894edee Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sat, 4 Sep 2021 18:38:13 +0200 Subject: [PATCH 70/77] tests/python: removes duplicate test name --- tests/python/beam_elements/beamfields/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/beam_elements/beamfields/CMakeLists.txt b/tests/python/beam_elements/beamfields/CMakeLists.txt index c62e57862..974ce033e 100644 --- a/tests/python/beam_elements/beamfields/CMakeLists.txt +++ b/tests/python/beam_elements/beamfields/CMakeLists.txt @@ -1,8 +1,8 @@ -add_test( NAME Python_BeamElements_BeamFields_FaddeevaCERNLib +add_test( NAME Python_BeamElements_BeamFields_Faddeeva_CERNLib COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_cernlib.py WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python ) -add_test( NAME Python_BeamElements_BeamFields_FaddeevaCERNLib +add_test( NAME Python_BeamElements_BeamFields_Faddeeva_Alg680 COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_alg680.py WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python ) From a85db18862079d1176ff4beceedbbf36b1b5a906 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sat, 4 Sep 2021 18:39:09 +0200 Subject: [PATCH 71/77] common: fixes algorithm 680 related constants - Decrease SIXTRL_CERRF_ALG680_MIN_POW_2H_N to 2.22507385850720e-307 which is one order of magnitude above the smallest possible 64 Bit double precision number. 
This reduces the chance of hitting the limit close to the boundaries between the compute regions - consequently, the SIXTRL_CERRF_ALG680_MIN_TWO_H_VALUE has to be decreased to allow for SIXTRL_CERRF_ALG680_MIN_TWO_H_VALUE ^ ( N_S0 + N_S1 ) to still be >= than SIXTRL_CERRF_ALG680_MIN_POW_2H_N --- sixtracklib/common/be_beamfields/definitions.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sixtracklib/common/be_beamfields/definitions.h b/sixtracklib/common/be_beamfields/definitions.h index ac8e996c9..61d4e691d 100644 --- a/sixtracklib/common/be_beamfields/definitions.h +++ b/sixtracklib/common/be_beamfields/definitions.h @@ -275,11 +275,12 @@ extern "C" { #endif /* !defined( SIXTRL_CERRF_ALG680_K_Q1 ) */ #if !defined( SIXTRL_CRRF_ALG680_MIN_TWO_H_VALUE ) - #define SIXTRL_CERRF_ALG680_MIN_TWO_H_VALUE 2.22044604925031e-16 + #define SIXTRL_CERRF_ALG680_MIN_TWO_H_VALUE \ + 0.0000000499742512566561760675438910573983237175094 #endif /* !defined( SIXTRL_CRRF_ALG680_MIN_TWO_H_VALUE ) */ #if !defined( SIXTRL_CERRF_ALG680_MIN_POW_2H_N ) - #define SIXTRL_CERRF_ALG680_MIN_POW_2H_N 2.22044604925031e-16 + #define SIXTRL_CERRF_ALG680_MIN_POW_2H_N 2.22507385850720e-307 #endif /* !defined( SIXTRL_CERRF_ALG680_MIN_POW_2H_N ) */ #if !defined( SIXTRL_CERRF_ALG680_MIN_Y ) From 3b7fbe153f18f02c82297ad7464f745c42c151d7 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sun, 5 Sep 2021 15:06:56 +0200 Subject: [PATCH 72/77] common: adds faddeeva implementation --- sixtracklib/common/be_beamfields/faddeeva.c | 32 ++ sixtracklib/common/be_beamfields/faddeeva.h | 338 ++++++++++++++++---- 2 files changed, 309 insertions(+), 61 deletions(-) diff --git a/sixtracklib/common/be_beamfields/faddeeva.c b/sixtracklib/common/be_beamfields/faddeeva.c index 7a6da6cdf..5719b50f8 100644 --- a/sixtracklib/common/be_beamfields/faddeeva.c +++ b/sixtracklib/common/be_beamfields/faddeeva.c @@ -68,6 +68,34 @@ void NS(cerrf_cernlib_c_optimised_q1_ext)( #endif /* 
SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ } +void NS(cerrf_cernlib_c_optimised_fixed_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT +{ + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(cerrf_cernlib_c_optimised_fixed_q1)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ] ); + + #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + NS(cerrf_cernlib_c_optimised_fixed_q1)( x, y, out_real, out_imag, + &NS(CERRF_DAWSON_XI)[ 0 ],&NS(CERRF_DAWSON_FZ_XI)[ 0 ], + &NS(CERRF_DAWSON_NT_XI_REL_D14)[ 0 ], + &NS(CERRF_DAWSON_FZ_KK_XI)[ 0 ] ); + + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_APPROX || SIXTRL_CERRF_USE_DAWSON_COEFF == 0 */ + NS(cerrf_cernlib_c_optimised_fixed_q1)( x, y, out_real, out_imag ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +} + void NS(cerrf_alg680_q1_ext)( SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, @@ -167,6 +195,8 @@ void NS(cerrf_q1_ext)( NS(cerrf_cernlib_c_baseline_q1_ext)( x, y, out_real, out_imag ); #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) NS(cerrf_cernlib_c_upstream_q1_ext)( x, y, out_real, out_imag ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_FIXED ) + NS(cerrf_cernlib_c_optimised_q1_fixed_ext)( x, y, out_real, out_imag ); #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 ) NS(cerrf_alg680_q1_ext)( x, y, out_real, out_imag ); #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ABQ2011 ) @@ -200,6 +230,8 @@ void NS(cerrf_ext)( 
NS(cerrf_cernlib_c_baseline_q1_ext)( x, y, &Wx, &Wy ); #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) NS(cerrf_cernlib_c_upstream_q1_ext)( x, y, &Wx, &Wy ); + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_FIXED ) + NS(cerrf_cernlib_c_upstream_fixed_q1_ext)( x, y, &Wx, &Wy ); #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 ) NS(cerrf_alg680_q1_ext)( x, y, &Wx, &Wy ); #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ABQ2011 ) diff --git a/sixtracklib/common/be_beamfields/faddeeva.h b/sixtracklib/common/be_beamfields/faddeeva.h index f3175090c..047880b2f 100644 --- a/sixtracklib/common/be_beamfields/faddeeva.h +++ b/sixtracklib/common/be_beamfields/faddeeva.h @@ -54,6 +54,23 @@ SIXTRL_STATIC SIXTRL_FN void NS(cerrf_cernlib_c_optimised_q1)( #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ ) SIXTRL_NOEXCEPT; +SIXTRL_STATIC SIXTRL_FN void NS(cerrf_cernlib_c_optimised_fixed_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ +) SIXTRL_NOEXCEPT; + SIXTRL_STATIC SIXTRL_FN void NS(cerrf_alg680_q1)( SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, @@ -164,6 
+181,12 @@ SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_cernlib_c_optimised_q1_ext)( SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag ) SIXTRL_NOEXCEPT; +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_cernlib_c_optimised_fixed_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_alg680_q1_ext)( SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, @@ -179,6 +202,24 @@ SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_abq2011_q1_ext)( SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag ) SIXTRL_NOEXCEPT; +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_abq2011_cf_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_abq2011_cf_daw_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + +SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_abq2011_root_q1_ext)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_imag +) SIXTRL_NOEXCEPT; + SIXTRL_EXTERN SIXTRL_HOST_FN void NS(cerrf_abq2011_q1_coeff_ext)( SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_real, @@ -638,46 +679,200 @@ SIXTRL_INLINE void NS(cerrf_cernlib_c_optimised_q1)( Sy = Ry * Wx + Rx * Sy; } - if( use_taylor_sum ) + *out_x = NS(MathConst_two_over_sqrt_pi)() * ( + ( use_taylor_sum ) ? 
Sx : Rx ); + + *out_y = NS(MathConst_two_over_sqrt_pi)() * ( + ( use_taylor_sum ) ? Sy : Ry ); + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + } + else + { + #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) + NS(dawson_cerrf_coeff)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt ); + #elif defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + NS(dawson_cerrf_coeff)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #else + NS(dawson_cerrf)( x, y, out_x, out_y ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ + } + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ +} + + +/** \fn void cerrf_cernlib_c_optimised_fixed_q1( double const, double const, double*, double* ) + * \brief calculates the Faddeeva function w(z) for z = x + i * y in Q1 + * + * \param[in] x real component of argument z + * \param[in] y imaginary component of argument z + * \param[out] out_x pointer to real component of result + * \param[out] out_y pointer to imanginary component of result + * + * \warning This function assumes that x and y are > 0 i.e., that z is + * from the first quadrant Q1 of the complex plane. Use cerrf if + * you need a more general function + * + * \note Based upon the algorithm developed by W. Gautschi 1970, + * "Efficient Computation of the Complex Error Function", + * SIAM Journal on Numerical Analysis, Vol. 7, Issue 1. 
1970, + * pages 187-198, https://epubs.siam.org/doi/10.1137/0707012 + */ + +SIXTRL_INLINE void NS(cerrf_cernlib_c_optimised_fixed_q1)( + SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, + SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_y + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_XI_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT xi, + SIXTRL_CERRF_DAWSON_COEFF_FZ_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_xi, + SIXTRL_CERRF_DAWSON_COEFF_NT_DEC SIXTRL_INT32_TYPE const* SIXTRL_RESTRICT Fz_nt + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + , SIXTRL_CERRF_DAWSON_COEFF_TAYLOR_DEC SIXTRL_REAL_T const* SIXTRL_RESTRICT Fz_kk_xi + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) */ + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ +) SIXTRL_NOEXCEPT +{ + typedef SIXTRL_REAL_T real_type; + + /* This implementation corresponds closely to the previously used + * "CERNLib C" version, translated from the FORTRAN function written at + * CERN by K. Koelbig, Program C335, 1970. The main difference to + * Gautschi's formulation is a split in the main loop and the introduction + * of arrays to store the intermediate results as a consequence of this. + * The version implemented here should perform roughly equally well or even + * slightly better on modern out-of-order super-scalar CPUs but has + * drastically improved performance on GPUs and GPU-like systems. + * + * See also M. Bassetti and G.A. 
Erskine, + * "Closed expression for the electric field of a two-dimensional Gaussian + * charge density", CERN-ISR-TH/80-06; */ + + real_type h2_n; + real_type inv_h2 = ( real_type )1.0; + real_type y_plus_h = y; + int N = 0; + int nu; + + bool use_taylor_sum = ( + ( y < ( real_type )SIXTRL_CERRF_CERNLIB_Y0 ) && + ( x < ( real_type )SIXTRL_CERRF_CERNLIB_X0 ) ); + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + bool const use_dawson_approx = ( + ( x >= ( real_type )SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X ) && + ( x <= ( real_type )SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X ) && + ( y <= ( real_type )SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MAX_Y ) ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + use_taylor_sum &= !use_dawson_approx; + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + /* R_0 ... rectangle with width SIXTRL_CERRF_CERNLIB_X0 and + * height SIXTRL_CERRF_CERNLIB_Y0. Inside R_0, w(z) is calculated using + * a truncated Taylor expansion. Outside, a Gauss--Hermite + * quadrature in the guise of a continuos fraction is used */ + + if( use_taylor_sum ) + { + y_plus_h += ( real_type )SIXTRL_CERRF_CERNLIB_H_0; + h2_n = ( real_type )2. * ( real_type )SIXTRL_CERRF_CERNLIB_H_0; + inv_h2 = ( real_type )1. / h2_n; + + N = ( int )SIXTRL_CERRF_CERNLIB_UPSTREAM_N; + h2_n = NS(pow_int_exp)( h2_n, N - 1 ); + use_taylor_sum = ( h2_n > ( + real_type )SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N ); + } + + nu = ( !use_taylor_sum ) + ? ( int )SIXTRL_CERRF_CERNLIB_K : ( int )SIXTRL_CERRF_CERNLIB_UPSTREAM_NU; + + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + if( !use_dawson_approx ) + { + #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + + int n = ( y > ( real_type )SIXTRL_CERRF_CERNLIB_MIN_Y ) ? 
nu : 0; + real_type nn = ( real_type )n; + real_type Rx = ( y > ( real_type )SIXTRL_CERRF_CERNLIB_MIN_Y ) + ? ( real_type )0.0 + : exp( -x * x ) / NS(MathConst_two_over_sqrt_pi)(); + + real_type temp, Ry, Sx, Sy, Wx, Wy; + Ry = Sx = Sy = ( real_type )0.0; + + /* z outside of R_0: continued fraction / Gauss - Hermite quadrature + * z inside of R_0: first iterations of recursion until n == N */ + for( ; n > N ; --n, nn -= ( real_type )1.0 ) { - Wx = NS(MathConst_two_over_sqrt_pi)() * Sx; - Wy = NS(MathConst_two_over_sqrt_pi)() * Sy; + Wx = y_plus_h + nn * Rx; + Wy = x - nn * Ry; + temp = ( Wx * Wx ) + ( Wy * Wy ); + Rx = ( real_type )0.5 * Wx; + Ry = ( real_type )0.5 * Wy; + temp = ( real_type )1.0 / temp; + Rx *= temp; + Ry *= temp; } - else + + /* loop rejects everything if z is not in R_0 because then n == 0 + * already; otherwise, N iterations until taylor expansion + * is summed up */ + for( ; n > 0 ; --n, nn -= ( real_type )1.0 ) { - Wx = NS(MathConst_two_over_sqrt_pi)() * Rx; - Wy = NS(MathConst_two_over_sqrt_pi)() * Ry; + Wx = y_plus_h + nn * Rx; + Wy = x - nn * Ry; + temp = ( Wx * Wx ) + ( Wy * Wy ); + Rx = ( real_type )0.5 * Wx; + Ry = ( real_type )0.5 * Wy; + temp = ( real_type )1.0 / temp; + Rx *= temp; + Ry *= temp; + + Wx = h2_n + Sx; + h2_n *= inv_h2; + Sx = Rx * Wx - Ry * Sy; + Sy = Ry * Wx + Rx * Sy; } + *out_x = NS(MathConst_two_over_sqrt_pi)() * ( + ( use_taylor_sum ) ? Sx : Rx ); + + *out_y = NS(MathConst_two_over_sqrt_pi)() * ( + ( use_taylor_sum ) ? 
Sy : Ry ); + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) } else { - SIXTRL_CERRF_RESULT_DEC temp_wz_re; - SIXTRL_CERRF_RESULT_DEC temp_wz_im; - #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) - NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, - xi, Fz_xi, Fz_nt ); + NS(dawson_cerrf_coeff)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt ); #elif defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) - NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, + NS(dawson_cerrf_coeff)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt, Fz_kk_xi ); #else - NS(dawson_cerrf)( x, y, &temp_wz_re, &temp_wz_im ); + NS(dawson_cerrf)( x, y, out_x, out_y ); #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ - - Wx = temp_wz_re; - Wy = temp_wz_im; } #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ - - *out_x = Wx; - *out_y = Wy; } +/* ------------------------------------------------------------------------- */ + SIXTRL_INLINE void NS(cerrf_alg680_q1)( SIXTRL_REAL_T const x, SIXTRL_REAL_T const y, SIXTRL_CERRF_RESULT_DEC SIXTRL_REAL_T* SIXTRL_RESTRICT out_x, @@ -1027,9 +1222,6 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1)( real_type const y_squ = y * y; bool use_fourier_sum = true; - real_type wz_re = ( real_type )0.0; - real_type wz_im = ( real_type )0.0; - #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) bool const use_continued_fraction = ( ( x_squ + y_squ ) >= ( @@ -1070,6 +1262,9 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1)( real_type c3 = ( real_type )0.0; real_type c4 = ( real_type )1.0; + real_type wz_re = ( real_type )0.0; + real_type wz_im = ( real_type )0.0; + real_type const c1 = ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU * ( x + y ) * ( x - y ); @@ -1326,20 +1521,24 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1)( wz_re -= temp * ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI; wz_im += sum_im * ( real_type 
)SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI; + + *out_x = wz_re; + *out_y = wz_im; } #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) else if( use_continued_fraction ) { + real_type wz_re, wz_im; real_type rx = ( real_type )0.0; real_type ry = ( real_type )0.0; - real_type nn = ( real_type )CERRF_CONTINUOUS_FRACTION_K; + real_type nn = ( real_type )SIXTRL_CERRF_ABQ2011_CONT_FRACTION_K; for( ; nn > ( real_type )0. ; nn -= ( real_type )1. ) { - wz_re = in_y + nn * rx; - wz_im = in_x - nn * ry; + wz_re = y + nn * rx; + wz_im = x - nn * ry; temp = ( wz_re * wz_re + wz_im * wz_im ); rx = ( real_type )0.5 * wz_re; @@ -1350,8 +1549,8 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1)( ry *= temp; } - wz_re = NS(MathConst_two_over_sqrt_pi)() * rx; - wz_im = NS(MathConst_two_over_sqrt_pi)() * ry; + *out_x = NS(MathConst_two_over_sqrt_pi)() * rx; + *out_y = NS(MathConst_two_over_sqrt_pi)() * ry; } #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ @@ -1360,28 +1559,18 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1)( ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) else if( use_dawson_approx ) { - SIXTRL_CERRF_RESULT_DEC temp_re, temp_im; - #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) - NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, - xi, Fz_xi, Fz_nt ); + NS(dawson_cerrf_coeff)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt ); #elif defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) - NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, - xi, Fz_xi, Fz_nt, Fz_kk_xi ); + NS(dawson_cerrf_coeff)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt, Fz_kk_xi ); #else /* ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) */ - NS(dawson_cerrf)( x, y, &temp_wz_re, &temp_wz_im ); + NS(dawson_cerrf)( x, y, out_x, out_y ); #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ - - wz_re = temp_re; - wz_im = temp_im; } #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ - - *out_x = wz_re; - 
*out_y = wz_im; } @@ -1457,7 +1646,7 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1_coeff)( if( use_pole_taylor_approx ) { real_type d_pole_squ = y_squ; - N_POLE = ( int )NS(round)( x * ( real_type )CERRF_TM_OVER_PI ); + N_POLE = ( int )NS(round)( x * ( real_type )SIXTRL_CERRF_ABQ2011_TM_OVER_PI ); temp = x - ( ( real_type )SIXTRL_CERRF_ABQ2011_PI_OVER_TM * ( real_type )N_POLE ); @@ -1560,6 +1749,9 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1_coeff)( wz_re -= temp * ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI; wz_im += sum_im * ( real_type )SIXTRL_CERRF_ABQ2011_TM_SQU_OVER_SQRT_PI; + + *out_x = wz_re; + *out_y = wz_im; } #if defined( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION ) && \ ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) @@ -1567,12 +1759,12 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1_coeff)( { real_type rx = ( real_type )0.0; real_type ry = ( real_type )0.0; - real_type nn = ( real_type )CERRF_CONTINUOUS_FRACTION_K; + real_type nn = ( real_type )SIXTRL_CERRF_ABQ2011_CONT_FRACTION_K; for( ; nn > ( real_type )0. ; nn -= ( real_type )1. 
) { - wz_re = in_y + nn * rx; - wz_im = in_x - nn * ry; + wz_re = y + nn * rx; + wz_im = x - nn * ry; temp = ( wz_re * wz_re + wz_im * wz_im ); rx = ( real_type )0.5 * wz_re; @@ -1583,8 +1775,8 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1_coeff)( ry *= temp; } - wz_re = NS(MathConst_two_over_sqrt_pi)() * rx; - wz_im = NS(MathConst_two_over_sqrt_pi)() * ry; + *out_x = NS(MathConst_two_over_sqrt_pi)() * rx; + *out_y = NS(MathConst_two_over_sqrt_pi)() * ry; } #endif /* ( SIXTRL_CERRF_ABQ2011_USE_CONT_FRACTION == 1 ) */ @@ -1592,22 +1784,15 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1_coeff)( ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) else if( use_dawson_approx ) { - SIXTRL_CERRF_RESULT_DEC temp_re, temp_im; - #if defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ ( SIXTRL_CERRF_USE_DAWSON_COEFF == 1 ) - NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, - xi, Fz_xi, Fz_nt ); + NS(dawson_cerrf_coeff)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt ); #elif defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) - NS(dawson_cerrf_coeff)( x, y, &temp_wz_re, &temp_wz_im, - xi, Fz_xi, Fz_nt, Fz_kk_xi ); + NS(dawson_cerrf_coeff)( x, y, out_x, out_y, xi, Fz_xi, Fz_nt, Fz_kk_xi ); #else - NS(dawson_cerrf)( x, y, &temp_wz_re, &temp_wz_im ); + NS(dawson_cerrf)( x, y, out_x, out_y ); #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF ) */ - - wz_re = temp_re; - wz_im = temp_im; } #endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ @@ -1655,12 +1840,12 @@ SIXTRL_INLINE void NS(cerrf_abq2011_q1_coeff)( wz_re -= b_n_value * dz_nn_im; wz_im += b_n_value * dz_nn_re; } + + *out_x = wz_re; + *out_y = wz_im; } #endif /* ( SIXTRL_CERRF_ABQ2011_USE_TAYLOR_POLE_APPROX == 1 ) */ - - *out_x = wz_re; - *out_y = wz_im; } @@ -1701,6 +1886,21 @@ SIXTRL_INLINE void NS(cerrf_q1)( NS(cerrf_cernlib_c_baseline_q1)( x, y, out_x, out_y ); #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) NS(cerrf_cernlib_c_upstream_q1)( x, y, out_x, out_y ); + #elif ( SIXTRL_CERRF_METHOD == 
SIXTRL_CERRF_CERNLIB_FIXED ) + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + NS(cerrf_cernlib_c_optimised_fixed_q1)( + x, y, out_x, out_y, xi, Fz_xi, Fz_nt ); + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + NS(cerrf_cernlib_c_optimised_fixed_q1)( + x, y, out_x, out_y, xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_COEFF */ + NS(cerrf_cernlib_c_optimised_fixed_q1)( x, y, out_x, out_y ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + #elif SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) \ @@ -1815,7 +2015,23 @@ SIXTRL_INLINE void NS(cerrf)( NS(cerrf_cernlib_c_baseline_q1)( x, y, &Wx, &Wy ); #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_UPSTREAM ) NS(cerrf_cernlib_c_upstream_q1)( x, y, &Wx, &Wy ); - #elif SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_CERNLIB_FIXED ) + #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ + defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ + ( SIXTRL_CERRF_USE_DAWSON_COEFF >= 1 ) + NS(cerrf_cernlib_c_optimised_fixed_q1)( + x, y, &Wx, &Wy, xi, Fz_xi, Fz_nt ); + #if ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) + NS(cerrf_cernlib_c_optimised_fixed_q1)( + x, y, &Wx, &Wy, xi, Fz_xi, Fz_nt, Fz_kk_xi ); + #endif /* ( SIXTRL_CERRF_USE_DAWSON_COEFF > 1 ) */ + #else /* !SIXTRL_CERRF_USE_DAWSON_COEFF */ + NS(cerrf_cernlib_c_optimised_fixed_q1)( x, y, &Wx, &Wy ); + #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ + + + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ALG680 ) #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ @@ 
-1828,7 +2044,7 @@ SIXTRL_INLINE void NS(cerrf)( NS(cerrf_alg680_q1)( x, y, &Wx, &Wy ); #endif /* SIXTRL_CERRF_USE_DAWSON_APPROX && SIXTRL_CERRF_USE_DAWSON_COEFF */ - #elif SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ABQ2011 + #elif ( SIXTRL_CERRF_METHOD == SIXTRL_CERRF_ABQ2011 ) #if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) && \ defined( SIXTRL_CERRF_USE_DAWSON_COEFF ) && \ From 1af93f8b97ffd55839ff8d7ba5a5c8d546d513fb Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sun, 5 Sep 2021 15:08:29 +0200 Subject: [PATCH 73/77] common: adds config options for fixed cernlib method --- .../common/be_beamfields/definitions.h | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/sixtracklib/common/be_beamfields/definitions.h b/sixtracklib/common/be_beamfields/definitions.h index 61d4e691d..06f7a230e 100644 --- a/sixtracklib/common/be_beamfields/definitions.h +++ b/sixtracklib/common/be_beamfields/definitions.h @@ -48,8 +48,9 @@ extern "C" { #define SIXTRL_CERRF_CERNLIB_OPTIMISED 0 #define SIXTRL_CERRF_CERNLIB_BASELINE 1 #define SIXTRL_CERRF_CERNLIB_UPSTREAM 2 -#define SIXTRL_CERRF_ALG680 3 -#define SIXTRL_CERRF_ABQ2011 4 +#define SIXTRL_CERRF_CERNLIB_FIXED 3 +#define SIXTRL_CERRF_ALG680 4 +#define SIXTRL_CERRF_ABQ2011 5 /* ------------------------------------------------------------------------- */ @@ -77,7 +78,7 @@ extern "C" { #endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_Y ) */ #if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_POW_2H_N ) - #define SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_POW_2H_N 2.22044604925031e-16 + #define SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_POW_2H_N 2.22507385850720e-307 #endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_MIN_POW_2H_N ) */ #if !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_H_0 ) @@ -96,6 +97,22 @@ extern "C" { #define SIXTRL_CERRF_CERNLIB_UPSTREAM_K 9 #endif /* !defined( SIXTRL_CERRF_CERNLIB_UPSTREAM_K ) */ +#if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ + ( 
SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) + + #if !defined( SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MAX_Y ) + #define SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MAX_Y 0.5 + #endif /* !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_Y ) */ + + #if !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X ) + #define SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X 0.0 + #endif /* !defined( SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MIN_X ) */ + + #if !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X ) + #define SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X 9.0 + #endif /* !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X ) */ +#endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) */ + /* ************************************************************************* */ /* Cernlib baseline and optimised: */ /* possible command line flags: @@ -125,7 +142,7 @@ extern "C" { #endif /* !defined( SIXTRL_CERRF_CERNLIB_MIN_Y ) */ #if !defined( SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N ) - #define SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N 2.22044604925031e-16 + #define SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N 2.22507385850720e-307 #endif /* !defined( SIXTRL_CERRF_CERNLIB_MIN_POW_2H_N ) */ #if !defined( SIXTRL_CERRF_CERNLIB_H_0 ) @@ -152,22 +169,6 @@ extern "C" { #define SIXTRL_CERRF_CERNLIB_K 9 #endif /* !defined( SIXTRL_CERRF_CERNLIB_K ) */ -#if defined( SIXTRL_CERRF_USE_DAWSON_APPROX ) && \ - ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) - - #if !defined( SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MAX_Y ) - #define SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MAX_Y 0.5 - #endif /* !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_Y ) */ - - #if !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X ) - #define SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MIN_X 0.0 - #endif /* !defined( SIXTRL_CERRF_CERNLIB_USE_DAWSON_APPROX_MIN_X ) */ - - #if !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X ) - #define SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X 9.0 - #endif /* !defined( SIXTRL_CERRF_CERNLIB_DAWSON_APPROX_MAX_X ) */ -#endif /* ( SIXTRL_CERRF_USE_DAWSON_APPROX == 1 ) 
*/ - /* ************************************************************************* */ /* ACM Algorithm 680: */ From 9713926bfbd9dee09c8b9835f2df76d9187079d5 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sun, 5 Sep 2021 15:08:56 +0200 Subject: [PATCH 74/77] common: adds symbols for pre-configured faddeeva methods --- sixtracklib/common/be_beamfields/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sixtracklib/common/be_beamfields/CMakeLists.txt b/sixtracklib/common/be_beamfields/CMakeLists.txt index 58469023e..4bed13236 100644 --- a/sixtracklib/common/be_beamfields/CMakeLists.txt +++ b/sixtracklib/common/be_beamfields/CMakeLists.txt @@ -1,6 +1,10 @@ set( SIXTRL_COMMON_BE_BEAMFIELDS_SOURCES be_beamfields.c faddeeva.c dawson_approx.c dawson_coeff_xn96_ntmax25.c - abq2011_coeff_nf24_tm12.c abq2011_coeff_poles_nf24_tm12_nt6.c ) + abq2011_coeff_nf24_tm12.c + abq2011_coeff_poles_nf24_tm12_nt6.c + abq2011_cf_q1.c + abq2011_cf_daw_q1.c + abq2011_root_q1.c ) set( SIXTRL_COMMON_BE_BEAMFIELDS_HEADERS be_beamfields.h track.h faddeeva.h abq2011_coeff.h From dca91613e2aaf0cf5e1b4c5002d071b3951f7d2b Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sun, 5 Sep 2021 15:11:49 +0200 Subject: [PATCH 75/77] python: adds symbols for additional faddeeva specialisation to python --- python/sixtracklib/stcommon.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/python/sixtracklib/stcommon.py b/python/sixtracklib/stcommon.py index 088695ee2..50d53e5d9 100644 --- a/python/sixtracklib/stcommon.py +++ b/python/sixtracklib/stcommon.py @@ -6710,6 +6710,12 @@ def Math_q_gauss(x, q, sqrt_beta, mu=None): st_cerrf_cernlib_c_optimised_q1.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] +st_cerrf_cernlib_c_optimised_fixed_q1 = \ + sixtracklib.st_cerrf_cernlib_c_optimised_fixed_q1_ext +st_cerrf_cernlib_c_optimised_fixed_q1.restype = None +st_cerrf_cernlib_c_optimised_fixed_q1.argtypes = [ + ct.c_double, 
ct.c_double, st_double_p, st_double_p, ] + st_cerrf_alg680_q1 = sixtracklib.st_cerrf_alg680_q1_ext st_cerrf_alg680_q1.restype = None st_cerrf_alg680_q1.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] @@ -6722,6 +6728,21 @@ def Math_q_gauss(x, q, sqrt_beta, mu=None): st_cerrf_abq2011_q1.restype = None st_cerrf_abq2011_q1.argtypes = [ ct.c_double, ct.c_double, st_double_p, st_double_p, ] +st_cerrf_abq2011_cf_q1 = sixtracklib.st_cerrf_abq2011_cf_q1_ext +st_cerrf_abq2011_cf_q1.restype = None +st_cerrf_abq2011_cf_q1.argtypes = [ + ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_cerrf_abq2011_cf_daw_q1 = sixtracklib.st_cerrf_abq2011_cf_daw_q1_ext +st_cerrf_abq2011_cf_daw_q1.restype = None +st_cerrf_abq2011_cf_daw_q1.argtypes = [ + ct.c_double, ct.c_double, st_double_p, st_double_p, ] + +st_cerrf_abq2011_root_q1 = sixtracklib.st_cerrf_abq2011_root_q1_ext +st_cerrf_abq2011_root_q1.restype = None +st_cerrf_abq2011_root_q1.argtypes = [ + ct.c_double, ct.c_double, st_double_p, st_double_p, ] + st_cerrf_abq2011_q1_coeff = sixtracklib.st_cerrf_abq2011_q1_coeff_ext st_cerrf_abq2011_q1_coeff.restype = None st_cerrf_abq2011_q1_coeff.argtypes = [ From 4c7c6b831dea3ffd712abe93899431b3aec09e99 Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sun, 5 Sep 2021 15:13:34 +0200 Subject: [PATCH 76/77] tests/python: adds unit-test for fixed cernlib implementation --- .../beam_elements/beamfields/test_cernlib.py | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tests/python/beam_elements/beamfields/test_cernlib.py b/tests/python/beam_elements/beamfields/test_cernlib.py index b29935bda..8bd9e7c76 100644 --- a/tests/python/beam_elements/beamfields/test_cernlib.py +++ b/tests/python/beam_elements/beamfields/test_cernlib.py @@ -7,7 +7,8 @@ from sixtracklib.stcommon import \ st_cerrf_cernlib_c_baseline_q1, \ st_cerrf_cernlib_c_upstream_q1, \ - st_cerrf_cernlib_c_optimised_q1 + st_cerrf_cernlib_c_optimised_q1, \ + 
st_cerrf_cernlib_c_optimised_fixed_q1 if __name__ == '__main__': x_oe = np.logspace( -8, 8, 101, dtype=np.float64 ) @@ -70,7 +71,7 @@ assert np.fabs( wz_im_cmp - wz_im_cernlib_upstream ).max() < 1e-10 # -------------------------------------------------------------------------- - # CERNLib C upstream + # CERNLib C optimised wz_re_cernlib_optimised = np.arange( n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) @@ -91,4 +92,27 @@ assert np.fabs( wz_re_cmp - wz_re_cernlib_optimised ).max() < 1e-10 assert np.fabs( wz_im_cmp - wz_im_cernlib_optimised ).max() < 1e-10 + # -------------------------------------------------------------------------- + # CERNLib C optimised fixed + + wz_re_cernlib_optimised_fixed = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + wz_im_cernlib_optimised_fixed = np.arange( + n_im * n_re, dtype=np.float64 ).reshape( n_im, n_re ) + + out_re = ct.c_double( 0. ) + out_im = ct.c_double( 0. ) + + for jj, y in enumerate( y_oe ): + for ii, x in enumerate( x_oe ): + st_cerrf_cernlib_c_optimised_fixed_q1( + ct.c_double( x ), ct.c_double( y ), + ct.byref( out_re ), ct.byref( out_im ) ) + wz_re_cernlib_optimised_fixed[ jj, ii ] = np.float64( out_re ) + wz_im_cernlib_optimised_fixed[ jj, ii ] = np.float64( out_im ) + + assert np.fabs( wz_re_cmp - wz_re_cernlib_optimised_fixed ).max() < 1e-10 + assert np.fabs( wz_im_cmp - wz_im_cernlib_optimised_fixed ).max() < 1e-10 + From 929c7a2266f2c5cb3ae00ef59cee61910505942f Mon Sep 17 00:00:00 2001 From: Martin Schwinzerl Date: Sun, 5 Sep 2021 15:14:20 +0200 Subject: [PATCH 77/77] tests/python: renders test for algorithm 680 stricter --- tests/python/beam_elements/beamfields/test_alg680.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/python/beam_elements/beamfields/test_alg680.py b/tests/python/beam_elements/beamfields/test_alg680.py index 98865ed55..cd51ac862 100644 --- a/tests/python/beam_elements/beamfields/test_alg680.py +++ 
b/tests/python/beam_elements/beamfields/test_alg680.py @@ -38,8 +38,5 @@ wz_re_alg680[ jj, ii ] = np.float64( out_re ) wz_im_alg680[ jj, ii ] = np.float64( out_im ) - print( np.fabs( wz_re_cmp - wz_re_alg680 ).max() ) - print( np.fabs( wz_im_cmp - wz_im_alg680 ).max() ) - - assert np.fabs( wz_re_cmp - wz_re_alg680 ).max() < 1e-10 - assert np.fabs( wz_im_cmp - wz_im_alg680 ).max() < 1e-10 + assert np.fabs( wz_re_cmp - wz_re_alg680 ).max() < 0.5e-14 + assert np.fabs( wz_im_cmp - wz_im_alg680 ).max() < 0.5e-14