diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d604c3..92eca2a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,6 @@ option( EXCHCXX_ENABLE_SYCL "Enable Device Code (SYCL)" OFF ) option( EXCHCXX_ENABLE_LIBXC "Enable Libxc Backend" ON ) option( BUILD_SHARED_LIBS "Build Shared Libs" OFF ) - # Decided if we're compiling device bindings if( EXCHCXX_ENABLE_CUDA OR EXCHCXX_ENABLE_SYCL OR EXCHCXX_ENABLE_HIP ) set( EXCHCXX_ENABLE_DEVICE TRUE CACHE BOOL "Enable Device Code" ) @@ -30,6 +29,12 @@ if( EXCHCXX_ENABLE_SYCL AND EXCHCXX_ENABLE_HIP ) endif() +if(EXCHCXX_ENABLE_SYCL) + # e.g. intel_gpu_pvc | nvidia_gpu_sm_80 | nvidia_gpu_sm_90 | amd_gpu_gfx90a | amd_gpu_gfx942 + set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Alias for -fsycl-targets (see Users Manual)") +endif() + + # Append local cmake directory to find CMAKE Modules if( CMAKE_MODULE_PATH ) list( APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") diff --git a/README.md b/README.md index 4c9fc86..34617a3 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,8 @@ small subset of XC functionals which may be evaluated either on the host (CPU) or device (GPU, FPGA, etc). Currently GPU support is provided through the [CUDA](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html) for NVIDIA GPUs, [HIP](https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP-GUIDE.html) for -AMD GPUs and [SYCL](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) (experimental) -for generic accelerator backends (including Intel GPUs). +AMD GPUs and [SYCL](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) (experimental, +supports only oneAPI implementaion) for generic accelerator backends (including Intel GPUs). ExchCXX is a work in progress. Its development has been funded by the U.S. diff --git a/include/exchcxx/impl/builtin/util.hpp b/include/exchcxx/impl/builtin/util.hpp index 89d8bda..f7dbd8a 100644 --- a/include/exchcxx/impl/builtin/util.hpp +++ b/include/exchcxx/impl/builtin/util.hpp @@ -58,6 +58,13 @@ namespace ExchCXX { +#if defined(__CUDACC__) || defined(__HIPCC__) +#define EXCHCXX_READONLY_TABLE static __device__ +#elif defined(__SYCL_DEVICE_ONLY__) +#define EXCHCXX_READONLY_TABLE inline constexpr +#else +#define EXCHCXX_READONLY_TABLE static +#endif #if defined(__CUDACC__) || defined(__HIPCC__) @@ -154,10 +161,7 @@ static double AE11_data[39] = { -0.000000000000000024, -0.000000000000000201, -0.000000000000000082, 0.000000000000000017 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double AE12_data[25] = { +EXCHCXX_READONLY_TABLE double AE12_data[25] = { 0.582417495134726740, -0.158348850905782750, -0.006764275590323141, 0.005125843950185725, 0.000435232492169391, -0.000143613366305483, -0.000041801320556301, -0.000002713395758640, 0.000001151381913647, 0.000000420650022012, 0.000000066581901391, 0.000000000662143777, -0.000000002844104870, -0.000000000940724197, -0.000000000177476602, @@ -165,30 +169,21 @@ static double AE12_data[25] = { 0.000000000000010707, -0.000000000000000537, -0.000000000000000716, -0.000000000000000244, -0.000000000000000058 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double E11_data[19] = { +EXCHCXX_READONLY_TABLE double E11_data[19] = { -16.11346165557149402600, 7.79407277874268027690, -1.95540581886314195070, 0.37337293866277945612, -0.05692503191092901938, 0.00721107776966009185, -0.00078104901449841593, 0.00007388093356262168, -0.00000620286187580820, 0.00000046816002303176, -0.00000003209288853329, 0.00000000201519974874, -0.00000000011673686816, 0.00000000000627627066, -0.00000000000031481541, 0.00000000000001479904, -0.00000000000000065457, 0.00000000000000002733, -0.00000000000000000108 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double E12_data[16] = { +EXCHCXX_READONLY_TABLE double E12_data[16] = { -0.03739021479220279500, 0.04272398606220957700, -0.13031820798497005440, 0.01441912402469889073, -0.00134617078051068022, 0.00010731029253063780, -0.00000742999951611943, 0.00000045377325690753, -0.00000002476417211390, 0.00000000122076581374, -0.00000000005485141480, 0.00000000000226362142, -0.00000000000008635897, 0.00000000000000306291, -0.00000000000000010148, 0.00000000000000000315 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double AE13_data[25] = { +EXCHCXX_READONLY_TABLE double AE13_data[25] = { -0.605773246640603460, -0.112535243483660900, 0.013432266247902779, -0.001926845187381145, 0.000309118337720603, -0.000053564132129618, 0.000009827812880247, -0.000001885368984916, 0.000000374943193568, -0.000000076823455870, 0.000000016143270567, -0.000000003466802211, 0.000000000758754209, -0.000000000168864333, 0.000000000038145706, @@ -196,10 +191,7 @@ static double AE13_data[25] = { 0.000000000000006457, -0.000000000000001568, 0.000000000000000383, -0.000000000000000094, 0.000000000000000023 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double AE14_data[26] = { +EXCHCXX_READONLY_TABLE double AE14_data[26] = { -0.18929180007530170, -0.08648117855259871, 0.00722410154374659, -0.00080975594575573, 0.00010999134432661, -0.00001717332998937, 0.00000298562751447, -0.00000056596491457, 0.00000011526808397, -0.00000002495030440, 0.00000000569232420, -0.00000000135995766, 0.00000000033846628, -0.00000000008737853, 0.00000000002331588, diff --git a/include/exchcxx/util/exchcxx_macros.hpp b/include/exchcxx/util/exchcxx_macros.hpp index da84dc5..7fbf0fa 100644 --- a/include/exchcxx/util/exchcxx_macros.hpp +++ b/include/exchcxx/util/exchcxx_macros.hpp @@ -79,7 +79,7 @@ #define DEVICE_PARAMS sycl::queue* queue #define DEVICE_PARAMS_NOTYPE queue - #define SYCL_KERNEL_PARAMS sycl::id<1> idx + #define SYCL_KERNEL_PARAMS sycl::id<1> tid #endif diff --git a/src/sycl/builtin_sycl.cxx b/src/sycl/builtin_sycl.cxx index 0225fab..8631f1b 100644 --- a/src/sycl/builtin_sycl.cxx +++ b/src/sycl/builtin_sycl.cxx @@ -56,296 +56,919 @@ namespace ExchCXX { namespace detail { +template class device_eval_exc_helper_unpolar_kernel_name; +template class device_eval_exc_helper_polar_kernel_name; +template class device_eval_exc_vxc_helper_unpolar_kernel_name; +template class device_eval_exc_vxc_helper_polar_kernel_name; +template class device_eval_fxc_helper_unpolar_kernel_name; +template class device_eval_fxc_helper_polar_kernel_name; +template class device_eval_vxc_fxc_helper_unpolar_kernel_name; +template class device_eval_vxc_fxc_helper_polar_kernel_name; +template class device_eval_exc_inc_helper_unpolar_kernel_name; +template class device_eval_exc_inc_helper_polar_kernel_name; +template class device_eval_exc_vxc_inc_helper_unpolar_kernel_name; +template class device_eval_exc_vxc_inc_helper_polar_kernel_name; +template class device_eval_fxc_inc_helper_unpolar_kernel_name; +template class device_eval_fxc_inc_helper_polar_kernel_name; +template class device_eval_vxc_fxc_inc_helper_unpolar_kernel_name; +template class device_eval_vxc_fxc_inc_helper_polar_kernel_name; + + template -inline LDA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { +__attribute__((always_inline)) LDA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_exc_unpolar( rho[tid], eps[tid] ); + +} - const double rho_use = sycl::max( rho[idx], 0. ); - traits::eval_exc_unpolar( rho_use, eps[idx] ); +template +__attribute__((always_inline)) LDA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + traits::eval_exc_polar( rho_i[0], rho_i[1], eps[tid] ); } template -inline LDA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { +__attribute__((always_inline)) LDA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_exc_vxc_unpolar( rho[tid], eps[tid], vxc[tid] ); - auto rho_i = rho + 2*idx; +} - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); +template +__attribute__((always_inline)) LDA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { - traits::eval_exc_polar( rho_a_use, rho_b_use, eps[idx] ); + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto vxc_i = vxc + 2*tid; + + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], eps[tid], + vxc_i[0], vxc_i[1] ); } + template -inline LDA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { +__attribute__((always_inline)) LDA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_fxc_unpolar( rho[tid], fxc[tid] ); - const double rho_use = sycl::max( rho[idx], 0. ); - traits::eval_exc_vxc_unpolar( rho_use, eps[idx], vxc[idx] ); +} + +template +__attribute__((always_inline)) LDA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto v2rho2_i = fxc + 3*tid; + + traits::eval_fxc_polar( rho_i[0], rho_i[1], v2rho2_i[0], + v2rho2_i[1], v2rho2_i[2] ); } template -inline LDA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { +__attribute__((always_inline)) LDA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_vxc_fxc_unpolar( rho[tid], vxc[tid], fxc[tid] ); + +} + +template +__attribute__((always_inline)) LDA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto vxc_i = vxc + 2*tid; + auto v2rho2_i = fxc + 3*tid; + + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], vxc_i[0], vxc_i[1], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2] ); - auto rho_i = rho + 2*idx; - auto vxc_i = vxc + 2*idx; +} + +template +__attribute__((always_inline)) LDA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); + using traits = kernel_traits; - traits::eval_exc_vxc_polar( rho_a_use, rho_b_use, eps[idx], - vxc_i[0], vxc_i[1] ); + double e; + traits::eval_exc_unpolar( rho[tid], e ); + eps[tid] += scal_fact * e; } template -inline LDA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { +__attribute__((always_inline)) LDA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { using traits = kernel_traits; + auto rho_i = rho + 2*tid; double e; + traits::eval_exc_polar( rho_i[0], rho_i[1], e ); - const double rho_use = sycl::max( rho[idx], 0. ); - traits::eval_exc_unpolar( rho_use, e ); - eps[idx] += scal_fact * e; + eps[tid] += scal_fact * e; } template -inline LDA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { +__attribute__((always_inline)) LDA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; - auto rho_i = rho + 2*idx; + double e,v; + traits::eval_exc_vxc_unpolar( rho[tid], e, v ); + eps[tid] += scal_fact * e; + vxc[tid] += scal_fact * v; - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); +} - double e; - traits::eval_exc_polar( rho_a_use, rho_b_use, e ); +template +__attribute__((always_inline)) LDA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto vxc_i = vxc + 2*tid; - eps[idx] += scal_fact * e; + double v_a, v_b, e; + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], e, v_a, v_b); + eps[tid] += scal_fact * e; + vxc_i[0] += scal_fact * v_a; + vxc_i[1] += scal_fact * v_b; } template -inline LDA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { +__attribute__((always_inline)) LDA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + double f; + traits::eval_fxc_unpolar( rho[tid], f ); + fxc[tid] += scal_fact * f; +} +template +__attribute__((always_inline)) LDA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_polar_kernel ) { using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto fxc_i = fxc + 3*tid; + double f0, f1, f2; + traits::eval_fxc_polar( rho_i[0], rho_i[1], f0, f1, f2 ); + fxc_i[0] += scal_fact * f0; + fxc_i[1] += scal_fact * f1; + fxc_i[2] += scal_fact * f2; +} - double e,v; +template +__attribute__((always_inline)) LDA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + double v, f; + traits::eval_vxc_fxc_unpolar( rho[tid], v, f ); + vxc[tid] += scal_fact * v; + fxc[tid] += scal_fact * f; +} - const double rho_use = sycl::max( rho[idx], 0. ); - traits::eval_exc_vxc_unpolar( rho_use, e, v ); - eps[idx] += scal_fact * e; - vxc[idx] += scal_fact * v; +template +__attribute__((always_inline)) LDA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_polar_kernel ) { + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto vxc_i = vxc + 2*tid; + auto fxc_i = fxc + 3*tid; + double v0, v1, f0, f1, f2; + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], v0, v1, f0, f1, f2 ); + vxc_i[0] += scal_fact * v0; + vxc_i[1] += scal_fact * v1; + fxc_i[0] += scal_fact * f0; + fxc_i[1] += scal_fact * f1; + fxc_i[2] += scal_fact * f2; +} + + + + + + +template +__attribute__((always_inline)) GGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + traits::eval_exc_unpolar( rho[tid], sigma[tid], eps[tid] ); } template -inline LDA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { +__attribute__((always_inline)) GGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], eps[tid] ); + +} + +template +__attribute__((always_inline)) GGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], eps[tid], + vrho[tid], vsigma[tid] ); + +} + +template +__attribute__((always_inline)) GGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], eps[tid], vrho_i[0], vrho_i[1], + vsigma_i[0], vsigma_i[1], vsigma_i[2] ); + +} + +template +__attribute__((always_inline)) GGA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_fxc_unpolar( rho[tid], sigma[tid], v2rho2[tid], v2rhosigma[tid], v2sigma2[tid] ); - auto rho_i = rho + 2*idx; - auto vxc_i = vxc + 2*idx; +} - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); +template +__attribute__((always_inline)) GGA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_polar_kernel ) { - double v_a, v_b, e; - traits::eval_exc_vxc_polar( rho_a_use, rho_b_use, e, v_a, v_b); - eps[idx] += scal_fact * e; - vxc_i[0] += scal_fact * v_a; - vxc_i[1] += scal_fact * v_b; + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* v2rho2_i = v2rho2 + 3*tid; + auto* v2rhosigma_i = v2rhosigma + 6*tid; + auto* v2sigma2_i = v2sigma2 + 6*tid; + + + traits::eval_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2], + v2rhosigma_i[0], v2rhosigma_i[1], v2rhosigma_i[2], + v2rhosigma_i[3], v2rhosigma_i[4], v2rhosigma_i[5], + v2sigma2_i[0], v2sigma2_i[1], v2sigma2_i[2], + v2sigma2_i[3], v2sigma2_i[4], v2sigma2_i[5] ); + +} + +template +__attribute__((always_inline)) GGA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + traits::eval_vxc_fxc_unpolar( rho[tid], sigma[tid], vrho[tid], vsigma[tid], + v2rho2[tid], v2rhosigma[tid], v2sigma2[tid] ); } template -inline GGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { +__attribute__((always_inline)) GGA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* v2rho2_i = v2rho2 + 3*tid; + auto* v2rhosigma_i = v2rhosigma + 6*tid; + auto* v2sigma2_i = v2sigma2 + 6*tid; + + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + vrho_i[0], vrho_i[1], vsigma_i[0], vsigma_i[1], vsigma_i[2], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2], + v2rhosigma_i[0], v2rhosigma_i[1], v2rhosigma_i[2], + v2rhosigma_i[3], v2rhosigma_i[4], v2rhosigma_i[5], + v2sigma2_i[0], v2sigma2_i[1], v2sigma2_i[2], + v2sigma2_i[3], v2sigma2_i[4], v2sigma2_i[5] ); + +} + + +template +__attribute__((always_inline)) GGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; - const double rho_use = sycl::max( rho[idx], 0. ); - const double sigma_use = sycl::max( sigma[idx], 1e-40 ); - traits::eval_exc_unpolar( rho_use, sigma_use, eps[idx] ); + double e; + traits::eval_exc_unpolar( rho[tid], sigma[tid], e ); + eps[tid] += scal_fact * e; } template -inline GGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { +__attribute__((always_inline)) GGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + double e; + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], e ); + eps[tid] += scal_fact * e; + +} + +template +__attribute__((always_inline)) GGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; - auto* rho_i = rho + 2*idx; - auto* sigma_i = sigma + 3*idx; + double e, vr, vs; + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], e, vr, vs ); + eps[tid] += scal_fact * e; + vrho[tid] += scal_fact * vr; + vsigma[tid] += scal_fact * vs; + +} + +template +__attribute__((always_inline)) GGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); - const double sigma_aa_use = sycl::max( sigma_i[0], 1e-40 ); - const double sigma_bb_use = sycl::max( sigma_i[2], 1e-40 ); - const double sigma_ab_use = sycl::max( - sigma_i[1], -(sigma_i[0] + sigma_i[1]) / 2. - ); + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; - traits::eval_exc_polar( rho_a_use, rho_b_use, sigma_aa_use, - sigma_ab_use, sigma_bb_use, eps[idx] ); + double e, vra, vrb, vsaa,vsab,vsbb; + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], e, vra, vrb, vsaa, vsab, vsbb ); + + eps[tid] += scal_fact * e; + vrho_i[0] += scal_fact * vra; + vrho_i[1] += scal_fact * vrb; + vsigma_i[0] += scal_fact * vsaa; + vsigma_i[1] += scal_fact * vsab; + vsigma_i[2] += scal_fact * vsbb; } + template -inline GGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { +__attribute__((always_inline)) GGA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + double f2, f3, f4; + traits::eval_fxc_unpolar( rho[tid], sigma[tid], f2, f3, f4 ); + v2rho2[tid] += scal_fact * f2; + v2rhosigma[tid] += scal_fact * f3; + v2sigma2[tid] += scal_fact * f4; +} +template +__attribute__((always_inline)) GGA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_polar_kernel ) { using traits = kernel_traits; - const double rho_use = sycl::max( rho[idx], 0. ); - const double sigma_use = sycl::max( sigma[idx], 1e-40 ); - traits::eval_exc_vxc_unpolar( rho_use, sigma_use, eps[idx], - vrho[idx], vsigma[idx] ); + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* v2rho2_i = v2rho2 + 3*tid; + auto* v2rhosigma_i = v2rhosigma + 6*tid; + auto* v2sigma2_i = v2sigma2 + 6*tid; + double f2[3], f3[6], f4[6]; + traits::eval_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + f2[0], f2[1], f2[2], + f3[0], f3[1], f3[2], f3[3], f3[4], f3[5], + f4[0], f4[1], f4[2], f4[3], f4[4], f4[5] ); + for(int i=0;i<3;++i) v2rho2_i[i] += scal_fact * f2[i]; + for(int i=0;i<6;++i) v2rhosigma_i[i] += scal_fact * f3[i]; + for(int i=0;i<6;++i) v2sigma2_i[i] += scal_fact * f4[i]; +} +template +__attribute__((always_inline)) GGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + double v, s, f2, f3, f4; + traits::eval_vxc_fxc_unpolar( rho[tid], sigma[tid], v, s, f2, f3, f4 ); + vrho[tid] += scal_fact * v; + vsigma[tid] += scal_fact * s; + v2rho2[tid] += scal_fact * f2; + v2rhosigma[tid] += scal_fact * f3; + v2sigma2[tid] += scal_fact * f4; } template -inline GGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { +__attribute__((always_inline)) GGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_polar_kernel ) { + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* v2rho2_i = v2rho2 + 3*tid; + auto* v2rhosigma_i = v2rhosigma + 6*tid; + auto* v2sigma2_i = v2sigma2 + 6*tid; + double v[2], s[3], f2[3], f3[6], f4[6]; + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + v[0], v[1], s[0], s[1], s[2], + f2[0], f2[1], f2[2], + f3[0], f3[1], f3[2], f3[3], f3[4], f3[5], + f4[0], f4[1], f4[2], f4[3], f4[4], f4[5] ); + for(int i=0;i<2;++i) vrho_i[i] += scal_fact * v[i]; + for(int i=0;i<3;++i) vsigma_i[i] += scal_fact * s[i]; + for(int i=0;i<3;++i) v2rho2_i[i] += scal_fact * f2[i]; + for(int i=0;i<6;++i) v2rhosigma_i[i] += scal_fact * f3[i]; + for(int i=0;i<6;++i) v2sigma2_i[i] += scal_fact * f4[i]; +} + + + + + + + + + + + + + + +template +__attribute__((always_inline)) MGGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + traits::eval_exc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], eps[tid] ); + +} + + +template +__attribute__((always_inline)) MGGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : nullptr; + auto* tau_i = tau + 2*tid; + + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; - auto* rho_i = rho + 2*idx; - auto* sigma_i = sigma + 3*idx; - auto* vrho_i = vrho + 2*idx; - auto* vsigma_i = vsigma + 3*idx; + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], eps[tid] ); - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); - const double sigma_aa_use = sycl::max( sigma_i[0], 1e-40 ); - const double sigma_bb_use = sycl::max( sigma_i[2], 1e-40 ); - const double sigma_ab_use = sycl::max( - sigma_i[1], -(sigma_i[0] + sigma_i[1]) / 2. - ); +} + +template +__attribute__((always_inline)) MGGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { + using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; - traits::eval_exc_vxc_polar( rho_a_use, rho_b_use, sigma_aa_use, - sigma_ab_use, sigma_bb_use, eps[idx], vrho_i[0], vrho_i[1], - vsigma_i[0], vsigma_i[1], vsigma_i[2] ); + double dummy; + auto& vlapl_return = traits::needs_laplacian ? vlapl[tid] : dummy; + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + eps[tid], vrho[tid], vsigma[tid], vlapl_return, vtau[tid] ); } +template +__attribute__((always_inline)) MGGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { + + using traits = kernel_traits; + + double dummy_vlapl[2]; + + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; + + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* vlapl_i = traits::needs_laplacian ? vlapl + 2*tid : dummy_vlapl; + auto* vtau_i = vtau + 2*tid; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], eps[tid], vrho_i[0], vrho_i[1], vsigma_i[0], vsigma_i[1], + vsigma_i[2], vlapl_i[0], vlapl_i[1], vtau_i[0], vtau_i[1] ); + +} template -inline GGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { +__attribute__((always_inline)) MGGA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_unpolar_kernel ) { using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + double local_v2rholapl, local_v2sigmalapl, local_v2lapl2, local_v2lapltau; - double e; - const double rho_use = sycl::max( rho[idx], 0. ); - const double sigma_use = sycl::max( sigma[idx], 1e-40 ); + auto& v2rholapl_return = traits::needs_laplacian ? v2rholapl[tid] : local_v2rholapl; + auto& v2sigmalapl_return = traits::needs_laplacian ? v2sigmalapl[tid] : local_v2sigmalapl; + auto& v2lapl2_return = traits::needs_laplacian ? v2lapl2[tid] : local_v2lapl2; + auto& v2lapltau_return = traits::needs_laplacian ? v2lapltau[tid] : local_v2lapltau; - traits::eval_exc_unpolar( rho_use, sigma_use, e ); - eps[idx] += scal_fact * e; + traits::eval_fxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + v2rho2[tid], v2rhosigma[tid], v2rholapl_return, v2rhotau[tid], + v2sigma2[tid], v2sigmalapl_return, v2sigmatau[tid], + v2lapl2_return, v2lapltau_return, v2tau2[tid] ); } template -inline GGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { +__attribute__((always_inline)) MGGA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_polar_kernel ) { using traits = kernel_traits; + double dummy_v2rholapl[4]; + double dummy_v2sigmalapl[6]; + double dummy_v2lapl2[3]; + double dummy_v2lapltau[4]; + + auto* rho_i = rho + 2 * tid; + auto* sigma_i = sigma + 3 * tid; + auto* tau_i = tau + 2 * tid; + auto* v2rho2_i = v2rho2 + 3 * tid; + auto* v2rhosigma_i = v2rhosigma + 6 * tid; + auto* v2rhotau_i = v2rhotau + 4 * tid; + auto* v2sigma2_i = v2sigma2 + 6 * tid; + auto* v2sigmatau_i = v2sigmatau + 6 * tid; + auto* v2tau2_i = v2tau2 + 3 * tid; + + auto* lapl_i = traits::needs_laplacian ? (lapl + 2 * tid) : lapl; + auto* v2rholapl_i = traits::needs_laplacian ? (v2rholapl + 4 * tid) : dummy_v2rholapl; + auto* v2sigmalapl_i = traits::needs_laplacian ? (v2sigmalapl + 6 * tid) : dummy_v2sigmalapl; + auto* v2lapl2_i = traits::needs_laplacian ? (v2lapl2 + 3 * tid) : dummy_v2lapl2; + auto* v2lapltau_i = traits::needs_laplacian ? (v2lapltau + 4 * tid) : dummy_v2lapltau; + + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + traits::eval_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + lapl_a_use, lapl_b_use, tau_i[0], tau_i[1], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2], + v2rhosigma_i[0], v2rhosigma_i[1], v2rhosigma_i[2], + v2rhosigma_i[3], v2rhosigma_i[4], v2rhosigma_i[5], + v2rholapl_i[0], v2rholapl_i[1], v2rholapl_i[2], v2rholapl_i[3], + v2rhotau_i[0], v2rhotau_i[1], v2rhotau_i[2], v2rhotau_i[3], + v2sigma2_i[0], v2sigma2_i[1], v2sigma2_i[2], + v2sigma2_i[3], v2sigma2_i[4], v2sigma2_i[5], + v2sigmalapl_i[0], v2sigmalapl_i[1], v2sigmalapl_i[2], + v2sigmalapl_i[3], v2sigmalapl_i[4], v2sigmalapl_i[5], + v2sigmatau_i[0], v2sigmatau_i[1], v2sigmatau_i[2], + v2sigmatau_i[3], v2sigmatau_i[4], v2sigmatau_i[5], + v2lapl2_i[0], v2lapl2_i[1], v2lapl2_i[2], + v2lapltau_i[0], v2lapltau_i[1], v2lapltau_i[2], v2lapltau_i[3], + v2tau2_i[0], v2tau2_i[1], v2tau2_i[2] ); +} - auto* rho_i = rho + 2*idx; - auto* sigma_i = sigma + 3*idx; +template +__attribute__((always_inline)) MGGA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + double dummy_v2rholapl, dummy_v2sigmalapl, dummy_v2lapl2, dummy_v2lapltau, dummy_vlapl; + auto& vlapl_return = traits::needs_laplacian ? vlapl[tid] : dummy_vlapl; + auto& v2rholapl_return = traits::needs_laplacian ? v2rholapl[tid] : dummy_v2rholapl; + auto& v2sigmalapl_return = traits::needs_laplacian ? v2sigmalapl[tid] : dummy_v2sigmalapl; + auto& v2lapl2_return = traits::needs_laplacian ? v2lapl2[tid] : dummy_v2lapl2; + auto& v2lapltau_return = traits::needs_laplacian ? v2lapltau[tid] : dummy_v2lapltau; + + traits::eval_vxc_fxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + vrho[tid], vsigma[tid], vlapl_return, vtau[tid], + v2rho2[tid], v2rhosigma[tid], v2rholapl_return, + v2rhotau[tid], v2sigma2[tid], v2sigmalapl_return, + v2sigmatau[tid], v2lapl2_return, v2lapltau_return, + v2tau2[tid] ); - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); - const double sigma_aa_use = sycl::max( sigma_i[0], 1e-40 ); - const double sigma_bb_use = sycl::max( sigma_i[2], 1e-40 ); - const double sigma_ab_use = sycl::max( - sigma_i[1], -(sigma_i[0] + sigma_i[1]) / 2. - ); +} + +template +__attribute__((always_inline)) MGGA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_polar_kernel ) { + + using traits = kernel_traits; + double dummy_vlapl[2]; + double dummy_v2rholapl[4]; + double dummy_v2sigmalapl[6]; + double dummy_v2lapl2[3]; + double dummy_v2lapltau[4]; + + auto* rho_i = rho + 2 * tid; + auto* sigma_i = sigma + 3 * tid; + auto* tau_i = tau + 2 * tid; + auto* vrho_i = vrho + 2 * tid; + auto* vsigma_i = vsigma + 3 * tid; + auto* vtau_i = vtau + 2 * tid; + + auto* v2rho2_i = v2rho2 + 3 * tid; + auto* v2rhosigma_i = v2rhosigma + 6 * tid; + auto* v2rhotau_i = v2rhotau + 4 * tid; + auto* v2sigma2_i = v2sigma2 + 6 * tid; + auto* v2sigmatau_i = v2sigmatau + 6 * tid; + auto* v2tau2_i = v2tau2 + 3 * tid; + + auto* lapl_i = traits::needs_laplacian ? (lapl + 2 * tid) : lapl; + auto* vlapl_i = traits::needs_laplacian ? (vlapl + 2 * tid) : dummy_vlapl; + auto* v2rholapl_i = traits::needs_laplacian ? (v2rholapl + 4 * tid) : dummy_v2rholapl; + auto* v2sigmalapl_i = traits::needs_laplacian ? (v2sigmalapl + 6 * tid) : dummy_v2sigmalapl; + auto* v2lapl2_i = traits::needs_laplacian ? (v2lapl2 + 3 * tid) : dummy_v2lapl2; + auto* v2lapltau_i = traits::needs_laplacian ? (v2lapltau + 4 * tid) : dummy_v2lapltau; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + lapl_a_use, lapl_b_use, tau_i[0], tau_i[1], + vrho_i[0], vrho_i[1], vsigma_i[0], vsigma_i[1], vsigma_i[2], + vlapl_i[0], vlapl_i[1], vtau_i[0], vtau_i[1], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2], + v2rhosigma_i[0], v2rhosigma_i[1], v2rhosigma_i[2], + v2rhosigma_i[3], v2rhosigma_i[4], v2rhosigma_i[5], + v2rholapl_i[0], v2rholapl_i[1], v2rholapl_i[2], v2rholapl_i[3], + v2rhotau_i[0], v2rhotau_i[1], v2rhotau_i[2], v2rhotau_i[3], + v2sigma2_i[0], v2sigma2_i[1], v2sigma2_i[2], + v2sigma2_i[3], v2sigma2_i[4], v2sigma2_i[5], + v2sigmalapl_i[0], v2sigmalapl_i[1], v2sigmalapl_i[2], + v2sigmalapl_i[3], v2sigmalapl_i[4], v2sigmalapl_i[5], + v2sigmatau_i[0], v2sigmatau_i[1], v2sigmatau_i[2], + v2sigmatau_i[3], v2sigmatau_i[4], v2sigmatau_i[5], + v2lapl2_i[0], v2lapl2_i[1], v2lapl2_i[2], + v2lapltau_i[0], v2lapltau_i[1], v2lapltau_i[2], v2lapltau_i[3], + v2tau2_i[0], v2tau2_i[1], v2tau2_i[2] ); +} + +template +__attribute__((always_inline)) MGGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { + + using traits = kernel_traits; double e; - traits::eval_exc_polar( rho_a_use, rho_b_use, sigma_aa_use, - sigma_ab_use, sigma_bb_use, e ); - eps[idx] += scal_fact * e; + + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + traits::eval_exc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], e ); + eps[tid] += scal_fact * e; } template -inline GGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { +__attribute__((always_inline)) MGGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; + + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + double e; + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], e ); + eps[tid] += scal_fact * e; + +} + +template +__attribute__((always_inline)) MGGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; - double e, vr, vs; - const double rho_use = sycl::max( rho[idx], 0. ); - const double sigma_use = sycl::max( sigma[idx], 1e-40 ); + double e, vr, vs, vl, vt; + + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; - traits::eval_exc_vxc_unpolar( rho_use, sigma_use, e, vr, vs ); - eps[idx] += scal_fact * e; - vrho[idx] += scal_fact * vr; - vsigma[idx] += scal_fact * vs; + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + e, vr, vs, vl, vt ); + eps[tid] += scal_fact * e; + vrho[tid] += scal_fact * vr; + vsigma[tid] += scal_fact * vs; + vtau[tid] += scal_fact * vt; + if(traits::needs_laplacian) vlapl[tid] += scal_fact * vl; } template -inline GGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { +__attribute__((always_inline)) MGGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { using traits = kernel_traits; - auto* rho_i = rho + 2*idx; - auto* sigma_i = sigma + 3*idx; - auto* vrho_i = vrho + 2*idx; - auto* vsigma_i = vsigma + 3*idx; + double dummy_vlapl[2]; - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); - const double sigma_aa_use = sycl::max( sigma_i[0], 1e-40 ); - const double sigma_bb_use = sycl::max( sigma_i[2], 1e-40 ); - const double sigma_ab_use = sycl::max( - sigma_i[1], -(sigma_i[0] + sigma_i[1]) / 2. - ); + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* vlapl_i = traits::needs_laplacian ? vlapl + 2*tid : dummy_vlapl; + auto* vtau_i = vtau + 2*tid; - double e, vra, vrb, vsaa,vsab,vsbb; - traits::eval_exc_vxc_polar( rho_a_use, rho_b_use, sigma_aa_use, - sigma_ab_use, sigma_bb_use, e, vra, vrb, vsaa, vsab, vsbb ); + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; - eps[idx] += scal_fact * e; + + double e, vra, vrb, vsaa,vsab,vsbb, vla, vlb, vta, vtb; + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], e, vra, vrb, vsaa, vsab, vsbb, vla, vlb, vta, vtb ); + + eps[tid] += scal_fact * e; vrho_i[0] += scal_fact * vra; vrho_i[1] += scal_fact * vrb; vsigma_i[0] += scal_fact * vsaa; vsigma_i[1] += scal_fact * vsab; vsigma_i[2] += scal_fact * vsbb; + vtau_i[0] += scal_fact * vta; + vtau_i[1] += scal_fact * vtb; + if(traits::needs_laplacian) { + vlapl_i[0] += scal_fact * vla; + vlapl_i[1] += scal_fact * vlb; + } } +template +__attribute__((always_inline)) MGGA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + double f_rho2, f_rhosigma, f_rholapl, f_rhotau, f_sigma2, f_sigmalapl, f_sigmatau, f_lapl2, f_lapltau, f_tau2; + traits::eval_fxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + f_rho2, f_rhosigma, f_rholapl, f_rhotau, + f_sigma2, f_sigmalapl, f_sigmatau, + f_lapl2, f_lapltau, f_tau2 ); + v2rho2[tid] += scal_fact * f_rho2; + v2rhosigma[tid] += scal_fact * f_rhosigma; + v2rhotau[tid] += scal_fact * f_rhotau; + v2sigma2[tid] += scal_fact * f_sigma2; + v2sigmatau[tid] += scal_fact * f_sigmatau; + v2tau2[tid] += scal_fact * f_tau2; + if(traits::needs_laplacian) { + v2rholapl[tid] += scal_fact * f_rholapl; + v2sigmalapl[tid] += scal_fact * f_sigmalapl; + v2lapl2[tid] += scal_fact * f_lapl2; + v2lapltau[tid] += scal_fact * f_lapltau; + } +} +template +__attribute__((always_inline)) MGGA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_polar_kernel ) { + using traits = kernel_traits; + auto* rho_i = rho + 2 * tid; + auto* sigma_i = sigma + 3 * tid; + auto* tau_i = tau + 2 * tid; + auto* v2rho2_i = v2rho2 + 3 * tid; + auto* v2rhosigma_i = v2rhosigma + 6 * tid; + auto* v2rhotau_i = v2rhotau + 4 * tid; + auto* v2sigma2_i = v2sigma2 + 6 * tid; + auto* v2sigmatau_i = v2sigmatau + 6 * tid; + auto* v2tau2_i = v2tau2 + 3 * tid; + + auto* lapl_i = traits::needs_laplacian ? (lapl + 2 * tid) : lapl; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + double f_rho2[3], f_rhosigma[6], f_rholapl[4], f_rhotau[4], f_sigma2[6], f_sigmalapl[6], f_sigmatau[6], f_lapl2[3], f_lapltau[4], f_tau2[3]; + + traits::eval_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + lapl_a_use, lapl_b_use, tau_i[0], tau_i[1], + f_rho2[0], f_rho2[1], f_rho2[2], + f_rhosigma[0], f_rhosigma[1], f_rhosigma[2], f_rhosigma[3], f_rhosigma[4], f_rhosigma[5], + f_rholapl[0], f_rholapl[1], f_rholapl[2], f_rholapl[3], + f_rhotau[0], f_rhotau[1], f_rhotau[2], f_rhotau[3], + f_sigma2[0], f_sigma2[1], f_sigma2[2], f_sigma2[3], f_sigma2[4], f_sigma2[5], + f_sigmalapl[0], f_sigmalapl[1], f_sigmalapl[2], f_sigmalapl[3], f_sigmalapl[4], f_sigmalapl[5], + f_sigmatau[0], f_sigmatau[1], f_sigmatau[2], f_sigmatau[3], f_sigmatau[4], f_sigmatau[5], + f_lapl2[0], f_lapl2[1], f_lapl2[2], + f_lapltau[0], f_lapltau[1], f_lapltau[2], f_lapltau[3], + f_tau2[0], f_tau2[1], f_tau2[2] ); + + for(int i=0;i<3;++i) v2rho2_i[i] += scal_fact * f_rho2[i]; + for(int i=0;i<6;++i) v2rhosigma_i[i] += scal_fact * f_rhosigma[i]; + for(int i=0;i<4;++i) v2rhotau_i[i] += scal_fact * f_rhotau[i]; + for(int i=0;i<6;++i) v2sigma2_i[i] += scal_fact * f_sigma2[i]; + for(int i=0;i<6;++i) v2sigmatau_i[i] += scal_fact * f_sigmatau[i]; + for(int i=0;i<3;++i) v2tau2_i[i] += scal_fact * f_tau2[i]; + + if(traits::needs_laplacian) { + auto* v2rholapl_i = v2rholapl + 4 * tid; + auto* v2sigmalapl_i = v2sigmalapl + 6 * tid; + auto* v2lapl2_i = v2lapl2 + 3 * tid; + auto* v2lapltau_i = v2lapltau + 4 * tid; + for(int i=0;i<4;++i) v2rholapl_i[i] += scal_fact * f_rholapl[i]; + for(int i=0;i<6;++i) v2sigmalapl_i[i] += scal_fact * f_sigmalapl[i]; + for(int i=0;i<3;++i) v2lapl2_i[i] += scal_fact * f_lapl2[i]; + for(int i=0;i<4;++i) v2lapltau_i[i] += scal_fact * f_lapltau[i]; + } +} +template +__attribute__((always_inline)) MGGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + double f_rho2, f_rhosigma, f_rholapl, f_rhotau, f_sigma2, f_sigmalapl, f_sigmatau, f_lapl2, f_lapltau, f_tau2; + double vr, vs, vl, vt; + traits::eval_vxc_fxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + vr, vs, vl, vt, + f_rho2, f_rhosigma, f_rholapl, f_rhotau, + f_sigma2, f_sigmalapl, f_sigmatau, + f_lapl2, f_lapltau, f_tau2); + + vrho[tid] += scal_fact * vr; + vsigma[tid] += scal_fact * vs; + vtau[tid] += scal_fact * vt; + v2rho2[tid] += scal_fact * f_rho2; + v2rhosigma[tid] += scal_fact * f_rhosigma; + v2rhotau[tid] += scal_fact * f_rhotau; + v2sigma2[tid] += scal_fact * f_sigma2; + v2sigmatau[tid] += scal_fact * f_sigmatau; + v2tau2[tid] += scal_fact * f_tau2; + + if(traits::needs_laplacian) { + vlapl[tid] += scal_fact * vl; + v2rholapl[tid] += scal_fact * f_rholapl; + v2sigmalapl[tid] += scal_fact * f_sigmalapl; + v2lapl2[tid] += scal_fact * f_lapl2; + v2lapltau[tid] += scal_fact * f_lapltau; + } +} -template class lda_eval_exc_unpolar; -template class lda_eval_exc_polar; -template class lda_eval_exc_vxc_unpolar; -template class lda_eval_exc_vxc_polar; -template class lda_eval_exc_inc_unpolar; -template class lda_eval_exc_inc_polar; -template class lda_eval_exc_vxc_inc_unpolar; -template class lda_eval_exc_vxc_inc_polar; +template +__attribute__((always_inline)) MGGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL(device_eval_vxc_fxc_inc_helper_polar_kernel) { + using traits = kernel_traits; + auto* rho_i = rho + 2 * tid; + auto* sigma_i = sigma + 3 * tid; + auto* tau_i = tau + 2 * tid; + auto* vrho_i = vrho + 2 * tid; + auto* vsigma_i = vsigma + 3 * tid; + auto* vtau_i = vtau + 2 * tid; + + auto* v2rho2_i = v2rho2 + 3 * tid; + auto* v2rhosigma_i = v2rhosigma + 6 * tid; + auto* v2rhotau_i = v2rhotau + 4 * tid; + auto* v2sigma2_i = v2sigma2 + 6 * tid; + auto* v2sigmatau_i = v2sigmatau + 6 * tid; + auto* v2tau2_i = v2tau2 + 3 * tid; + + auto* lapl_i = traits::needs_laplacian ? (lapl + 2 * tid) : lapl; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + double frho[2], fsigma[3], flapl[2], ftau[2]; + double f_rho2[3], f_rhosigma[6], f_rholapl[4], f_rhotau[4], f_sigma2[6], f_sigmalapl[6], f_sigmatau[6], f_lapl2[3], f_lapltau[4], f_tau2[3]; + + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + lapl_a_use, lapl_b_use, tau_i[0], tau_i[1], + frho[0], frho[1], fsigma[0], fsigma[1], fsigma[2], + flapl[0], flapl[1], ftau[0], ftau[1], + f_rho2[0], f_rho2[1], f_rho2[2], + f_rhosigma[0], f_rhosigma[1], f_rhosigma[2], + f_rhosigma[3], f_rhosigma[4], f_rhosigma[5], + f_rholapl[0], f_rholapl[1], f_rholapl[2], f_rholapl[3], + f_rhotau[0], f_rhotau[1], f_rhotau[2], f_rhotau[3], + f_sigma2[0], f_sigma2[1], f_sigma2[2], + f_sigma2[3], f_sigma2[4], f_sigma2[5], + f_sigmalapl[0], f_sigmalapl[1], f_sigmalapl[2], + f_sigmalapl[3], f_sigmalapl[4], f_sigmalapl[5], + f_sigmatau[0], f_sigmatau[1], f_sigmatau[2], + f_sigmatau[3], f_sigmatau[4], f_sigmatau[5], + f_lapl2[0], f_lapl2[1], f_lapl2[2], + f_lapltau[0], f_lapltau[1], f_lapltau[2], f_lapltau[3], + f_tau2[0], f_tau2[1], f_tau2[2] ); + + for(int i=0;i<2;++i) vrho_i[i] += scal_fact * frho[i]; + for(int i=0;i<3;++i) vsigma_i[i] += scal_fact * fsigma[i]; + for(int i=0;i<2;++i) vtau_i[i] += scal_fact * ftau[i]; + + for(int i=0;i<3;++i) v2rho2_i[i] += scal_fact * f_rho2[i]; + for(int i=0;i<6;++i) v2rhosigma_i[i] += scal_fact * f_rhosigma[i]; + for(int i=0;i<4;++i) v2rhotau_i[i] += scal_fact * f_rhotau[i]; + for(int i=0;i<6;++i) v2sigma2_i[i] += scal_fact * f_sigma2[i]; + for(int i=0;i<6;++i) v2sigmatau_i[i] += scal_fact * f_sigmatau[i]; + for(int i=0;i<3;++i) v2tau2_i[i] += scal_fact * f_tau2[i]; + + if(traits::needs_laplacian) { + auto* vlapl_i = vlapl + 2 * tid; + auto* v2rholapl_i = v2rholapl + 4 * tid; + auto* v2sigmalapl_i = v2sigmalapl + 6 * tid; + auto* v2lapl2_i = v2lapl2 + 3 * tid; + auto* v2lapltau_i = v2lapltau + 4 * tid; + for(int i=0;i<2;++i) vlapl_i[i] += scal_fact * flapl[i]; + for(int i=0;i<4;++i) v2rholapl_i[i] += scal_fact * f_rholapl[i]; + for(int i=0;i<6;++i) v2sigmalapl_i[i] += scal_fact * f_sigmalapl[i]; + for(int i=0;i<3;++i) v2lapl2_i[i] += scal_fact * f_lapl2[i]; + for(int i=0;i<4;++i) v2lapltau_i[i] += scal_fact * f_lapltau[i]; + } + +} -template class gga_eval_exc_unpolar; -template class gga_eval_exc_polar; -template class gga_eval_exc_vxc_unpolar; -template class gga_eval_exc_vxc_polar; -template class gga_eval_exc_inc_unpolar; -template class gga_eval_exc_inc_polar; -template class gga_eval_exc_vxc_inc_unpolar; -template class gga_eval_exc_vxc_inc_polar; @@ -356,11 +979,9 @@ template class gga_eval_exc_vxc_inc_polar; template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), - [=](sycl::id<1> idx) { - device_eval_exc_helper_unpolar_kernel( - N, rho, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_unpolar_kernel( + N, rho, eps, tid); }); } @@ -368,11 +989,9 @@ LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_helper_polar_kernel( - N, rho, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_polar_kernel( + N, rho, eps, tid); }); } @@ -380,11 +999,9 @@ LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_helper_unpolar_kernel( - N, rho, eps, vxc, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_unpolar_kernel( + N, rho, eps, vxc, tid); }); } @@ -392,31 +1009,58 @@ LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_helper_polar_kernel( - N, rho, eps, vxc, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_polar_kernel( + N, rho, eps, vxc, tid); }); } +template +LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_unpolar_kernel( + N, rho, fxc, tid); + }); +} +template +LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_polar_kernel( + N, rho, fxc, tid); + }); +} +template +LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_unpolar_kernel( + N, rho, vxc, fxc, tid); + }); +} + +template +LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_polar_kernel( + N, rho, vxc, fxc, tid); + }); + +} template LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_inc_helper_unpolar_kernel( - scal_fact, N, rho, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_unpolar_kernel( + scal_fact, N, rho, eps, tid); }); } @@ -424,11 +1068,9 @@ LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { template LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_inc_helper_polar_kernel( - scal_fact, N, rho, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_polar_kernel( + scal_fact, N, rho, eps, tid); }); } @@ -436,11 +1078,9 @@ LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_inc_helper_unpolar_kernel( - scal_fact, N, rho, eps, vxc, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, eps, vxc, tid); }); } @@ -448,20 +1088,52 @@ LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_inc_helper_polar_kernel( - scal_fact, N, rho, eps, vxc, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_polar_kernel( + scal_fact, N, rho, eps, vxc, tid); }); } +template +LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, fxc, tid); + }); +} +template +LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, fxc, tid); + }); +} + +template +LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, vxc, fxc, tid); + }); + +} + +template +LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, vxc, fxc, tid); + }); + +} @@ -469,11 +1141,9 @@ LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_helper_unpolar_kernel( - N, rho, sigma, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_unpolar_kernel( + N, rho, sigma, eps, tid); }); } @@ -481,11 +1151,9 @@ GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_helper_polar_kernel( - N, rho, sigma, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_polar_kernel( + N, rho, sigma, eps, tid); }); } @@ -493,11 +1161,9 @@ GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { template GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_helper_unpolar_kernel( - N, rho, sigma, eps, vrho, vsigma, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_unpolar_kernel( + N, rho, sigma, eps, vrho, vsigma, tid); }); } @@ -505,31 +1171,59 @@ GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { template GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_helper_polar_kernel( - N, rho, sigma, eps, vrho, vsigma, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_polar_kernel( + N, rho, sigma, eps, vrho, vsigma, tid); }); } +template +GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_unpolar_kernel( + N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} + +template +GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_polar_kernel( + N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); + +} + +template +GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_unpolar_kernel( + N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} +template +GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_polar_kernel( + N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_inc_helper_unpolar_kernel( - scal_fact, N, rho, sigma, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, eps, tid); }); } @@ -537,11 +1231,9 @@ GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_inc_helper_polar_kernel( - scal_fact, N, rho, sigma, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, eps, tid); }); } @@ -549,11 +1241,9 @@ GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_inc_helper_unpolar_kernel( - scal_fact, N, rho, sigma, eps, vrho, vsigma, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, eps, vrho, vsigma, tid); }); } @@ -561,15 +1251,234 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_inc_helper_polar_kernel( - scal_fact, N, rho, sigma, eps, vrho, vsigma, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, eps, vrho, vsigma, tid); + }); + +} + + +template +GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} + +template +GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} + +template +GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} + +template +GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); + +} + +template +MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_unpolar_kernel( + N, rho, sigma, lapl, tau, eps, tid); + }); + +} + +template +MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_polar_kernel( + N, rho, sigma, lapl, tau, eps, tid); + }); + +} + +template +MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_unpolar_kernel( + N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); + }); + +} + +template +MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_polar_kernel( + N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); + }); + +} + +template +MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_unpolar_kernel( + N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_polar_kernel( + N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_unpolar_kernel( + N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_polar_kernel( + N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, lapl, tau, eps, tid); + }); + +} + +template +MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, lapl, tau, eps, tid); + }); + +} + +template +MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); + }); + +} + +template +MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); }); } +template +MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, lapl, tau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, lapl, tau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, lapl, tau, + vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, lapl, tau, + vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + + #define LDA_GENERATE_DEVICE_HELPERS(KERN) \ template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ); \ @@ -579,7 +1488,15 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ); \ template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ); \ template LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ); \ - template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); + template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); \ + template LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ); \ + template LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ); \ + template LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ); \ + template LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ); \ + template LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ); \ + template LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ); \ + template LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ); \ + template LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ); #define GGA_GENERATE_DEVICE_HELPERS(KERN) \ template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ); \ @@ -589,11 +1506,38 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ); \ template GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ); \ template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ); \ - template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); + template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); \ + template GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ); \ + template GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ); \ + template GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ); \ + template GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ); \ + template GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ); \ + template GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ); \ + template GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ); \ + template GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ); + +#define MGGA_GENERATE_DEVICE_HELPERS(KERN) \ + template MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ); \ + template MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ); \ + template MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ); \ + template MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar );\ + template MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ); \ + template MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ); \ + template MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ); \ + template MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); \ + template MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ); \ + template MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ); \ + template MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ); \ + template MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ); \ + template MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ); \ + template MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ); \ + template MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ); \ + template MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ); LDA_GENERATE_DEVICE_HELPERS( BuiltinSlaterExchange ); LDA_GENERATE_DEVICE_HELPERS( BuiltinVWN3 ); LDA_GENERATE_DEVICE_HELPERS( BuiltinVWN_RPA ); +LDA_GENERATE_DEVICE_HELPERS( BuiltinVWN ); LDA_GENERATE_DEVICE_HELPERS( BuiltinPW91_LDA ); LDA_GENERATE_DEVICE_HELPERS( BuiltinPW91_LDA_MOD ); LDA_GENERATE_DEVICE_HELPERS( BuiltinPW91_LDA_RPA ); @@ -605,12 +1549,93 @@ GGA_GENERATE_DEVICE_HELPERS( BuiltinLYP ); GGA_GENERATE_DEVICE_HELPERS( BuiltinPBE_X ); GGA_GENERATE_DEVICE_HELPERS( BuiltinRevPBE_X ); GGA_GENERATE_DEVICE_HELPERS( BuiltinPBE_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinB97_D ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinITYH_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinITYH_X_033 ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinITYH_X_015 ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinP86_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinP86VWN_FT_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinPW91_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinPBE_SOL_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinBMK_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinN12_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinN12_SX_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinSOGGA11_X_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinPW91_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinMPW91_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinOPTX_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinRPBE_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinSOGGA11_X_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinPW86_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97X_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97X_V_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97X_D_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97X_D3_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinHJS_PBE_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinLCwPBE_wPBEh_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinLRCwPBE_HJS_PBE_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinLRCwPBEh_HJS_PBE_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWPBEh_X_default0 ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinHSE03_wPBEh_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinHSE06_wPBEh_X ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCAN_X ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCAN_C ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCAN_X ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCAN_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinFT98_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM062X_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM062X_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPKZB_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPKZB_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinTPSS_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRevTPSS_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_L_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_HF_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRevM06_L_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_SX_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_L_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_HF_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRevM06_L_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_SX_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM05_2X_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM05_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM08_HX_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM08_SO_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinCF22D_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM11_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN12_L_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN12_SX_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN15_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN15_L_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinTPSS_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRevTPSS_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRSCAN_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinBC95_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMBEEF_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRSCAN_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinBMK_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM08_HX_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM08_SO_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN12_L_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN15_L_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN15_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinCF22D_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN12_SX_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM11_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM05_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM05_2X_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPC07_K ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPC07OPT_K ); + +MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCANL_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCANL_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCANL_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCANL_X ); LDA_GENERATE_DEVICE_HELPERS( BuiltinEPC17_1 ) LDA_GENERATE_DEVICE_HELPERS( BuiltinEPC17_2 ) diff --git a/src/sycl/exchcxx_sycl.cmake b/src/sycl/exchcxx_sycl.cmake index c1f80f5..9ce1e4d 100644 --- a/src/sycl/exchcxx_sycl.cmake +++ b/src/sycl/exchcxx_sycl.cmake @@ -13,9 +13,33 @@ list( APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ) find_package( SYCL REQUIRED ) target_link_libraries( exchcxx PUBLIC SYCL::SYCL ) + +# --- AoT-builds SYCL target alias pass-through --- +set(_EXCHCXX_SYCL_ALLOWED + intel_gpu_pvc + nvidia_gpu_sm_80 + nvidia_gpu_sm_90 + amd_gpu_gfx90a + amd_gpu_gfx942 +) +if(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "") + list(FIND _EXCHCXX_SYCL_ALLOWED "${EXCHCXX_SYCL_TARGET}" _exchcxx_sycl_idx) + if(_exchcxx_sycl_idx EQUAL -1) + message(FATAL_ERROR "Invalid EXCHCXX_SYCL_TARGET='${EXCHCXX_SYCL_TARGET}'. " "Allowed values: ${_EXCHCXX_SYCL_ALLOWED}") + endif() + + target_compile_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) + target_link_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) + message(STATUS "ExchCXX SYCL AoT enabled for target: ${EXCHCXX_SYCL_TARGET}") +endif() + + +target_compile_options(exchcxx PRIVATE $<$:-ffp-model=precise>) +target_link_options(exchcxx PRIVATE -fsycl-max-parallel-link-jobs=20) + include(CheckCXXCompilerFlag) check_cxx_compiler_flag("-fno-sycl-id-queries-fit-in-int" EXCHCXX_SYCL_ID_QUERIES_FIT_IN_INT ) -check_cxx_compiler_flag("-fsycl-device-code-split=per_kernel" EXCHCXX_SYCL_DEVICE_CODE_SPLIT_PER_KERNEL ) +check_cxx_compiler_flag("-fsycl-device-code-split=per_source" EXCHCXX_SYCL_DEVICE_CODE_SPLIT_PER_SOURCE ) check_cxx_compiler_flag("-fno-sycl-early-optimizations" EXCHCXX_SYCL_HAS_NO_EARLY_OPTIMIZATIONS ) @@ -25,9 +49,9 @@ if( EXCHCXX_SYCL_ID_QUERIES_FIT_IN_INT ) ) endif() -if( EXCHCXX_SYCL_DEVICE_CODE_SPLIT_PER_KERNEL ) +if( EXCHCXX_SYCL_DEVICE_CODE_SPLIT_PER_SOURCE ) target_compile_options( exchcxx PRIVATE - $<$: -fsycl-device-code-split=per_kernel> + $<$: -fsycl-device-code-split=per_source> ) endif() diff --git a/src/sycl/libxc_device.cxx b/src/sycl/libxc_device.cxx index 89102ba..758485e 100644 --- a/src/sycl/libxc_device.cxx +++ b/src/sycl/libxc_device.cxx @@ -142,6 +142,56 @@ LDA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { } +LDA_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT LDA", is_lda() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_fxc = this->v2rho2_buffer_len(N); + + size_t len_rho = sz_rho*sizeof(double); + size_t len_fxc = sz_fxc*sizeof(double); + + std::vector rho_host( sz_rho ), fxc_host( sz_fxc ); + + recv_from_device( rho_host.data(), rho, len_rho, queue ); + + queue_sync( queue ); + xc_lda_fxc( &kernel_, N, rho_host.data(), fxc_host.data() ); + + send_to_device( fxc, fxc_host.data(), len_fxc, queue ); + queue_sync( queue ); // Lifetime of host vectors + +} + + +LDA_VXC_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_vxc_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT LDA", is_lda() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_vxc = this->vrho_buffer_len(N); + size_t sz_fxc = this->v2rho2_buffer_len(N); + + size_t len_rho = sz_rho*sizeof(double); + size_t len_vxc = sz_vxc*sizeof(double); + size_t len_fxc = sz_fxc*sizeof(double); + + std::vector rho_host( sz_rho ), vxc_host( sz_vxc ), fxc_host( sz_fxc ); + + recv_from_device( rho_host.data(), rho, len_rho, queue ); + + queue_sync( queue ); + xc_lda_vxc_fxc( &kernel_, N, rho_host.data(), vxc_host.data(), fxc_host.data() ); + + send_to_device( vxc, vxc_host.data(), len_vxc, queue ); + send_to_device( fxc, fxc_host.data(), len_fxc, queue ); + queue_sync( queue ); // Lifetime of host vectors + +} + // TODO: LDA kxc interfaces // GGA interface @@ -189,14 +239,14 @@ GGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { size_t len_vsigma = sz_vsigma*sizeof(double); size_t len_eps = sz_eps *sizeof(double); - std::vector rho_host( sz_rho ), eps_host( sz_eps ), + std::vector rho_host( sz_rho ), eps_host( sz_eps ), sigma_host( sz_sigma ), vrho_host( sz_vrho ), vsigma_host( sz_vsigma ); recv_from_device( rho_host.data(), rho, len_rho , queue ); recv_from_device( sigma_host.data(), sigma, len_sigma, queue ); - + queue_sync( queue ); - xc_gga_exc_vxc( &kernel_, N, rho_host.data(), sigma_host.data(), eps_host.data(), + xc_gga_exc_vxc( &kernel_, N, rho_host.data(), sigma_host.data(), eps_host.data(), vrho_host.data(), vsigma_host.data() ); send_to_device( eps, eps_host.data(), len_eps , queue); @@ -206,9 +256,89 @@ GGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { } -// TODO: GGA kxc interfaces - - + GGA_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT GGA", is_gga() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_sigma = this->sigma_buffer_len(N); + size_t sz_v2rho2 = this->v2rho2_buffer_len(N); + size_t sz_v2rhosigma = this->v2rhosigma_buffer_len(N); + size_t sz_v2sigma2 = this->v2sigma2_buffer_len(N); + + size_t len_rho = sz_rho * sizeof(double); + size_t len_sigma = sz_sigma * sizeof(double); + size_t len_v2rho2 = sz_v2rho2 * sizeof(double); + size_t len_v2rhosigma = sz_v2rhosigma * sizeof(double); + size_t len_v2sigma2 = sz_v2sigma2 * sizeof(double); + + std::vector rho_host(sz_rho), sigma_host(sz_sigma), + v2rho2_host(sz_v2rho2), v2rhosigma_host(sz_v2rhosigma), + v2sigma2_host(sz_v2sigma2); + + recv_from_device(rho_host.data(), rho, len_rho, queue); + recv_from_device(sigma_host.data(), sigma, len_sigma, queue); + + queue_sync(queue); + xc_gga_fxc(&kernel_, N, rho_host.data(), sigma_host.data(), + v2rho2_host.data(), v2rhosigma_host.data(), v2sigma2_host.data()); + + send_to_device(v2rho2, v2rho2_host.data(), len_v2rho2, queue); + send_to_device(v2rhosigma, v2rhosigma_host.data(), len_v2rhosigma, queue); + send_to_device(v2sigma2, v2sigma2_host.data(), len_v2sigma2, queue); + + queue_sync(queue); // Lifetime of host vectors +} + +GGA_VXC_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_vxc_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT GGA", is_gga() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_sigma = this->sigma_buffer_len(N); + size_t sz_vrho = this->vrho_buffer_len(N); + size_t sz_vsigma = this->vsigma_buffer_len(N); + + size_t sz_v2rho2 = this->v2rho2_buffer_len(N); + size_t sz_v2rhosigma = this->v2rhosigma_buffer_len(N); + size_t sz_v2sigma2 = this->v2sigma2_buffer_len(N); + + size_t len_rho = sz_rho * sizeof(double); + size_t len_sigma = sz_sigma * sizeof(double); + size_t len_vrho = sz_vrho * sizeof(double); + size_t len_vsigma = sz_vsigma * sizeof(double); + + size_t len_v2rho2 = sz_v2rho2 * sizeof(double); + size_t len_v2rhosigma = sz_v2rhosigma * sizeof(double); + size_t len_v2sigma2 = sz_v2sigma2 * sizeof(double); + + std::vector rho_host(sz_rho), sigma_host(sz_sigma), + vrho_host(sz_vrho), vsigma_host(sz_vsigma), + v2rho2_host(sz_v2rho2), v2rhosigma_host(sz_v2rhosigma), + v2sigma2_host(sz_v2sigma2); + + recv_from_device(rho_host.data(), rho, len_rho, queue); + recv_from_device(sigma_host.data(), sigma, len_sigma, queue); + + queue_sync(queue); + xc_gga_vxc_fxc(&kernel_, N, rho_host.data(), sigma_host.data(), + vrho_host.data(), vsigma_host.data(), + v2rho2_host.data(), v2rhosigma_host.data(), + v2sigma2_host.data()); + + send_to_device(vrho, vrho_host.data(), len_vrho, queue); + send_to_device(vsigma, vsigma_host.data(), len_vsigma, queue); + send_to_device(v2rho2, v2rho2_host.data(), len_v2rho2, queue); + send_to_device(v2rhosigma, v2rhosigma_host.data(), len_v2rhosigma, queue); + send_to_device(v2sigma2, v2sigma2_host.data(), len_v2sigma2, queue); + queue_sync(queue); // Lifetime of host vectors +} + +// TODO: GGA kxc interfaces + + // mGGA interface MGGA_EXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_device_ ) const { @@ -227,8 +357,8 @@ MGGA_EXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_device_ ) const { size_t len_tau = sz_tau *sizeof(double); size_t len_eps = sz_eps *sizeof(double); - std::vector rho_host( sz_rho ), eps_host( sz_eps ), - sigma_host( sz_sigma ), lapl_host( sz_lapl ), + std::vector rho_host( sz_rho ), eps_host( sz_eps ), + sigma_host( sz_sigma ), lapl_host( sz_lapl ), tau_host( sz_tau ); recv_from_device( rho_host.data(), rho, len_rho , queue ); @@ -237,7 +367,7 @@ MGGA_EXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_device_ ) const { recv_from_device( tau_host.data(), tau, len_tau , queue ); queue_sync( queue ); - xc_mgga_exc( &kernel_, N, rho_host.data(), sigma_host.data(), lapl_host.data(), + xc_mgga_exc( &kernel_, N, rho_host.data(), sigma_host.data(), lapl_host.data(), tau_host.data(), eps_host.data() ); send_to_device( eps, eps_host.data(), len_eps, queue ); @@ -271,9 +401,9 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { size_t len_vlapl = sz_vlapl *sizeof(double); size_t len_vtau = sz_vtau *sizeof(double); - std::vector rho_host( sz_rho ), eps_host( sz_eps ), sigma_host( sz_sigma ), + std::vector rho_host( sz_rho ), eps_host( sz_eps ), sigma_host( sz_sigma ), lapl_host( sz_lapl ), tau_host( sz_tau ); - std::vector vrho_host( sz_vrho ), vsigma_host( sz_vsigma ), + std::vector vrho_host( sz_vrho ), vsigma_host( sz_vsigma ), vlapl_host( sz_vlapl ), vtau_host( sz_vtau ); recv_from_device( rho_host.data(), rho, len_rho , queue ); @@ -282,8 +412,8 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { recv_from_device( tau_host.data(), tau, len_tau , queue ); queue_sync( queue ); - xc_mgga_exc_vxc( &kernel_, N, rho_host.data(), sigma_host.data(), - lapl_host.data(), tau_host.data(), eps_host.data(), vrho_host.data(), + xc_mgga_exc_vxc( &kernel_, N, rho_host.data(), sigma_host.data(), + lapl_host.data(), tau_host.data(), eps_host.data(), vrho_host.data(), vsigma_host.data(), vlapl_host.data(), vtau_host.data() ); send_to_device( eps, eps_host.data(), len_eps , queue ); @@ -295,19 +425,199 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { } +MGGA_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT MGGA", is_mgga() ); -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, EXC, + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_sigma = this->sigma_buffer_len(N); + size_t sz_lapl = this->lapl_buffer_len(N) ; + size_t sz_tau = this->tau_buffer_len(N) ; + + size_t sz_v2rho2 = this->v2rho2_buffer_len(N); + size_t sz_v2rhosigma = this->v2rhosigma_buffer_len(N); + size_t sz_v2rholapl = this->v2rholapl_buffer_len(N); + size_t sz_v2rhotau = this->v2rhotau_buffer_len(N); + size_t sz_v2sigma2 = this->v2sigma2_buffer_len(N); + size_t sz_v2sigmalapl = this->v2sigmalapl_buffer_len(N); + size_t sz_v2sigmatau = this->v2sigmatau_buffer_len(N); + size_t sz_v2lapl2 = this->v2lapl2_buffer_len(N); + size_t sz_v2lapltau = this->v2lapltau_buffer_len(N); + size_t sz_v2tau2 = this->v2tau2_buffer_len(N); + + size_t len_rho = sz_rho * sizeof(double); + size_t len_sigma = sz_sigma * sizeof(double); + size_t len_lapl = sz_lapl * sizeof(double); + size_t len_tau = sz_tau * sizeof(double); + + size_t len_v2rho2 = sz_v2rho2 * sizeof(double); + size_t len_v2rhosigma = sz_v2rhosigma * sizeof(double); + size_t len_v2rholapl = sz_v2rholapl * sizeof(double); + size_t len_v2rhotau = sz_v2rhotau * sizeof(double); + size_t len_v2sigma2 = sz_v2sigma2 * sizeof(double); + size_t len_v2sigmalapl = sz_v2sigmalapl * sizeof(double); + size_t len_v2sigmatau = sz_v2sigmatau * sizeof(double); + size_t len_v2lapl2 = sz_v2lapl2 * sizeof(double); + size_t len_v2lapltau = sz_v2lapltau * sizeof(double); + size_t len_v2tau2 = sz_v2tau2 * sizeof(double); + + std::vector rho_host(sz_rho), sigma_host(sz_sigma), + lapl_host(sz_lapl), tau_host(sz_tau); + + std::vector v2rho2_host(sz_v2rho2), v2rhosigma_host(sz_v2rhosigma), + v2rholapl_host(sz_v2rholapl), v2rhotau_host(sz_v2rhotau), + v2sigma2_host(sz_v2sigma2), v2sigmalapl_host(sz_v2sigmalapl), + v2sigmatau_host(sz_v2sigmatau), v2lapl2_host(sz_v2lapl2), + v2lapltau_host(sz_v2lapltau), v2tau2_host(sz_v2tau2); + + recv_from_device(rho_host.data(), rho, len_rho, queue); + recv_from_device(sigma_host.data(), sigma, len_sigma, queue); + recv_from_device(lapl_host.data(), lapl, len_lapl, queue); + recv_from_device(tau_host.data(), tau, len_tau, queue); + + queue_sync(queue); + xc_mgga_fxc(&kernel_, N, rho_host.data(), sigma_host.data(), + lapl_host.data(), tau_host.data(), + v2rho2_host.data(), v2rhosigma_host.data(), v2rholapl_host.data(), + v2rhotau_host.data(), v2sigma2_host.data(), v2sigmalapl_host.data(), + v2sigmatau_host.data(), v2lapl2_host.data(), v2lapltau_host.data(), + v2tau2_host.data()); + + send_to_device(v2rho2, v2rho2_host.data(), len_v2rho2, queue); + send_to_device(v2rhosigma, v2rhosigma_host.data(), len_v2rhosigma, queue); + send_to_device(v2rholapl, v2rholapl_host.data(), len_v2rholapl, queue); + send_to_device(v2rhotau, v2rhotau_host.data(), len_v2rhotau, queue); + send_to_device(v2sigma2, v2sigma2_host.data(), len_v2sigma2, queue); + send_to_device(v2sigmalapl, v2sigmalapl_host.data(), len_v2sigmalapl, queue); + send_to_device(v2sigmatau, v2sigmatau_host.data(), len_v2sigmatau, queue); + send_to_device(v2lapl2, v2lapl2_host.data(), len_v2lapl2, queue); + send_to_device(v2lapltau, v2lapltau_host.data(), len_v2lapltau, queue); + send_to_device(v2tau2, v2tau2_host.data(), len_v2tau2, queue); + + queue_sync(queue); // Lifetime of host vectors +} + + +MGGA_VXC_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_vxc_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT MGGA", is_mgga() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_sigma = this->sigma_buffer_len(N); + size_t sz_lapl = this->lapl_buffer_len(N); + size_t sz_tau = this->tau_buffer_len(N) ; + + size_t sz_vrho = this->vrho_buffer_len(N); + size_t sz_vsigma = this->vsigma_buffer_len(N); + size_t sz_vlapl = this->vlapl_buffer_len(N); + size_t sz_vtau = this->vtau_buffer_len(N) ; + + size_t sz_v2rho2 = this->v2rho2_buffer_len(N); + size_t sz_v2rhosigma = this->v2rhosigma_buffer_len(N); + size_t sz_v2rholapl = this->v2rholapl_buffer_len(N); + size_t sz_v2rhotau = this->v2rhotau_buffer_len(N); + size_t sz_v2sigma2 = this->v2sigma2_buffer_len(N); + size_t sz_v2sigmalapl = this->v2sigmalapl_buffer_len(N); + size_t sz_v2sigmatau = this->v2sigmatau_buffer_len(N); + size_t sz_v2lapl2 = this->v2lapl2_buffer_len(N); + size_t sz_v2lapltau = this->v2lapltau_buffer_len(N); + size_t sz_v2tau2 = this->v2tau2_buffer_len(N); + + size_t len_rho = sz_rho * sizeof(double); + size_t len_sigma = sz_sigma * sizeof(double); + size_t len_lapl = sz_lapl * sizeof(double); + size_t len_tau = sz_tau * sizeof(double); + + size_t len_vrho = sz_vrho * sizeof(double); + size_t len_vsigma = sz_vsigma * sizeof(double); + size_t len_vlapl = sz_vlapl * sizeof(double); + size_t len_vtau = sz_vtau * sizeof(double); + + size_t len_v2rho2 = sz_v2rho2 * sizeof(double); + size_t len_v2rhosigma = sz_v2rhosigma * sizeof(double); + size_t len_v2rholapl = sz_v2rholapl * sizeof(double); + size_t len_v2rhotau = sz_v2rhotau * sizeof(double); + size_t len_v2sigma2 = sz_v2sigma2 * sizeof(double); + size_t len_v2sigmalapl = sz_v2sigmalapl * sizeof(double); + size_t len_v2sigmatau = sz_v2sigmatau * sizeof(double); + size_t len_v2lapl2 = sz_v2lapl2 * sizeof(double); + size_t len_v2lapltau = sz_v2lapltau * sizeof(double); + size_t len_v2tau2 = sz_v2tau2 * sizeof(double); + + std::vector rho_host(sz_rho), sigma_host(sz_sigma), + lapl_host(sz_lapl), tau_host(sz_tau); + + std::vector vrho_host(sz_vrho), vsigma_host(sz_vsigma), + vlapl_host(sz_vlapl), vtau_host(sz_vtau); + + std::vector v2rho2_host(sz_v2rho2), v2rhosigma_host(sz_v2rhosigma), + v2rholapl_host(sz_v2rholapl), v2rhotau_host(sz_v2rhotau), + v2sigma2_host(sz_v2sigma2), v2sigmalapl_host(sz_v2sigmalapl), + v2sigmatau_host(sz_v2sigmatau), v2lapl2_host(sz_v2lapl2), + v2lapltau_host(sz_v2lapltau), v2tau2_host(sz_v2tau2); + + recv_from_device(rho_host.data(), rho, len_rho, queue); + recv_from_device(sigma_host.data(), sigma, len_sigma, queue); + recv_from_device(lapl_host.data(), lapl, len_lapl, queue); + recv_from_device(tau_host.data(), tau, len_tau, queue); + + queue_sync(queue); + xc_mgga_vxc_fxc(&kernel_, N, rho_host.data(), sigma_host.data(), + lapl_host.data(), tau_host.data(), + vrho_host.data(), vsigma_host.data(), vlapl_host.data(), vtau_host.data(), + v2rho2_host.data(), v2rhosigma_host.data(), v2rholapl_host.data(), + v2rhotau_host.data(), v2sigma2_host.data(), v2sigmalapl_host.data(), + v2sigmatau_host.data(), v2lapl2_host.data(), v2lapltau_host.data(), + v2tau2_host.data()); + + send_to_device(vrho, vrho_host.data(), len_vrho, queue); + send_to_device(vsigma, vsigma_host.data(), len_vsigma, queue); + send_to_device(vlapl, vlapl_host.data(), len_vlapl, queue); + send_to_device(vtau, vtau_host.data(), len_vtau, queue); + + send_to_device(v2rho2, v2rho2_host.data(), len_v2rho2, queue); + send_to_device(v2rhosigma, v2rhosigma_host.data(), len_v2rhosigma, queue); + send_to_device(v2rholapl, v2rholapl_host.data(), len_v2rholapl, queue); + send_to_device(v2rhotau, v2rhotau_host.data(), len_v2rhotau, queue); + send_to_device(v2sigma2, v2sigma2_host.data(), len_v2sigma2, queue); + send_to_device(v2sigmalapl, v2sigmalapl_host.data(), len_v2sigmalapl, queue); + send_to_device(v2sigmatau, v2sigmatau_host.data(), len_v2sigmatau, queue); + send_to_device(v2lapl2, v2lapl2_host.data(), len_v2lapl2, queue); + send_to_device(v2lapltau, v2lapltau_host.data(), len_v2lapltau, queue); + send_to_device(v2tau2, v2tau2_host.data(), len_v2tau2, queue); + + queue_sync(queue); // Lifetime of host vectors +} + + +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, EXC, LibxcKernelImpl::eval_exc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, EXC_VXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, EXC_VXC, LibxcKernelImpl::eval_exc_vxc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, EXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, EXC, LibxcKernelImpl::eval_exc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, EXC_VXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, EXC_VXC, LibxcKernelImpl::eval_exc_vxc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, EXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, EXC, LibxcKernelImpl::eval_exc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, EXC_VXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, EXC_VXC, LibxcKernelImpl::eval_exc_vxc_inc_device_, const ) + +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, FXC, + LibxcKernelImpl::eval_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, FXC, + LibxcKernelImpl::eval_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, FXC, + LibxcKernelImpl::eval_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, VXC_FXC, + LibxcKernelImpl::eval_vxc_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, VXC_FXC, + LibxcKernelImpl::eval_vxc_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, VXC_FXC, + LibxcKernelImpl::eval_vxc_fxc_inc_device_, const ) + } } diff --git a/src/sycl/xc_functional_device.cxx b/src/sycl/xc_functional_device.cxx index ac36f75..42cce84 100644 --- a/src/sycl/xc_functional_device.cxx +++ b/src/sycl/xc_functional_device.cxx @@ -172,6 +172,81 @@ LDA_EXC_VXC_GENERATOR_DEVICE( XCFunctional::eval_exc_vxc_device ) const { } +LDA_FXC_GENERATOR_DEVICE( XCFunctional::eval_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT LDA", is_lda() ); + + const size_t len_fxc_buffer = v2rho2_buffer_len(N); + + double* fxc_scr = nullptr; + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) + fxc_scr = safe_sycl_malloc( len_fxc_buffer, queue ); + + safe_zero( len_fxc_buffer, fxc, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + if (use_inc) { + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, fxc, queue + ); + } else { + double* fxc_eval = i ? fxc_scr : fxc; + kernels_[i].second.eval_fxc_device(N, rho, fxc_eval, queue); + + if( i ) + add_scal_device( len_fxc_buffer, kernels_[i].first, fxc_eval, fxc, queue ); + else + scal_device( len_fxc_buffer, kernels_[i].first, fxc_eval, fxc, queue ); + } + } + + if( fxc_scr ) sycl::free( fxc_scr, *queue ); +} + +LDA_VXC_FXC_GENERATOR_DEVICE( XCFunctional::eval_vxc_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT LDA", is_lda() ); + + const size_t len_vxc_buffer = vrho_buffer_len(N); + const size_t len_fxc_buffer = v2rho2_buffer_len(N); + + double* vxc_scr(nullptr), *fxc_scr(nullptr); + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + vxc_scr = safe_sycl_malloc( len_vxc_buffer, queue ); + fxc_scr = safe_sycl_malloc( len_fxc_buffer, queue ); + } + + safe_zero( len_vxc_buffer, vxc, queue ); + safe_zero( len_fxc_buffer, fxc, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + if (use_inc) { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, vxc, fxc, queue + ); + } else { + double* vxc_eval = i ? vxc_scr : vxc; + double* fxc_eval = i ? fxc_scr : fxc; + kernels_[i].second.eval_vxc_fxc_device(N, rho, vxc_eval, fxc_eval, queue); + + if( i ) { + add_scal_device( len_vxc_buffer, kernels_[i].first, vxc_eval, vxc, queue ); + add_scal_device( len_fxc_buffer, kernels_[i].first, fxc_eval, fxc, queue ); + } else { + scal_device( len_vxc_buffer, kernels_[i].first, vxc_eval, vxc, queue ); + scal_device( len_fxc_buffer, kernels_[i].first, fxc_eval, fxc, queue ); + } + } + } + + if( vxc_scr ) sycl::free( vxc_scr, *queue ); + if( fxc_scr ) sycl::free( fxc_scr, *queue ); +} + // GGA Interfaces @@ -293,8 +368,151 @@ GGA_EXC_VXC_GENERATOR_DEVICE( XCFunctional::eval_exc_vxc_device ) const { if( vsigma_scr ) sycl::free( vsigma_scr, *queue ); } +GGA_FXC_GENERATOR_DEVICE( XCFunctional::eval_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT GGA", is_gga() ); + + const size_t len_v2rho2_buffer = v2rho2_buffer_len(N); + const size_t len_v2rhosigma_buffer = v2rhosigma_buffer_len(N); + const size_t len_v2sigma2_buffer = v2sigma2_buffer_len(N); + + double* v2rho2_scr(nullptr), *v2rhosigma_scr(nullptr), *v2sigma2_scr(nullptr); + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + v2rho2_scr = safe_sycl_malloc( len_v2rho2_buffer, queue ); + v2rhosigma_scr = safe_sycl_malloc( len_v2rhosigma_buffer, queue ); + v2sigma2_scr = safe_sycl_malloc( len_v2sigma2_buffer, queue ); + } + + safe_zero( len_v2rho2_buffer, v2rho2, queue ); + safe_zero( len_v2rhosigma_buffer, v2rhosigma, queue ); + safe_zero( len_v2sigma2_buffer, v2sigma2, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + if (use_inc) { + if( kernels_[i].second.is_gga() ) + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, queue + ); + else + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, v2rho2, queue + ); + } else { + double* v2rho2_eval = i ? v2rho2_scr : v2rho2; + double* v2rhosigma_eval = i ? v2rhosigma_scr : v2rhosigma; + double* v2sigma2_eval = i ? v2sigma2_scr : v2sigma2; + + if( kernels_[i].second.is_gga() ) + kernels_[i].second.eval_fxc_device(N, rho, sigma, v2rho2_eval, + v2rhosigma_eval, v2sigma2_eval, queue ); + else + kernels_[i].second.eval_fxc_device(N, rho, v2rho2_eval, queue); + + if( i ) { + add_scal_device( len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue ); + if( kernels_[i].second.is_gga() ){ + add_scal_device( len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue ); + add_scal_device( len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue ); + } + + } else { + scal_device( len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue ); + if( kernels_[i].second.is_gga() ){ + scal_device( len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue ); + scal_device( len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue ); + } + } + } + } + + if( v2rho2_scr ) sycl::free( v2rho2_scr, *queue); + if( v2rhosigma_scr ) sycl::free( v2rhosigma_scr, *queue); + if( v2sigma2_scr ) sycl::free( v2sigma2_scr, *queue); +} + +GGA_VXC_FXC_GENERATOR_DEVICE( XCFunctional::eval_vxc_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT GGA", is_gga() ); + + const size_t len_vrho_buffer = vrho_buffer_len(N); + const size_t len_vsigma_buffer = vsigma_buffer_len(N); + const size_t len_v2rho2_buffer = v2rho2_buffer_len(N); + const size_t len_v2rhosigma_buffer = v2rhosigma_buffer_len(N); + const size_t len_v2sigma2_buffer = v2sigma2_buffer_len(N); + + double* vrho_scr(nullptr), *vsigma_scr(nullptr); + double* v2rho2_scr(nullptr), *v2rhosigma_scr(nullptr), *v2sigma2_scr(nullptr); + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + vrho_scr = safe_sycl_malloc( len_vrho_buffer, queue ); + vsigma_scr = safe_sycl_malloc( len_vsigma_buffer, queue ); + v2rho2_scr = safe_sycl_malloc( len_v2rho2_buffer, queue ); + v2rhosigma_scr = safe_sycl_malloc( len_v2rhosigma_buffer, queue ); + v2sigma2_scr = safe_sycl_malloc( len_v2sigma2_buffer, queue ); + } + + safe_zero( len_vrho_buffer, vrho, queue ); + safe_zero( len_vsigma_buffer, vsigma, queue ); + safe_zero( len_v2rho2_buffer, v2rho2, queue ); + safe_zero( len_v2rhosigma_buffer, v2rhosigma, queue ); + safe_zero( len_v2sigma2_buffer, v2sigma2, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + if (use_inc) { + if (kernels_[i].second.is_gga()) { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, queue + ); + } else { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, vrho, v2rho2, queue + ); + } + } else { + double* vrho_eval = i ? vrho_scr : vrho; + double* vsigma_eval = i ? vsigma_scr : vsigma; + double* v2rho2_eval = i ? v2rho2_scr : v2rho2; + double* v2rhosigma_eval = i ? v2rhosigma_scr : v2rhosigma; + double* v2sigma2_eval = i ? v2sigma2_scr : v2sigma2; + + if (kernels_[i].second.is_gga()) { + kernels_[i].second.eval_vxc_fxc_device( + N, rho, sigma, vrho_eval, vsigma_eval, v2rho2_eval, v2rhosigma_eval, v2sigma2_eval, queue); + } else { + kernels_[i].second.eval_vxc_fxc_device(N, rho, vrho_eval, v2rho2_eval, queue); + } + + if (i) { + add_scal_device(len_vrho_buffer, kernels_[i].first, vrho_eval, vrho, queue); + add_scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if (kernels_[i].second.is_gga()) { + add_scal_device(len_vsigma_buffer, kernels_[i].first, vsigma_eval, vsigma, queue); + add_scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + add_scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + } else { + scal_device(len_vrho_buffer, kernels_[i].first, vrho_eval, vrho, queue); + scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + if (kernels_[i].second.is_gga()) { + scal_device(len_vsigma_buffer, kernels_[i].first, vsigma_eval, vsigma, queue); + scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + } + } + } + if( vrho_scr ) sycl::free( vrho_scr, *queue); + if( vsigma_scr ) sycl::free( vsigma_scr, *queue); + if( v2rho2_scr ) sycl::free( v2rho2_scr, *queue); + if( v2rhosigma_scr ) sycl::free( v2rhosigma_scr, *queue); + if( v2sigma2_scr ) sycl::free( v2sigma2_scr, *queue); +} // mGGA Interfaces @@ -453,4 +671,329 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( XCFunctional::eval_exc_vxc_device ) const { if( vtau_scr ) sycl::free( vtau_scr, *queue ); } +MGGA_FXC_GENERATOR_DEVICE( XCFunctional::eval_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT MGGA", is_mgga() ); + + const size_t len_v2rho2_buffer = v2rho2_buffer_len(N); + const size_t len_v2rhosigma_buffer = v2rhosigma_buffer_len(N); + const size_t len_v2rholapl_buffer = v2rholapl_buffer_len(N); + const size_t len_v2rhotau_buffer = v2rhotau_buffer_len(N); + const size_t len_v2sigma2_buffer = v2sigma2_buffer_len(N); + const size_t len_v2sigmalapl_buffer = v2sigmalapl_buffer_len(N); + const size_t len_v2sigmatau_buffer = v2sigmatau_buffer_len(N); + const size_t len_v2lapl2_buffer = v2lapl2_buffer_len(N); + const size_t len_v2lapltau_buffer = v2lapltau_buffer_len(N); + const size_t len_v2tau2_buffer = v2tau2_buffer_len(N); + + double* v2rho2_scr(nullptr), *v2rhosigma_scr(nullptr), *v2rholapl_scr(nullptr), *v2rhotau_scr(nullptr), + *v2sigma2_scr(nullptr), *v2sigmalapl_scr(nullptr), *v2sigmatau_scr(nullptr), *v2lapl2_scr(nullptr), + *v2lapltau_scr(nullptr), *v2tau2_scr(nullptr); + + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + v2rho2_scr = safe_sycl_malloc( len_v2rho2_buffer, queue ); + v2rhosigma_scr = safe_sycl_malloc( len_v2rhosigma_buffer, queue ); + v2rholapl_scr = safe_sycl_malloc( len_v2rholapl_buffer, queue ); + v2rhotau_scr = safe_sycl_malloc( len_v2rhotau_buffer, queue ); + v2sigma2_scr = safe_sycl_malloc( len_v2sigma2_buffer, queue ); + v2sigmalapl_scr = safe_sycl_malloc( len_v2sigmalapl_buffer, queue ); + v2sigmatau_scr = safe_sycl_malloc( len_v2sigmatau_buffer, queue ); + v2lapl2_scr = safe_sycl_malloc( len_v2lapl2_buffer, queue ); + v2lapltau_scr = safe_sycl_malloc( len_v2lapltau_buffer, queue ); + v2tau2_scr = safe_sycl_malloc( len_v2tau2_buffer, queue ); + } + + safe_zero( len_v2rho2_buffer, v2rho2, queue ); + safe_zero( len_v2rhosigma_buffer, v2rhosigma, queue ); + safe_zero( len_v2rholapl_buffer, v2rholapl, queue ); + safe_zero( len_v2rhotau_buffer, v2rhotau, queue ); + safe_zero( len_v2sigma2_buffer, v2sigma2, queue ); + safe_zero( len_v2sigmalapl_buffer, v2sigmalapl, queue ); + safe_zero( len_v2sigmatau_buffer, v2sigmatau, queue ); + safe_zero( len_v2lapl2_buffer, v2lapl2, queue ); + safe_zero( len_v2lapltau_buffer, v2lapltau, queue ); + safe_zero( len_v2tau2_buffer, v2tau2, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + + if( use_inc ) { + if( kernels_[i].second.is_mgga() ) + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, queue + ); + else if( kernels_[i].second.is_gga() ) + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, queue + ); + else + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, v2rho2, queue + ); + } else { + double* v2rho2_eval = i ? v2rho2_scr : v2rho2; + double* v2rhosigma_eval = i ? v2rhosigma_scr : v2rhosigma; + double* v2rholapl_eval = i ? v2rholapl_scr : v2rholapl; + double* v2rhotau_eval = i ? v2rhotau_scr : v2rhotau; + double* v2sigma2_eval = i ? v2sigma2_scr : v2sigma2; + double* v2sigmalapl_eval = i ? v2sigmalapl_scr : v2sigmalapl; + double* v2sigmatau_eval = i ? v2sigmatau_scr : v2sigmatau; + double* v2lapl2_eval = i ? v2lapl2_scr : v2lapl2; + double* v2lapltau_eval = i ? v2lapltau_scr : v2lapltau; + double* v2tau2_eval = i ? v2tau2_scr : v2tau2; + + if( kernels_[i].second.is_mgga() ) + kernels_[i].second.eval_fxc_device(N, rho, sigma, lapl, tau, v2rho2_eval, + v2rhosigma_eval, v2rholapl_eval, v2rhotau_eval, v2sigma2_eval, v2sigmalapl_eval, + v2sigmatau_eval, v2lapl2_eval, v2lapltau_eval, v2tau2_eval, queue); + else if( kernels_[i].second.is_gga() ) + kernels_[i].second.eval_fxc_device(N, rho, sigma, v2rho2_eval, v2rhosigma_eval, v2sigma2_eval, queue); + else + kernels_[i].second.eval_fxc_device(N, rho, v2rho2_eval, queue); + + if (i) { + add_scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if( kernels_[i].second.is_gga() or kernels_[i].second.is_mgga() ){ + add_scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + add_scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + + if( kernels_[i].second.needs_laplacian() ) { + add_scal_device(len_v2rholapl_buffer, kernels_[i].first, v2rholapl_eval, v2rholapl, queue); + add_scal_device(len_v2sigmalapl_buffer, kernels_[i].first, v2sigmalapl_eval, v2sigmalapl, queue); + add_scal_device(len_v2lapl2_buffer, kernels_[i].first, v2lapl2_eval, v2lapl2, queue); + } + + if( kernels_[i].second.is_mgga() ) { + add_scal_device(len_v2rhotau_buffer, kernels_[i].first, v2rhotau_eval, v2rhotau, queue); + add_scal_device(len_v2sigmatau_buffer, kernels_[i].first, v2sigmatau_eval, v2sigmatau, queue); + add_scal_device(len_v2tau2_buffer, kernels_[i].first, v2tau2_eval, v2tau2, queue); + } + + if ( kernels_[i].second.needs_laplacian() && kernels_[i].second.is_mgga() ) { + add_scal_device(len_v2lapltau_buffer, kernels_[i].first, v2lapltau_eval, v2lapltau, queue); + } + + } else{ + + scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if (kernels_[i].second.is_gga() or kernels_[i].second.is_mgga()) { + scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + + if (kernels_[i].second.needs_laplacian()) { + scal_device(len_v2rholapl_buffer, kernels_[i].first, v2rholapl_eval, v2rholapl, queue); + scal_device(len_v2sigmalapl_buffer, kernels_[i].first, v2sigmalapl_eval, v2sigmalapl, queue); + scal_device(len_v2lapl2_buffer, kernels_[i].first, v2lapl2_eval, v2lapl2, queue); + } + + if (kernels_[i].second.is_mgga()) { + scal_device(len_v2rhotau_buffer, kernels_[i].first, v2rhotau_eval, v2rhotau, queue); + scal_device(len_v2sigmatau_buffer, kernels_[i].first, v2sigmatau_eval, v2sigmatau, queue); + scal_device(len_v2tau2_buffer, kernels_[i].first, v2tau2_eval, v2tau2, queue); + } + + if (kernels_[i].second.needs_laplacian() && kernels_[i].second.is_mgga()) { + scal_device(len_v2lapltau_buffer, kernels_[i].first, v2lapltau_eval, v2lapltau, queue); + } + } + } + } + + if( v2rho2_scr ) sycl::free( v2rho2_scr, *queue); + if( v2rhosigma_scr ) sycl::free( v2rhosigma_scr, *queue); + if( v2rholapl_scr ) sycl::free( v2rholapl_scr, *queue); + if( v2rhotau_scr ) sycl::free( v2rhotau_scr, *queue); + if( v2sigma2_scr ) sycl::free( v2sigma2_scr, *queue); + if( v2sigmalapl_scr ) sycl::free( v2sigmalapl_scr, *queue); + if( v2sigmatau_scr ) sycl::free( v2sigmatau_scr, *queue); + if( v2lapl2_scr ) sycl::free( v2lapl2_scr, *queue); + if( v2lapltau_scr ) sycl::free( v2lapltau_scr, *queue); + if( v2tau2_scr ) sycl::free( v2tau2_scr, *queue); +} + +MGGA_VXC_FXC_GENERATOR_DEVICE( XCFunctional::eval_vxc_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT MGGA", is_mgga() ); + + const size_t len_vrho_buffer = vrho_buffer_len(N); + const size_t len_vsigma_buffer = vsigma_buffer_len(N); + const size_t len_vlapl_buffer = vlapl_buffer_len(N); + const size_t len_vtau_buffer = vtau_buffer_len(N); + const size_t len_v2rho2_buffer = v2rho2_buffer_len(N); + const size_t len_v2rhosigma_buffer = v2rhosigma_buffer_len(N); + const size_t len_v2rholapl_buffer = v2rholapl_buffer_len(N); + const size_t len_v2rhotau_buffer = v2rhotau_buffer_len(N); + const size_t len_v2sigma2_buffer = v2sigma2_buffer_len(N); + const size_t len_v2sigmalapl_buffer = v2sigmalapl_buffer_len(N); + const size_t len_v2sigmatau_buffer = v2sigmatau_buffer_len(N); + const size_t len_v2lapl2_buffer = v2lapl2_buffer_len(N); + const size_t len_v2lapltau_buffer = v2lapltau_buffer_len(N); + const size_t len_v2tau2_buffer = v2tau2_buffer_len(N); + + double* vrho_scr(nullptr), *vsigma_scr(nullptr), *vlapl_scr(nullptr), *vtau_scr(nullptr); + double* v2rho2_scr(nullptr), *v2rhosigma_scr(nullptr), *v2rholapl_scr(nullptr), *v2rhotau_scr(nullptr), + *v2sigma2_scr(nullptr), *v2sigmalapl_scr(nullptr), *v2sigmatau_scr(nullptr), *v2lapl2_scr(nullptr), + *v2lapltau_scr(nullptr), *v2tau2_scr(nullptr); + + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + vrho_scr = safe_sycl_malloc( len_vrho_buffer, queue ); + vsigma_scr = safe_sycl_malloc( len_vsigma_buffer, queue ); + vlapl_scr = safe_sycl_malloc( len_vlapl_buffer, queue ); + vtau_scr = safe_sycl_malloc( len_vtau_buffer, queue ); + v2rho2_scr = safe_sycl_malloc( len_v2rho2_buffer, queue ); + v2rhosigma_scr = safe_sycl_malloc( len_v2rhosigma_buffer, queue ); + v2rholapl_scr = safe_sycl_malloc( len_v2rholapl_buffer, queue ); + v2rhotau_scr = safe_sycl_malloc(len_v2rhotau_buffer, queue); + v2sigma2_scr = safe_sycl_malloc(len_v2sigma2_buffer, queue); + v2sigmalapl_scr = safe_sycl_malloc(len_v2sigmalapl_buffer, queue); + v2sigmatau_scr = safe_sycl_malloc(len_v2sigmatau_buffer, queue); + v2lapl2_scr = safe_sycl_malloc(len_v2lapl2_buffer, queue); + v2lapltau_scr = safe_sycl_malloc(len_v2lapltau_buffer, queue); + v2tau2_scr = safe_sycl_malloc(len_v2tau2_buffer, queue); + } + + safe_zero(len_vrho_buffer, vrho, queue); + safe_zero(len_vsigma_buffer, vsigma, queue); + safe_zero(len_vlapl_buffer, vlapl, queue); + safe_zero(len_vtau_buffer, vtau, queue); + safe_zero(len_v2rho2_buffer, v2rho2, queue); + safe_zero(len_v2rhosigma_buffer, v2rhosigma, queue); + safe_zero(len_v2rholapl_buffer, v2rholapl, queue); + safe_zero(len_v2rhotau_buffer, v2rhotau, queue); + safe_zero(len_v2sigma2_buffer, v2sigma2, queue); + safe_zero(len_v2sigmalapl_buffer, v2sigmalapl, queue); + safe_zero(len_v2sigmatau_buffer, v2sigmatau, queue); + safe_zero(len_v2lapl2_buffer, v2lapl2, queue); + safe_zero(len_v2lapltau_buffer, v2lapltau, queue); + safe_zero(len_v2tau2_buffer, v2tau2, queue); + + for (auto i = 0ul; i < kernels_.size(); ++i) { + if( use_inc ) { + if (kernels_[i].second.is_mgga()) { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, + v2lapltau, v2tau2, queue); + } else if (kernels_[i].second.is_gga()) { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, + v2sigma2, queue); + } else { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, vrho, v2rho2, queue); + } + } else { + double* vrho_eval = i ? vrho_scr : vrho; + double* vsigma_eval = i ? vsigma_scr : vsigma; + double* vlapl_eval = i ? vlapl_scr : vlapl; + double* vtau_eval = i ? vtau_scr : vtau; + double* v2rho2_eval = i ? v2rho2_scr : v2rho2; + double* v2rhosigma_eval = i ? v2rhosigma_scr : v2rhosigma; + double* v2rholapl_eval = i ? v2rholapl_scr : v2rholapl; + double* v2rhotau_eval = i ? v2rhotau_scr : v2rhotau; + double* v2sigma2_eval = i ? v2sigma2_scr : v2sigma2; + double* v2sigmalapl_eval = i ? v2sigmalapl_scr : v2sigmalapl; + double* v2sigmatau_eval = i ? v2sigmatau_scr : v2sigmatau; + double* v2lapl2_eval = i ? v2lapl2_scr : v2lapl2; + double* v2lapltau_eval = i ? v2lapltau_scr : v2lapltau; + double* v2tau2_eval = i ? v2tau2_scr : v2tau2; + + if (kernels_[i].second.is_mgga()) { + kernels_[i].second.eval_vxc_fxc_device( + N, rho, sigma, lapl, tau, vrho_eval, vsigma_eval, vlapl_eval, vtau_eval, + v2rho2_eval, v2rhosigma_eval, v2rholapl_eval, v2rhotau_eval, + v2sigma2_eval, v2sigmalapl_eval, v2sigmatau_eval, v2lapl2_eval, + v2lapltau_eval, v2tau2_eval, queue); + } else if (kernels_[i].second.is_gga()) { + kernels_[i].second.eval_vxc_fxc_device( + N, rho, sigma, vrho_eval, vsigma_eval, v2rho2_eval, v2rhosigma_eval, + v2sigma2_eval, queue); + } else { + kernels_[i].second.eval_vxc_fxc_device(N, rho, vrho_eval, v2rho2_eval, queue); + } + + if (i) { + add_scal_device(len_vrho_buffer, kernels_[i].first, vrho_eval, vrho, queue); + add_scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if (kernels_[i].second.is_gga() || kernels_[i].second.is_mgga()) { + add_scal_device(len_vsigma_buffer, kernels_[i].first, vsigma_eval, vsigma, queue); + add_scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + add_scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + + if (kernels_[i].second.needs_laplacian()) { + add_scal_device(len_vlapl_buffer, kernels_[i].first, vlapl_eval, vlapl, queue); + add_scal_device(len_v2rholapl_buffer, kernels_[i].first, v2rholapl_eval, v2rholapl, queue); + add_scal_device(len_v2sigmalapl_buffer, kernels_[i].first, v2sigmalapl_eval, v2sigmalapl, queue); + add_scal_device(len_v2lapl2_buffer, kernels_[i].first, v2lapl2_eval, v2lapl2, queue); + } + + if (kernels_[i].second.is_mgga()) { + add_scal_device(len_vtau_buffer, kernels_[i].first, vtau_eval, vtau, queue); + add_scal_device(len_v2rhotau_buffer, kernels_[i].first, v2rhotau_eval, v2rhotau, queue); + add_scal_device(len_v2sigmatau_buffer, kernels_[i].first, v2sigmatau_eval, v2sigmatau, queue); + add_scal_device(len_v2tau2_buffer, kernels_[i].first, v2tau2_eval, v2tau2, queue); + } + + if (kernels_[i].second.needs_laplacian() && kernels_[i].second.is_mgga()) { + add_scal_device(len_v2lapltau_buffer, kernels_[i].first, v2lapltau_eval, v2lapltau, queue); + } + } else { + scal_device(len_vrho_buffer, kernels_[i].first, vrho_eval, vrho, queue); + scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if (kernels_[i].second.is_gga() || kernels_[i].second.is_mgga()) { + scal_device(len_vsigma_buffer, kernels_[i].first, vsigma_eval, vsigma, queue); + scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + + if (kernels_[i].second.needs_laplacian()) { + scal_device(len_vlapl_buffer, kernels_[i].first, vlapl_eval, vlapl, queue); + scal_device(len_v2rholapl_buffer, kernels_[i].first, v2rholapl_eval, v2rholapl, queue); + scal_device(len_v2sigmalapl_buffer, kernels_[i].first, v2sigmalapl_eval, v2sigmalapl, queue); + scal_device(len_v2lapl2_buffer, kernels_[i].first, v2lapl2_eval, v2lapl2, queue); + } + + if (kernels_[i].second.is_mgga()) { + scal_device(len_vtau_buffer, kernels_[i].first, vtau_eval, vtau, queue); + scal_device(len_v2rhotau_buffer, kernels_[i].first, v2rhotau_eval, v2rhotau, queue); + scal_device(len_v2sigmatau_buffer, kernels_[i].first, v2sigmatau_eval, v2sigmatau, queue); + scal_device(len_v2tau2_buffer, kernels_[i].first, v2tau2_eval, v2tau2, queue); + } + + if (kernels_[i].second.needs_laplacian() && kernels_[i].second.is_mgga()) { + scal_device(len_v2lapltau_buffer, kernels_[i].first, v2lapltau_eval, v2lapltau, queue); + } + } + } + } + + if( vrho_scr ) sycl::free( vrho_scr, *queue); + if( vsigma_scr ) sycl::free( vsigma_scr, *queue); + if( vlapl_scr ) sycl::free( vlapl_scr, *queue); + if( vtau_scr ) sycl::free( vtau_scr, *queue); + if( v2rho2_scr ) sycl::free( v2rho2_scr, *queue); + if( v2rhosigma_scr ) sycl::free( v2rhosigma_scr, *queue); + if( v2rholapl_scr ) sycl::free( v2rholapl_scr, *queue); + if( v2rhotau_scr ) sycl::free( v2rhotau_scr, *queue); + if( v2sigma2_scr ) sycl::free( v2sigma2_scr, *queue); + if( v2sigmalapl_scr ) sycl::free( v2sigmalapl_scr, *queue); + if( v2sigmatau_scr ) sycl::free( v2sigmatau_scr, *queue); + if( v2lapl2_scr ) sycl::free( v2lapl2_scr, *queue); + if( v2lapltau_scr ) sycl::free( v2lapltau_scr, *queue); + if( v2tau2_scr ) sycl::free( v2tau2_scr, *queue); +} + + } diff --git a/test/xc_kernel_test.cxx b/test/xc_kernel_test.cxx index cb3eb9b..71a099c 100644 --- a/test/xc_kernel_test.cxx +++ b/test/xc_kernel_test.cxx @@ -1623,7 +1623,7 @@ void device_synchronize() { } -void test_cuda_hip_interface( TestInterface interface, EvalType evaltype, +void test_device_interface( TestInterface interface, EvalType evaltype, Backend backend, Kernel kern, Spin polar ) { size_t npts_lda, npts_gga, npts_mgga, npts_lapl; @@ -2146,1330 +2146,1124 @@ void test_cuda_hip_interface( TestInterface interface, EvalType evaltype, -TEST_CASE( "CUDA Interfaces", "[xc-device]" ) { - SECTION( "Libxc Functionals" ) { +#endif - SECTION( "LDA Functionals: EXC Regular Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } +#ifdef EXCHCXX_ENABLE_SYCL - SECTION( "LDA Functionals: EXC + VXC Regular Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } +inline sycl::queue q{ sycl::default_selector_v, + sycl::property_list{sycl::property::queue::in_order{}} }; - SECTION( "LDA Functionals: FXC Regular Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +template +T* safe_sycl_malloc( size_t n, sycl::queue& q ) { + if( n ) { + T* ptr = sycl::malloc_device(n, q); + return ptr; + } else return nullptr; +} - SECTION( "LDA Functionals: VXC + FXC Regular Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +template +void safe_sycl_cpy( T* dest, const T* src, size_t len, sycl::queue& q ) { - SECTION( "GGA Functionals: EXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } + q.memcpy( (void*)dest, (const void*)src, len*sizeof(T) ); - SECTION( "GGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } +} - SECTION( "GGA Functionals: FXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +void sycl_free_all(sycl::queue&){ } +template +void sycl_free_all( sycl::queue& q, T* ptr, Args&&... args ) { - SECTION( "GGA Functionals: VXC + FXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if( ptr ) { + sycl::free( (void*)ptr, q ); + } - SECTION( "MGGA Functionals: EXC Regular Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } + sycl_free_all( q, std::forward(args)... ); - SECTION( "MGGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } +} - SECTION( "MGGA Functionals: FXC Regular Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +void device_synchronize( sycl::queue& q ) { +q.wait_and_throw(); +} - SECTION( "MGGA Functionals: VXC + FXC Regular Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +void test_device_interface( TestInterface interface, EvalType evaltype, + Backend backend, Kernel kern, Spin polar) { + size_t npts_lda, npts_gga, npts_mgga, npts_lapl; + std::vector ref_rho, ref_sigma, ref_lapl, ref_tau; + std::tie(npts_lda, ref_rho ) = load_reference_density( polar ); + std::tie(npts_gga, ref_sigma) = load_reference_sigma ( polar ); + std::tie(npts_lapl, ref_lapl) = load_reference_lapl ( polar ); + std::tie(npts_mgga, ref_tau) = load_reference_tau ( polar ); - SECTION( "LDA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + REQUIRE( npts_lda == npts_gga ); + REQUIRE( npts_lda == npts_mgga ); + REQUIRE( npts_lda == npts_lapl ); - SECTION( "LDA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + const int npts = 1;//npts_lda; - SECTION( "LDA Functionals: FXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if (polar == Spin::Unpolarized && !supports_unpolarized(kern)){ + CHECK_THROWS( XCKernel( backend, kern, polar ) ); + return; + } + XCKernel func( backend, kern, polar ); - SECTION( "LDA Functionals: VXC + FXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + size_t len_rho_buffer = func.rho_buffer_len(npts); + size_t len_sigma_buffer = func.sigma_buffer_len(npts); + size_t len_lapl_buffer = func.lapl_buffer_len(npts); + size_t len_tau_buffer = func.tau_buffer_len(npts); + size_t len_exc_buffer = func.exc_buffer_len(npts); + size_t len_vrho_buffer = func.vrho_buffer_len(npts); + size_t len_vsigma_buffer = func.vsigma_buffer_len(npts); + size_t len_vlapl_buffer = func.vlapl_buffer_len(npts); + size_t len_vtau_buffer = func.vtau_buffer_len(npts); - SECTION( "GGA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + size_t len_v2rho2 = func.v2rho2_buffer_len(npts); + size_t len_v2rhosigma = func.v2rhosigma_buffer_len(npts); + size_t len_v2rholapl = func.v2rholapl_buffer_len(npts); + size_t len_v2rhotau = func.v2rhotau_buffer_len(npts); + size_t len_v2sigma2 = func.v2sigma2_buffer_len(npts); + size_t len_v2sigmalapl = func.v2sigmalapl_buffer_len(npts); + size_t len_v2sigmatau = func.v2sigmatau_buffer_len(npts); + size_t len_v2lapl2 = func.v2lapl2_buffer_len(npts); + size_t len_v2lapltau = func.v2lapltau_buffer_len(npts); + size_t len_v2tau2 = func.v2tau2_buffer_len(npts); - SECTION( "GGA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + std::vector rho_small(len_rho_buffer, 1e-13); + std::vector sigma_small(len_sigma_buffer, 1e-14); + std::vector lapl_small(len_lapl_buffer, 1e-14); + std::vector tau_small(len_tau_buffer, 1e-14); - SECTION( "GGA Functionals: FXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + std::vector rho_zero(len_rho_buffer, 0.); + std::vector sigma_zero(len_sigma_buffer, 0.); + std::vector lapl_zero(len_lapl_buffer, 0.); + std::vector tau_zero(len_tau_buffer, 0.); - SECTION( "GGA Functionals: VXC + FXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + std::vector rho, sigma, lapl, tau; - SECTION( "MGGA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if( evaltype == EvalType::Regular ) { + rho = ref_rho; + sigma = ref_sigma; + lapl = ref_lapl; + tau = ref_tau; + } - SECTION( "MGGA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if( evaltype == EvalType::Small ) { + rho = rho_small; + sigma = sigma_small; + lapl = lapl_small; + tau = tau_small; + } - SECTION( "MGGA Functionals: FXC Small Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; + if( evaltype == EvalType::Zero ) { + rho = rho_zero; + sigma = sigma_zero; + lapl = lapl_zero; + tau = tau_zero; + } - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + // Get Reference Values + std::vector + exc_ref( len_exc_buffer ), + vrho_ref( len_vrho_buffer ), + vsigma_ref( len_vsigma_buffer ), + vlapl_ref( len_vlapl_buffer ), + vtau_ref( len_vtau_buffer ); - SECTION( "MGGA Functionals: VXC + FXC Small Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + std::vector + v2rho2_ref( len_v2rho2 ), + v2rhosigma_ref( len_v2rhosigma ), + v2rholapl_ref( len_v2rholapl ), + v2rhotau_ref( len_v2rhotau ), + v2sigma2_ref( len_v2sigma2 ), + v2sigmalapl_ref( len_v2sigmalapl ), + v2sigmatau_ref( len_v2sigmatau ), + v2lapl2_ref( len_v2lapl2 ), + v2lapltau_ref( len_v2lapltau ), + v2tau2_ref( len_v2tau2 ); - SECTION( "LDA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } + if( interface == TestInterface::EXC or interface == TestInterface::EXC_INC ) { - SECTION( "LDA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } + if( func.is_lda() ) + func.eval_exc( npts, rho.data(), exc_ref.data() ); + else if( func.is_gga() ) + func.eval_exc( npts, rho.data(), sigma.data(), exc_ref.data() ); + else if( func.is_mgga() ) + func.eval_exc( npts, rho.data(), sigma.data(), lapl.data(), tau.data(), exc_ref.data() ); - SECTION( "LDA Functionals: FXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } - - SECTION( "LDA Functionals: VXC + FXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } - - SECTION( "GGA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } + } else if( interface == TestInterface::EXC_VXC or interface == TestInterface::EXC_VXC_INC ) { - SECTION( "GGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } + if( func.is_lda() ) + func.eval_exc_vxc( npts, rho.data(), exc_ref.data(), vrho_ref.data() ); + else if( func.is_gga() ) + func.eval_exc_vxc( npts, rho.data(), sigma.data(), exc_ref.data(), + vrho_ref.data(), vsigma_ref.data() ); + else if( func.is_mgga() ) + func.eval_exc_vxc( npts, rho.data(), sigma.data(), lapl.data(), tau.data(), + exc_ref.data(), vrho_ref.data(), vsigma_ref.data(), vlapl_ref.data(), vtau_ref.data() ); - SECTION( "GGA Functionals: FXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + } else if( interface == TestInterface::FXC or interface == TestInterface::FXC_INC ) { - SECTION( "GGA Functionals: VXC + FXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if( func.is_lda() ) + func.eval_fxc( npts, rho.data(), v2rho2_ref.data() ); + else if( func.is_gga() ) + func.eval_fxc( npts, rho.data(), sigma.data(), v2rho2_ref.data(), + v2rhosigma_ref.data(), v2sigma2_ref.data() ); + else if( func.is_mgga() ) + func.eval_fxc( npts, rho.data(), sigma.data(), lapl.data(), tau.data(), + v2rho2_ref.data(), v2rhosigma_ref.data(), v2rholapl_ref.data(), + v2rhotau_ref.data(), v2sigma2_ref.data(), v2sigmalapl_ref.data(), + v2sigmatau_ref.data(), v2lapl2_ref.data(), v2lapltau_ref.data(), + v2tau2_ref.data() ); + } else if( interface == TestInterface::VXC_FXC or interface == TestInterface::VXC_FXC_INC ) { + if( func.is_lda() ) + func.eval_vxc_fxc( npts, rho.data(), vrho_ref.data(), v2rho2_ref.data() ); + else if( func.is_gga() ) + func.eval_vxc_fxc( npts, rho.data(), sigma.data(), vrho_ref.data(), + vsigma_ref.data(), v2rho2_ref.data(), v2rhosigma_ref.data(), + v2sigma2_ref.data() ); + else if( func.is_mgga() ) + func.eval_vxc_fxc( npts, rho.data(), sigma.data(), lapl.data(), tau.data(), + vrho_ref.data(), vsigma_ref.data(), vlapl_ref.data(), vtau_ref.data(), + v2rho2_ref.data(), v2rhosigma_ref.data(), v2rholapl_ref.data(), + v2rhotau_ref.data(), v2sigma2_ref.data(), v2sigmalapl_ref.data(), + v2sigmatau_ref.data(), v2lapl2_ref.data(), v2lapltau_ref.data(), + v2tau2_ref.data() ); + } - SECTION( "MGGA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - SECTION( "MGGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - SECTION( "MGGA Functionals: FXC Zero Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } - SECTION( "MGGA Functionals: VXC + FXC Zero Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + // Allocate device memory + double* rho_device = safe_sycl_malloc( len_rho_buffer , q); + double* sigma_device = safe_sycl_malloc( len_sigma_buffer , q); + double* lapl_device = safe_sycl_malloc( len_lapl_buffer , q); + double* tau_device = safe_sycl_malloc( len_tau_buffer , q); + double* exc_device = safe_sycl_malloc( len_exc_buffer , q); + double* vrho_device = safe_sycl_malloc( len_vrho_buffer , q); + double* vsigma_device = safe_sycl_malloc( len_vsigma_buffer , q); + double* vlapl_device = safe_sycl_malloc( len_vlapl_buffer , q); + double* vtau_device = safe_sycl_malloc( len_vtau_buffer , q); + + double* v2rho2_device = safe_sycl_malloc( len_v2rho2 , q); + double* v2rhosigma_device = safe_sycl_malloc( len_v2rhosigma , q); + double* v2rholapl_device = safe_sycl_malloc( len_v2rholapl , q); + double* v2rhotau_device = safe_sycl_malloc( len_v2rhotau , q); + double* v2sigma2_device = safe_sycl_malloc( len_v2sigma2 , q); + double* v2sigmalapl_device = safe_sycl_malloc( len_v2sigmalapl , q); + double* v2sigmatau_device = safe_sycl_malloc( len_v2sigmatau , q); + double* v2lapl2_device = safe_sycl_malloc( len_v2lapl2 , q); + double* v2lapltau_device = safe_sycl_malloc( len_v2lapltau , q); + double* v2tau2_device = safe_sycl_malloc( len_v2tau2 , q); + // H2D Copy of rho / sigma + safe_sycl_cpy( rho_device, rho.data(), len_rho_buffer, q); + if( func.is_gga() or func.is_mgga() ) + safe_sycl_cpy( sigma_device, sigma.data(), len_sigma_buffer, q); + if( func.is_mgga() ) + safe_sycl_cpy( tau_device, tau.data(), len_tau_buffer, q); + if( func.needs_laplacian() ) + safe_sycl_cpy( lapl_device, lapl.data(), len_lapl_buffer, q); + const double alpha = 3.14; + const double fill_val_e = 0.1; + const double fill_val_vr = 1.; + const double fill_val_vs = 2.; + const double fill_val_vl = 3.; + const double fill_val_vt = 4.; + const double fill_val_v2rho2 = 10.; + const double fill_val_v2rhosigma = 11.; + const double fill_val_v2rholapl = 12.; + const double fill_val_v2rhotau = 13.; + const double fill_val_v2sigma2 = 14.; + const double fill_val_v2sigmalapl = 15.; + const double fill_val_v2sigmatau = 16.; + const double fill_val_v2lapl2 = 17.; + const double fill_val_v2lapltau = 18.; + const double fill_val_v2tau2 = 19.; - SECTION( "LDA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + std::vector + exc( len_exc_buffer, fill_val_e ), vrho( len_vrho_buffer, fill_val_vr ), + vsigma( len_vsigma_buffer, fill_val_vs ), vlapl(len_vlapl_buffer, fill_val_vl), + vtau(len_vtau_buffer, fill_val_vt); + std::vector + v2rho2( len_v2rho2, fill_val_v2rho2 ), + v2rhosigma( len_v2rhosigma, fill_val_v2rhosigma ), + v2rholapl( len_v2rholapl, fill_val_v2rholapl ), + v2rhotau( len_v2rhotau, fill_val_v2rhotau ), + v2sigma2( len_v2sigma2, fill_val_v2sigma2 ), + v2sigmalapl( len_v2sigmalapl, fill_val_v2sigmalapl ), + v2sigmatau( len_v2sigmatau, fill_val_v2sigmatau ), + v2lapl2( len_v2lapl2, fill_val_v2lapl2 ), + v2lapltau( len_v2lapltau, fill_val_v2lapltau ), + v2tau2( len_v2tau2, fill_val_v2tau2 ); - SECTION( "LDA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + // H2D copy of initial values, tests clobber / increment + safe_sycl_cpy( exc_device, exc.data(), len_exc_buffer, q); + safe_sycl_cpy( vrho_device, vrho.data(), len_vrho_buffer, q); + safe_sycl_cpy( v2rho2_device, v2rho2.data(), len_v2rho2, q); + if( func.is_gga() or func.is_mgga() ){ + safe_sycl_cpy( vsigma_device, vsigma.data(), len_vsigma_buffer, q); + safe_sycl_cpy( v2rhosigma_device, v2rhosigma.data(), len_v2rhosigma, q); + safe_sycl_cpy( v2sigma2_device, v2sigma2.data(), len_v2sigma2, q); + } + if( func.is_mgga() ){ + safe_sycl_cpy( vtau_device, vtau.data(), len_vtau_buffer, q); + safe_sycl_cpy( v2rhotau_device, v2rhotau.data(), len_v2rhotau, q); + safe_sycl_cpy( v2sigmatau_device, v2sigmatau.data(), len_v2sigmatau, q); + safe_sycl_cpy( v2tau2_device, v2tau2.data(), len_v2tau2, q); + } + if( func.needs_laplacian() ){ + safe_sycl_cpy( vlapl_device, vlapl.data(), len_vlapl_buffer, q); + safe_sycl_cpy( v2rholapl_device, v2rholapl.data(), len_v2rholapl, q); + safe_sycl_cpy( v2sigmalapl_device, v2sigmalapl.data(), len_v2sigmalapl, q); + safe_sycl_cpy( v2lapl2_device, v2lapl2.data(), len_v2lapl2, q); + safe_sycl_cpy( v2lapltau_device, v2lapltau.data(), len_v2lapltau, q); + } - SECTION( "LDA Functionals: FXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } - SECTION( "LDA Functionals: VXC + FXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + // Evaluate functional on device + if( interface == TestInterface::EXC ) { - SECTION( "GGA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + if( func.is_lda() ) + func.eval_exc_device( npts, rho_device, exc_device, &q ); + else if( func.is_gga() ) + func.eval_exc_device( npts, rho_device, sigma_device, exc_device, + &q ); + else if( func.is_mgga() ) + func.eval_exc_device( npts, rho_device, sigma_device, lapl_device, tau_device, + exc_device, &q ); - SECTION( "GGA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + } else if( interface == TestInterface::EXC_INC ) { - SECTION( "GGA Functionals: FXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + if( func.is_lda() ) + func.eval_exc_inc_device( alpha, npts, rho_device, exc_device, &q ); + else if( func.is_gga() ) + func.eval_exc_inc_device( alpha, npts, rho_device, sigma_device, exc_device, + &q ); + else if( func.is_mgga() ) + func.eval_exc_inc_device( alpha, npts, rho_device, sigma_device, lapl_device, + tau_device, exc_device, &q ); - SECTION( "GGA Functionals: VXC + FXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + } else if( interface == TestInterface::EXC_VXC ) { - SECTION( "MGGA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + if( func.is_lda() ) + func.eval_exc_vxc_device( npts, rho_device, exc_device, vrho_device, &q ); + else if( func.is_gga() ) + func.eval_exc_vxc_device( npts, rho_device, sigma_device, exc_device, + vrho_device, vsigma_device, &q ); + else if( func.is_mgga() ) + func.eval_exc_vxc_device( npts, rho_device, sigma_device, lapl_device, tau_device, + exc_device, vrho_device, vsigma_device, vlapl_device, vtau_device, &q ); - SECTION( "MGGA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + } else if( interface == TestInterface::EXC_VXC_INC ) { - SECTION( "MGGA Functionals: FXC Regular Eval Polarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + if( func.is_lda() ) + func.eval_exc_vxc_inc_device( alpha, npts, rho_device, exc_device, + vrho_device, &q ); + else if( func.is_gga() ) + func.eval_exc_vxc_inc_device( alpha, npts, rho_device, sigma_device, + exc_device, vrho_device, vsigma_device, &q ); + else if( func.is_mgga() ) + func.eval_exc_vxc_inc_device( alpha, npts, rho_device, sigma_device, + lapl_device, tau_device, exc_device, vrho_device, vsigma_device, + vlapl_device, vtau_device, &q ); - SECTION( "MGGA Functionals: VXC + FXC Regular Eval Polarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + } else if( interface == TestInterface::FXC ) { - SECTION( "LDA Functionals: EXC Small Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } + if( func.is_lda() ) + func.eval_fxc_device( npts, rho_device, v2rho2_device, &q ); + else if( func.is_gga() ) + func.eval_fxc_device( npts, rho_device, sigma_device, v2rho2_device, + v2rhosigma_device, v2sigma2_device, &q ); + else if( func.is_mgga() ) + func.eval_fxc_device( npts, rho_device, sigma_device, lapl_device, tau_device, + v2rho2_device, v2rhosigma_device, v2rholapl_device, v2rhotau_device, + v2sigma2_device, v2sigmalapl_device, v2sigmatau_device, + v2lapl2_device, v2lapltau_device, v2tau2_device, &q ); + } else if( interface == TestInterface::FXC_INC ) { + if( func.is_lda() ) + func.eval_fxc_inc_device( alpha, npts, rho_device, v2rho2_device, &q ); + else if( func.is_gga() ) + func.eval_fxc_inc_device( alpha, npts, rho_device, sigma_device, + v2rho2_device, v2rhosigma_device, v2sigma2_device, &q ); + else if( func.is_mgga() ) + func.eval_fxc_inc_device( alpha, npts, rho_device, sigma_device, + lapl_device, tau_device, v2rho2_device, v2rhosigma_device, + v2rholapl_device, v2rhotau_device, v2sigma2_device, + v2sigmalapl_device, v2sigmatau_device, v2lapl2_device, + v2lapltau_device, v2tau2_device, &q ); + } else if( interface == TestInterface::VXC_FXC ) { - SECTION( "LDA Functionals: EXC + VXC Small Eval Polarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "LDA Functionals: FXC Small Eval Polarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "LDA Functionals: VXC + FXC Small Eval Polarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } + if( func.is_lda() ) + func.eval_vxc_fxc_device( npts, rho_device, vrho_device, v2rho2_device, &q ); + else if( func.is_gga() ) + func.eval_vxc_fxc_device( npts, rho_device, sigma_device, vrho_device, + vsigma_device, v2rho2_device, v2rhosigma_device, v2sigma2_device, &q ); + else if( func.is_mgga() ) + func.eval_vxc_fxc_device( npts, rho_device, sigma_device, lapl_device, tau_device, + vrho_device, vsigma_device, vlapl_device, vtau_device, + v2rho2_device, v2rhosigma_device, v2rholapl_device, + v2rhotau_device, v2sigma2_device, v2sigmalapl_device, + v2sigmatau_device, v2lapl2_device, v2lapltau_device, + v2tau2_device, &q ); + } else if( interface == TestInterface::VXC_FXC_INC ) { + if( func.is_lda() ) + func.eval_vxc_fxc_inc_device( alpha, npts, rho_device, vrho_device, + v2rho2_device, &q ); + else if( func.is_gga() ) + func.eval_vxc_fxc_inc_device( alpha, npts, rho_device, sigma_device, + vrho_device, vsigma_device, v2rho2_device, v2rhosigma_device, + v2sigma2_device, &q ); + else if( func.is_mgga() ) + func.eval_vxc_fxc_inc_device( alpha, npts, rho_device, sigma_device, + lapl_device, tau_device, vrho_device, vsigma_device, + vlapl_device, vtau_device, v2rho2_device, v2rhosigma_device, + v2rholapl_device, v2rhotau_device, v2sigma2_device, + v2sigmalapl_device, v2sigmatau_device, v2lapl2_device, + v2lapltau_device, v2tau2_device, &q ); + } + device_synchronize( q ); - SECTION( "GGA Functionals: EXC Small Eval Polarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } + // D2H of results + safe_sycl_cpy( exc.data(), exc_device, len_exc_buffer, q); + safe_sycl_cpy( vrho.data(), vrho_device, len_vrho_buffer, q); + safe_sycl_cpy( v2rho2.data(), v2rho2_device, len_v2rho2, q); + if( func.is_gga() or func.is_mgga() ){ + safe_sycl_cpy( vsigma.data(), vsigma_device, len_vsigma_buffer, q); + safe_sycl_cpy( v2rhosigma.data(), v2rhosigma_device, len_v2rhosigma, q); + safe_sycl_cpy( v2sigma2.data(), v2sigma2_device, len_v2sigma2, q); + } + if( func.is_mgga() ){ + safe_sycl_cpy( vtau.data(), vtau_device, len_vtau_buffer, q); + safe_sycl_cpy( v2rhotau.data(), v2rhotau_device, len_v2rhotau, q); + safe_sycl_cpy( v2sigmatau.data(), v2sigmatau_device, len_v2sigmatau, q); + safe_sycl_cpy( v2tau2.data(), v2tau2_device, len_v2tau2, q); + } + if( func.needs_laplacian() ){ + safe_sycl_cpy( vlapl.data(), vlapl_device, len_vlapl_buffer, q); + safe_sycl_cpy( v2rholapl.data(), v2rholapl_device, len_v2rholapl, q); + safe_sycl_cpy( v2sigmalapl.data(), v2sigmalapl_device, len_v2sigmalapl, q); + safe_sycl_cpy( v2lapl2.data(), v2lapl2_device, len_v2lapl2, q); + safe_sycl_cpy( v2lapltau.data(), v2lapltau_device, len_v2lapltau, q); + } - SECTION( "GGA Functionals: EXC + VXC Small Eval Polarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } + // Check correctness + if( interface == TestInterface::EXC_INC or interface == TestInterface::EXC_VXC_INC ) { + for( auto i = 0ul; i < len_exc_buffer; ++i ) + CHECK( exc[i] == Approx(fill_val_e + alpha * exc_ref[i]) ); + } else if( interface == TestInterface::EXC or interface == TestInterface::EXC_VXC ) { + for( auto i = 0ul; i < len_exc_buffer; ++i ) + CHECK( exc[i] == Approx(exc_ref[i]) ); + } - SECTION( "GGA Functionals: FXC Small Eval Polarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } + if( interface == TestInterface::EXC_VXC_INC or interface == TestInterface::VXC_FXC_INC ) { - SECTION( "GGA Functionals: VXC + FXC Small Eval Polarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } + for( auto i = 0ul; i < len_vrho_buffer; ++i ) + CHECK( vrho[i] == Approx(fill_val_vr + alpha * vrho_ref[i]) ); + for( auto i = 0ul; i < len_vsigma_buffer; ++i ) + CHECK( vsigma[i] == Approx(fill_val_vs + alpha * vsigma_ref[i]) ); + for( auto i = 0ul; i < len_vlapl_buffer; ++i ) + CHECK( vlapl[i] == Approx(fill_val_vl + alpha * vlapl_ref[i]) ); + for( auto i = 0ul; i < len_vtau_buffer; ++i ) + CHECK( vtau[i] == Approx(fill_val_vt + alpha * vtau_ref[i]) ); - SECTION( "MGGA Functionals: EXC Small Eval Polarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } + } else if(interface == TestInterface::EXC_VXC or interface == TestInterface::VXC_FXC) { - SECTION( "MGGA Functionals: EXC + VXC Small Eval Polarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_vrho_buffer; ++i ){ + INFO( "Kernel is " << kern ); + CHECK( vrho[i] == Approx(vrho_ref[i]) ); } - - SECTION( "MGGA Functionals: FXC Small Eval Polarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_vsigma_buffer; ++i ) { + INFO( "vsigma Fails: Kernel is " << kern << ", builtin device = " << vsigma[i] << ", builtin = " << vsigma_ref[i] ); + bool is_close = (vsigma[i] == Approx(vsigma_ref[i]) || vsigma[i] == Approx(vsigma_ref[i]).margin(1e-13)); + CHECK( is_close ); } - - SECTION( "MGGA Functionals: VXC + FXC Small Eval Polarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_vlapl_buffer; ++i ) { + INFO( "Kernel is " << kern ); + CHECK( vlapl[i] == Approx(vlapl_ref[i]).margin(std::numeric_limits::epsilon()) ); } - - SECTION( "LDA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_vtau_buffer; ++i ) { + INFO( "Kernel is " << kern << std::scientific << " " << vtau[i] << " " << vtau_ref[i] ); + CHECK( vtau[i] == Approx(vtau_ref[i]).margin(std::numeric_limits::epsilon()) ); } + } - SECTION( "LDA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); + if( interface == TestInterface::FXC or interface == TestInterface::VXC_FXC ) { + for( auto i = 0ul; i < len_v2rho2; ++i ) { + INFO( "V2RHO2 Fails: Kernel is " << kern << ", builtin device = " << v2rho2[i] << ", builtin = " << v2rho2_ref[i] ); + bool is_close = (v2rho2[i] == Approx(v2rho2_ref[i]) || v2rho2[i] == Approx(v2rho2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "LDA Functionals: FXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2rhosigma; ++i ) { + INFO( "V2RHOSIGMA Fails: Kernel is " << kern << ", builtin device = " << v2rhosigma[i] << ", builtin = " << v2rhosigma_ref[i] ); + bool is_close = (v2rhosigma[i] == Approx(v2rhosigma_ref[i]) || v2rhosigma[i] == Approx(v2rhosigma_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "LDA Functionals: VXC + FXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2rholapl; ++i ) { + INFO( "V2RHOLAPL Fails: Kernel is " << kern << ", builtin device = " << v2rholapl[i] << ", builtin = " << v2rholapl_ref[i] ); + bool is_close = (v2rholapl[i] == Approx(v2rholapl_ref[i]) || v2rholapl[i] == Approx(v2rholapl_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "GGA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2rhotau; ++i ) { + INFO( "V2RHOTAU Fails: Kernel is " << kern << ", builtin device = " << v2rhotau[i] << ", builtin = " << v2rhotau_ref[i] ); + bool is_close = (v2rhotau[i] == Approx(v2rhotau_ref[i]) || v2rhotau[i] == Approx(v2rhotau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "GGA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2sigma2; ++i ) { + INFO( "V2SIGMA2 Fails: Kernel is " << kern << ", builtin device = " << v2sigma2[i] << ", builtin = " << v2sigma2_ref[i] ); + bool is_close = (v2sigma2[i] == Approx(v2sigma2_ref[i]) || v2sigma2[i] == Approx(v2sigma2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "GGA Functionals: FXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2sigmalapl; ++i ) { + INFO( "V2SIGMALAPL Fails: Kernel is " << kern << ", builtin device = " << v2sigmalapl[i] << ", builtin = " << v2sigmalapl_ref[i] ); + bool is_close = (v2sigmalapl[i] == Approx(v2sigmalapl_ref[i]) || v2sigmalapl[i] == Approx(v2sigmalapl_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "GGA Functionals: VXC + FXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2sigmatau; ++i ) { + INFO( "V2SIGMATAU Fails: Kernel is " << kern << ", builtin device = " << v2sigmatau[i] << ", builtin = " << v2sigmatau_ref[i] ); + bool is_close = (v2sigmatau[i] == Approx(v2sigmatau_ref[i]) || v2sigmatau[i] == Approx(v2sigmatau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "MGGA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2lapl2; ++i ) { + INFO( "V2LAPL2 Fails: Kernel is " << kern << ", builtin device = " << v2lapl2[i] << ", builtin = " << v2lapl2_ref[i] ); + bool is_close = (v2lapl2[i] == Approx(v2lapl2_ref[i]) || v2lapl2[i] == Approx(v2lapl2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "MGGA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2lapltau; ++i ) { + INFO( "V2LAPLTAU Fails: Kernel is " << kern << ", builtin device = " << v2lapltau[i] << ", builtin = " << v2lapltau_ref[i] ); + bool is_close = (v2lapltau[i] == Approx(v2lapltau_ref[i]) || v2lapltau[i] == Approx(v2lapltau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "MGGA Functionals: FXC Zero Eval Polarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2tau2; ++i ) { + INFO( "V2TAU2 Fails: Kernel is " << kern << ", builtin device = " << v2tau2[i] << ", builtin = " << v2tau2_ref[i] ); + bool is_close = (v2tau2[i] == Approx(v2tau2_ref[i]) || v2tau2[i] == Approx(v2tau2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION( "MGGA Functionals: VXC + FXC Zero Eval Polarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } + } else if( interface == TestInterface::FXC_INC or interface == TestInterface::VXC_FXC_INC ) { + for( auto i = 0ul; i < len_v2rho2; ++i ) { + INFO( "V2RHO2 Fails: Kernel is " << kern << ", builtin device = " << v2rho2[i] << ", builtin = " << v2rho2_ref[i] ); + bool is_close = (v2rho2[i] == Approx(fill_val_v2rho2 + alpha * v2rho2_ref[i]) || v2rho2[i] == Approx(fill_val_v2rho2 + alpha * v2rho2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - } - - SECTION( "Builtin Functionals" ) { - - SECTION("EXC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); + for( auto i = 0ul; i < len_v2rhosigma; ++i ) { + INFO( "V2RHOSIGMA Fails: Kernel is " << kern << ", builtin device = " << v2rhosigma[i] << ", builtin = " << v2rhosigma_ref[i] ); + bool is_close = (v2rhosigma[i] == Approx(fill_val_v2rhosigma + alpha * v2rhosigma_ref[i]) || v2rhosigma[i] == Approx(fill_val_v2rhosigma + alpha * v2rhosigma_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + VXC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); + for( auto i = 0ul; i < len_v2rholapl; ++i ) { + INFO( "V2RHOLAPL Fails: Kernel is " << kern << ", builtin device = " << v2rholapl[i] << ", builtin = " << v2rholapl_ref[i] ); + bool is_close = (v2rholapl[i] == Approx(fill_val_v2rholapl + alpha * v2rholapl_ref[i]) || v2rholapl[i] == Approx(fill_val_v2rholapl + alpha * v2rholapl_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("FXC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + INC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("EXC + VXC + INC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("FXC + INC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC + INC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + VXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("FXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + VXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("FXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("EXC + VXC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("FXC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + INC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("EXC + VXC + INC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("FXC + INC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC + INC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - - - SECTION("EXC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } - - SECTION("EXC + VXC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } - - SECTION("FXC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("VXC + FXC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("EXC + INC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } - - SECTION("EXC + VXC + INC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } - - SECTION("FXC + INC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("VXC + FXC + INC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("EXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("EXC + VXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("FXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("VXC + FXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("EXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("EXC + VXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("FXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("VXC + FXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } - } - - SECTION("EXC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } - - SECTION("EXC + VXC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2rhotau; ++i ) { + INFO( "V2RHOTAU Fails: Kernel is " << kern << ", builtin device = " << v2rhotau[i] << ", builtin = " << v2rhotau_ref[i] ); + bool is_close = (v2rhotau[i] == Approx(fill_val_v2rhotau + alpha * v2rhotau_ref[i]) || v2rhotau[i] == Approx(fill_val_v2rhotau + alpha * v2rhotau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("FXC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2sigma2; ++i ) { + INFO( "V2SIGMA2 Fails: Kernel is " << kern << ", builtin device = " << v2sigma2[i] << ", builtin = " << v2sigma2_ref[i] ); + bool is_close = (v2sigma2[i] == Approx(fill_val_v2sigma2 + alpha * v2sigma2_ref[i]) || v2sigma2[i] == Approx(fill_val_v2sigma2 + alpha * v2sigma2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("VXC + FXC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2sigmalapl; ++i ) { + INFO( "V2SIGMALAPL Fails: Kernel is " << kern << ", builtin device = " << v2sigmalapl[i] << ", builtin = " << v2sigmalapl_ref[i] ); + bool is_close = (v2sigmalapl[i] == Approx(fill_val_v2sigmalapl + alpha * v2sigmalapl_ref[i]) || v2sigmalapl[i] == Approx(fill_val_v2sigmalapl + alpha * v2sigmalapl_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + INC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2sigmatau; ++i ) { + INFO( "V2SIGMATAU Fails: Kernel is " << kern << ", builtin device = " << v2sigmatau[i] << ", builtin = " << v2sigmatau_ref[i] ); + bool is_close = (v2sigmatau[i] == Approx(fill_val_v2sigmatau + alpha * v2sigmatau_ref[i]) || v2sigmatau[i] == Approx(fill_val_v2sigmatau + alpha * v2sigmatau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + VXC + INC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2lapl2; ++i ) { + INFO( "V2LAPL2 Fails: Kernel is " << kern << ", builtin device = " << v2lapl2[i] << ", builtin = " << v2lapl2_ref[i] ); + bool is_close = (v2lapl2[i] == Approx(fill_val_v2lapl2 + alpha * v2lapl2_ref[i]) || v2lapl2[i] == Approx(fill_val_v2lapl2 + alpha * v2lapl2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("FXC + INC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2lapltau; ++i ) { + INFO( "V2LAPLTAU Fails: Kernel is " << kern << ", builtin device = " << v2lapltau[i] << ", builtin = " << v2lapltau_ref[i] ); + bool is_close = (v2lapltau[i] == Approx(fill_val_v2lapltau + alpha * v2lapltau_ref[i]) || v2lapltau[i] == Approx(fill_val_v2lapltau + alpha * v2lapltau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("VXC + FXC + INC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2tau2; ++i ) { + INFO( "V2TAU2 Fails: Kernel is " << kern << ", builtin device = " << v2tau2[i] << ", builtin = " << v2tau2_ref[i] ); + bool is_close = (v2tau2[i] == Approx(fill_val_v2tau2 + alpha * v2tau2_ref[i]) || v2tau2[i] == Approx(fill_val_v2tau2 + alpha * v2tau2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - } - - -} - -#endif - - - -#ifdef EXCHCXX_ENABLE_SYCL - -template -T* safe_sycl_malloc( size_t n, sycl::queue& q ) { - if( n ) { - T* ptr = sycl::malloc_device(n, q); - return ptr; - } else return nullptr; -} - -template -void safe_sycl_cpy( T* dest, const T* src, size_t len, sycl::queue& q ) { - - q.memcpy( (void*)dest, (const void*)src, len*sizeof(T) ); - -} - -void sycl_free_all(sycl::queue&){ } -template -void sycl_free_all( sycl::queue& q, T* ptr, Args&&... args ) { - - if( ptr ) { - sycl::free( (void*)ptr, q ); } + // Free device memory + sycl_free_all( q, rho_device, sigma_device, exc_device, vrho_device, vsigma_device, lapl_device, tau_device, + vlapl_device, vtau_device, + v2rho2_device, v2rhosigma_device, v2rholapl_device, v2rhotau_device, + v2sigma2_device, v2sigmalapl_device, v2sigmatau_device, + v2lapl2_device, v2lapltau_device, v2tau2_device ); - sycl_free_all( q, std::forward(args)... ); - -} - -void device_synchronize( sycl::queue& q ) { -q.wait_and_throw(); } +#endif // EXCHCXX_ENABLE_SYCL -void test_sycl_interface( TestInterface interface, EvalType evaltype, - Backend backend, Kernel kern, Spin polar, sycl::queue& q ) { - - auto [npts_lda, ref_rho] = load_reference_density( polar ); - auto [npts_gga, ref_sigma] = load_reference_sigma ( polar ); - - REQUIRE( npts_lda == npts_gga ); - - const int npts = npts_lda; - - XCKernel func( backend, kern, polar ); - - size_t len_rho_buffer = func.rho_buffer_len(npts); - size_t len_sigma_buffer = func.sigma_buffer_len(npts); - size_t len_exc_buffer = func.exc_buffer_len(npts); - size_t len_vrho_buffer = func.vrho_buffer_len(npts); - size_t len_vsigma_buffer = func.vsigma_buffer_len(npts); - - - std::vector rho_small(len_rho_buffer, 1e-13); - std::vector sigma_small(len_sigma_buffer, 1e-14); - std::vector rho_zero(len_rho_buffer, 0.); - std::vector sigma_zero(len_sigma_buffer, 0.); - std::vector rho, sigma; +#ifdef EXCHCXX_ENABLE_DEVICE +TEST_CASE( "GPU Interfaces", "[xc-device]" ) { - if( evaltype == EvalType::Regular ) { - rho = ref_rho; - sigma = ref_sigma; - } + SECTION( "Libxc Functionals" ) { - if( evaltype == EvalType::Small ) { - rho = rho_small; - sigma = sigma_small; - } - if( evaltype == EvalType::Zero ) { - rho = rho_zero; - sigma = sigma_zero; - } + SECTION( "LDA Functionals: EXC Regular Eval Unpolarized" ) { + for( auto kern : lda_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } - // Get Reference Values - std::vector - exc_ref( len_exc_buffer ), - vrho_ref( len_vrho_buffer ), - vsigma_ref( len_vsigma_buffer ); + SECTION( "LDA Functionals: EXC + VXC Regular Eval Unpolarized" ) { + for( auto kern : lda_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } - if( interface == TestInterface::EXC or interface == TestInterface::EXC_INC ) { + SECTION( "LDA Functionals: FXC Regular Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( func.is_lda() ) - func.eval_exc( npts, rho.data(), exc_ref.data() ); - else if( func.is_gga() ) - func.eval_exc( npts, rho.data(), sigma.data(), exc_ref.data() ); + SECTION( "LDA Functionals: VXC + FXC Regular Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } else if( interface == TestInterface::EXC_VXC or interface == TestInterface::EXC_VXC_INC ) { + SECTION( "GGA Functionals: EXC Regular Eval Unpolarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } - if( func.is_lda() ) - func.eval_exc_vxc( npts, rho.data(), exc_ref.data(), vrho_ref.data() ); - else if( func.is_gga() ) - func.eval_exc_vxc( npts, rho.data(), sigma.data(), exc_ref.data(), - vrho_ref.data(), vsigma_ref.data() ); + SECTION( "GGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } - } + SECTION( "GGA Functionals: FXC Regular Eval Unpolarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } + SECTION( "GGA Functionals: VXC + FXC Regular Eval Unpolarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } + SECTION( "MGGA Functionals: EXC Regular Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + SECTION( "MGGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + SECTION( "MGGA Functionals: FXC Regular Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } + SECTION( "MGGA Functionals: VXC + FXC Regular Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - // Allocate device memory - double* rho_device = safe_sycl_malloc( len_rho_buffer , q ); - double* sigma_device = safe_sycl_malloc( len_sigma_buffer , q ); - double* exc_device = safe_sycl_malloc( len_exc_buffer , q ); - double* vrho_device = safe_sycl_malloc( len_vrho_buffer , q ); - double* vsigma_device = safe_sycl_malloc( len_vsigma_buffer, q ); - // H2D Copy of rho / sigma - safe_sycl_cpy( rho_device, rho.data(), len_rho_buffer, q ); - if( func.is_gga() ) - safe_sycl_cpy( sigma_device, sigma.data(), len_sigma_buffer, q ); - const double alpha = 3.14; - const double fill_val_e = 2.; - const double fill_val_vr = 10.; - const double fill_val_vs = 50.; + SECTION( "LDA Functionals: EXC Small Eval Unpolarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - std::vector - exc( len_exc_buffer, fill_val_e ), vrho( len_vrho_buffer, fill_val_vr ), - vsigma( len_vsigma_buffer, fill_val_vs ); + SECTION( "LDA Functionals: EXC + VXC Small Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - // H2D copy of initial values, tests clobber / increment - safe_sycl_cpy( exc_device, exc.data(), len_exc_buffer, q ); - safe_sycl_cpy( vrho_device, vrho.data(), len_vrho_buffer, q ); - if( func.is_gga() ) - safe_sycl_cpy( vsigma_device, vsigma.data(), len_vsigma_buffer, q ); + SECTION( "LDA Functionals: FXC Small Eval Unpolarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - q.wait(); + SECTION( "LDA Functionals: VXC + FXC Small Eval Unpolarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - // Evaluate functional on device - if( interface == TestInterface::EXC ) { + SECTION( "GGA Functionals: EXC Small Eval Unpolarized" ) { + for( auto kern : gga_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( func.is_lda() ) - func.eval_exc_device( npts, rho_device, exc_device, &q ); - else if( func.is_gga() ) - func.eval_exc_device( npts, rho_device, sigma_device, exc_device, - &q ); + SECTION( "GGA Functionals: EXC + VXC Small Eval Unpolarized" ) { + for( auto kern : gga_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } else if( interface == TestInterface::EXC_INC ) { + SECTION( "GGA Functionals: FXC Small Eval Unpolarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( func.is_lda() ) - func.eval_exc_inc_device( alpha, npts, rho_device, exc_device, &q ); - else if( func.is_gga() ) - func.eval_exc_inc_device( alpha, npts, rho_device, sigma_device, exc_device, - &q ); + SECTION( "GGA Functionals: VXC + FXC Small Eval Unpolarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } else if( interface == TestInterface::EXC_VXC ) { + SECTION( "MGGA Functionals: EXC Small Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( func.is_lda() ) - func.eval_exc_vxc_device( npts, rho_device, exc_device, vrho_device, &q ); - else if( func.is_gga() ) - func.eval_exc_vxc_device( npts, rho_device, sigma_device, exc_device, - vrho_device, vsigma_device, &q ); + SECTION( "MGGA Functionals: EXC + VXC Small Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } else if( interface == TestInterface::EXC_VXC_INC ) { + SECTION( "MGGA Functionals: FXC Small Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; - if( func.is_lda() ) - func.eval_exc_vxc_inc_device( alpha, npts, rho_device, exc_device, - vrho_device, &q ); - else if( func.is_gga() ) - func.eval_exc_vxc_inc_device( alpha, npts, rho_device, sigma_device, - exc_device, vrho_device, vsigma_device, &q ); + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } + SECTION( "MGGA Functionals: VXC + FXC Small Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - device_synchronize( q ); + SECTION( "LDA Functionals: EXC Zero Eval Unpolarized" ) { + for( auto kern : lda_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - // D2H of results - safe_sycl_cpy( exc.data(), exc_device, len_exc_buffer, q ); - safe_sycl_cpy( vrho.data(), vrho_device, len_vrho_buffer, q ); - if(func.is_gga()) - safe_sycl_cpy( vsigma.data(), vsigma_device, len_vsigma_buffer, q ); + SECTION( "LDA Functionals: EXC + VXC Zero Eval Unpolarized" ) { + for( auto kern : lda_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - device_synchronize( q ); - // Check correctness - if( interface == TestInterface::EXC_INC or interface == TestInterface::EXC_VXC_INC ) { - for( auto i = 0ul; i < len_exc_buffer; ++i ) - CHECK( exc[i] == Approx(fill_val_e + alpha * exc_ref[i]) ); - } else { - for( auto i = 0ul; i < len_exc_buffer; ++i ) - CHECK( exc[i] == Approx(exc_ref[i]) ); - } + SECTION( "LDA Functionals: FXC Zero Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( interface == TestInterface::EXC_VXC_INC ) { + SECTION( "LDA Functionals: VXC + FXC Zero Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - for( auto i = 0ul; i < len_vrho_buffer; ++i ) - CHECK( vrho[i] == Approx(fill_val_vr + alpha * vrho_ref[i]) ); - for( auto i = 0ul; i < len_vsigma_buffer; ++i ) - CHECK( vsigma[i] == Approx(fill_val_vs + alpha * vsigma_ref[i]) ); + SECTION( "GGA Functionals: EXC Zero Eval Unpolarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - } else if(interface == TestInterface::EXC_VXC) { + SECTION( "GGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - for( auto i = 0ul; i < len_vrho_buffer; ++i ) - CHECK( vrho[i] == Approx(vrho_ref[i]) ); - for( auto i = 0ul; i < len_vsigma_buffer; ++i ) { - INFO( "Kernel is " << kern ); - CHECK( vsigma[i] == Approx(vsigma_ref[i]) ); + SECTION( "GGA Functionals: FXC Zero Eval Unpolarized" ) { + for( auto kern : gga_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } } - } + SECTION( "GGA Functionals: VXC + FXC Zero Eval Unpolarized" ) { + for( auto kern : gga_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - device_synchronize( q ); - sycl_free_all( q, rho_device, sigma_device, exc_device, vrho_device, - vsigma_device ); + SECTION( "MGGA Functionals: EXC Zero Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - device_synchronize( q ); -} + SECTION( "MGGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + SECTION( "MGGA Functionals: FXC Zero Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } -#if 0 -struct SYCLTestFeature { - sycl::queue q; - SYCLTestFeature() : - q( sycl::gpu_selector_v, - sycl::property_list{sycl::property::queue::in_order{}} ) { } -}; -#else -struct SYCLTestFeature { - static sycl::queue q; + SECTION( "MGGA Functionals: VXC + FXC Zero Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - SYCLTestFeature() {} -}; -sycl::queue SYCLTestFeature::q( - sycl::gpu_selector_v, - sycl::property_list{sycl::property::queue::in_order{}} ); -#endif -TEST_CASE_METHOD( SYCLTestFeature, "SYCL Interfaces", "[xc-device]" ) { - //std::cout << "Running on " - // << q.get_device().get_info() - // << "\n"; - SECTION( "Libxc Functionals" ) { - SECTION( "LDA Functionals: EXC Regular Eval Unpolarized" ) { + SECTION( "LDA Functionals: EXC Regular Eval Polarized" ) { for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } - - SECTION( "LDA Functionals: EXC + VXC Regular Eval Unpolarized" ) { + SECTION( "LDA Functionals: EXC + VXC Regular Eval Polarized" ) { for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "LDA Functionals: FXC Regular Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "LDA Functionals: VXC + FXC Regular Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "LDA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "GGA Functionals: EXC Regular Eval Polarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } - - SECTION( "LDA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "GGA Functionals: EXC + VXC Regular Eval Polarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "GGA Functionals: FXC Regular Eval Polarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "GGA Functionals: VXC + FXC Regular Eval Polarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "LDA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "MGGA Functionals: EXC Regular Eval Polarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } + SECTION( "MGGA Functionals: EXC + VXC Regular Eval Polarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } - SECTION( "LDA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "MGGA Functionals: FXC Regular Eval Polarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "MGGA Functionals: VXC + FXC Regular Eval Polarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "LDA Functionals: EXC Small Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } + SECTION( "LDA Functionals: EXC + VXC Small Eval Polarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "LDA Functionals: FXC Small Eval Polarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "LDA Functionals: VXC + FXC Small Eval Polarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "GGA Functionals: EXC Small Eval Polarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "GGA Functionals: EXC + VXC Small Eval Polarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "GGA Functionals: FXC Small Eval Polarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } - SECTION( "LDA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "GGA Functionals: VXC + FXC Small Eval Polarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } + SECTION( "MGGA Functionals: EXC Small Eval Polarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } - SECTION( "LDA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: EXC + VXC Small Eval Polarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: FXC Small Eval Polarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: VXC + FXC Small Eval Polarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "LDA Functionals: EXC Small Eval Polarized" ) { + SECTION( "LDA Functionals: EXC Zero Eval Polarized" ) { for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "LDA Functionals: EXC + VXC Small Eval Polarized" ) { + SECTION( "LDA Functionals: EXC + VXC Zero Eval Polarized" ) { for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC Small Eval Polarized" ) { + SECTION( "LDA Functionals: FXC Zero Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } + } + + SECTION( "LDA Functionals: VXC + FXC Zero Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } + } + + SECTION( "GGA Functionals: EXC Zero Eval Polarized" ) { for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC + VXC Small Eval Polarized" ) { + SECTION( "GGA Functionals: EXC + VXC Zero Eval Polarized" ) { for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "LDA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "GGA Functionals: FXC Zero Eval Polarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } + } + + SECTION( "GGA Functionals: VXC + FXC Zero Eval Polarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } + } + + SECTION( "MGGA Functionals: EXC Zero Eval Polarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - - SECTION( "LDA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: EXC + VXC Zero Eval Polarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: FXC Zero Eval Polarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: VXC + FXC Zero Eval Polarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } } } @@ -3477,160 +3271,369 @@ TEST_CASE_METHOD( SYCLTestFeature, "SYCL Interfaces", "[xc-device]" ) { SECTION( "Builtin Functionals" ) { SECTION("EXC Regular: Unpolarized") { - //std::cout << "EXC Regular: Unpolarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); } SECTION("EXC + VXC Regular: Unpolarized") { - //std::cout << "EXC + VXC Regular: Unpolarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } + + SECTION("FXC Regular: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC Regular: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + INC Regular: Unpolarized") { - //std::cout << "EXC + INC Regular: Unpolarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); } SECTION("EXC + VXC + INC Regular: Unpolarized") { - //std::cout << "EXC + VXC + INC Regular: Unpolarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } + + SECTION("FXC + INC Regular: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC + INC Regular: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + VXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("FXC Small: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC Small: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + VXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("FXC + INC Small: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC + INC Small: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC Zero: Unpolarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); } SECTION("EXC + VXC Zero: Unpolarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } + + SECTION("FXC Zero: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC Zero: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + INC Zero: Unpolarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); } SECTION("EXC + VXC + INC Zero: Unpolarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } + + SECTION("FXC + INC Zero: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC + INC Zero: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } } + + SECTION("EXC Regular: Polarized") { - //std::cout << "EXC Regular: Polarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); } SECTION("EXC + VXC Regular: Polarized") { - //std::cout << "EXC + VXC Regular: Polarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } + + SECTION("FXC Regular: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC Regular: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + INC Regular: Polarized") { - //std::cout << "EXC + INC Regular: Polarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); } SECTION("EXC + VXC + INC Regular: Polarized") { - //std::cout << "EXC + VXC + INC Regular: Polarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } + + SECTION("FXC + INC Regular: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC + INC Regular: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + VXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("FXC Small: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC Small: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + VXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("FXC + INC Small: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC + INC Small: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); } SECTION("EXC + VXC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } + + SECTION("FXC Zero: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC Zero: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + INC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); } SECTION("EXC + VXC + INC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } + + SECTION("FXC + INC Zero: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC + INC Zero: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } } } } - -#endif +#endif // EXCHCXX_ENABLE_DEVICE