From 8bac863f2d36a36bb0d0ed39e3928a3a2777b350 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Tue, 23 Sep 2025 20:28:53 +0000 Subject: [PATCH 01/14] [SYCL] refactor SYCL --- .../impl/builtin/kernels/deorbitalized.hpp | 52 +- include/exchcxx/impl/builtin/util.hpp | 69 +- include/exchcxx/util/exchcxx_macros.hpp | 34 +- src/sycl/builtin_sycl.cxx | 1487 ++++++++-- src/sycl/exchcxx_sycl.cmake | 9 +- src/sycl/libxc_device.cxx | 376 ++- src/sycl/xc_functional_device.cxx | 557 +++- test/xc_kernel_test.cxx | 2466 ++++++++--------- 8 files changed, 3479 insertions(+), 1571 deletions(-) diff --git a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp index 474f04c..8a3a233 100644 --- a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp +++ b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. 
of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -49,7 +49,7 @@ * in binary and source code form. */ -#pragma once +#pragma once #include #include #include @@ -141,8 +141,10 @@ struct kernel_traits> { double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { - #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n"); + #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n"); #else unused(rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2); throw std::runtime_error("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels"); @@ -163,15 +165,17 @@ struct kernel_traits> { double& v2rhotau_a_a, double& v2rhotau_a_b, double& v2rhotau_b_a, double& v2rhotau_b_b, double& 
v2sigma2_aa_aa, double& v2sigma2_aa_ab, double& v2sigma2_aa_bb, double& v2sigma2_ab_ab, double& v2sigma2_ab_bb, double& v2sigma2_bb_bb, - double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a, + double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a, double& v2sigmalapl_ab_b, double& v2sigmalapl_bb_a, double& v2sigmalapl_bb_b, - double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a, + double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a, double& v2sigmatau_ab_b, double& v2sigmatau_bb_a, double& v2sigmatau_bb_b, double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) { - #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n"); + #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n"); #else unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, vrho_a, vrho_b, vsigma_aa, vsigma_ab, vsigma_bb, vlapl_a, vlapl_b, vtau_a, vtau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, 
v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb); throw std::runtime_error("eval_vxc_fxc_polar not implemented for deorbitalized kernels"); @@ -181,11 +185,13 @@ struct kernel_traits> { BUILTIN_KERNEL_EVAL_RETURN eval_fxc_unpolar( double rho, double sigma, double lapl, double tau, - double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, - double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, + double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, + double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { - #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); + #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + sycl::ext::oneapi::experimental::printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); #else unused(rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2); throw std::runtime_error("eval_fxc_unpolar not implemented for deorbitalized kernels"); @@ -193,8 +199,8 @@ struct kernel_traits> { } BUILTIN_KERNEL_EVAL_RETURN - eval_fxc_polar( double rho_a, double rho_b, - double sigma_aa, double sigma_ab, double sigma_bb, + eval_fxc_polar( double rho_a, double rho_b, + double sigma_aa, double sigma_ab, double sigma_bb, double lapl_a, double lapl_b, double tau_a, double tau_b, double& v2rho2_aa, double& v2rho2_ab, double& v2rho2_bb, double& v2rhosigma_a_aa, double& v2rhosigma_a_ab, double& v2rhosigma_a_bb, @@ -203,15 +209,17 @@ struct kernel_traits> { double& v2rhotau_a_a, double& v2rhotau_a_b, double& v2rhotau_b_a, double& v2rhotau_b_b, double& v2sigma2_aa_aa, double& v2sigma2_aa_ab, double& v2sigma2_aa_bb, double& v2sigma2_ab_ab, double& 
v2sigma2_ab_bb, double& v2sigma2_bb_bb, - double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a, + double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a, double& v2sigmalapl_ab_b, double& v2sigmalapl_bb_a, double& v2sigmalapl_bb_b, - double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a, + double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a, double& v2sigmatau_ab_b, double& v2sigmatau_bb_a, double& v2sigmatau_bb_b, double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) { - #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_fxc_polar not implemented for deorbitalized kernels\n"); + #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + sycl::ext::oneapi::experimental::printf("eval_fxc_polar not implemented for deorbitalized kernels\n"); #else unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb); throw std::runtime_error("eval_fxc_polar not implemented for deorbitalized kernels"); diff --git 
a/include/exchcxx/impl/builtin/util.hpp b/include/exchcxx/impl/builtin/util.hpp index 89d8bda..747226f 100644 --- a/include/exchcxx/impl/builtin/util.hpp +++ b/include/exchcxx/impl/builtin/util.hpp @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -118,7 +118,7 @@ SAFE_INLINE(auto) erf( T x ) { return sm::erf(x); } template SAFE_INLINE(auto) pow( T x, U e ) { return sm::pow(x,e); } template -SAFE_INLINE(auto) xc_erfcx( T x ) { return sm::exp(x*x)*sm::erfc(x); } +SAFE_INLINE(auto) xc_erfcx( T x ) { return sm::exp(x*x)*sm::erfc(x); } @@ -139,11 +139,16 @@ SAFE_INLINE(auto) xc_cheb_eval(const double x, const double *cs, const int N) return 0.5*(b0 - b2); } + // The following data is taken from libxc #if defined(__CUDACC__) || defined(__HIPCC__) -__device__ +__device__ static +#elif defined(__SYCL_DEVICE_ONLY__) +inline constexpr +#else +static #endif -static double AE11_data[39] = { +double AE11_data[39] = { 0.121503239716065790, -0.065088778513550150, 0.004897651357459670, -0.000649237843027216, 0.000093840434587471, 0.000000420236380882, -0.000008113374735904, 0.000002804247688663, 0.000000056487164441, -0.000000344809174450, 0.000000058209273578, 0.000000038711426349, -0.000000012453235014, -0.000000005118504888, 0.000000002148771527, @@ -155,9 +160,13 @@ static double AE11_data[39] = { }; #if defined(__CUDACC__) || defined(__HIPCC__) -__device__ +__device__ static +#elif defined(__SYCL_DEVICE_ONLY__) +inline constexpr +#else +static #endif -static double AE12_data[25] = { +double AE12_data[25] = { 0.582417495134726740, -0.158348850905782750, -0.006764275590323141, 0.005125843950185725, 0.000435232492169391, -0.000143613366305483, -0.000041801320556301, -0.000002713395758640, 0.000001151381913647, 0.000000420650022012, 0.000000066581901391, 0.000000000662143777, -0.000000002844104870, -0.000000000940724197, -0.000000000177476602, @@ -166,9 +175,13 @@ static double AE12_data[25] = { }; #if defined(__CUDACC__) || defined(__HIPCC__) -__device__ +__device__ static +#elif 
defined(__SYCL_DEVICE_ONLY__) +inline constexpr +#else +static #endif -static double E11_data[19] = { +double E11_data[19] = { -16.11346165557149402600, 7.79407277874268027690, -1.95540581886314195070, 0.37337293866277945612, -0.05692503191092901938, 0.00721107776966009185, -0.00078104901449841593, 0.00007388093356262168, -0.00000620286187580820, 0.00000046816002303176, -0.00000003209288853329, 0.00000000201519974874, -0.00000000011673686816, 0.00000000000627627066, -0.00000000000031481541, @@ -176,9 +189,13 @@ static double E11_data[19] = { }; #if defined(__CUDACC__) || defined(__HIPCC__) -__device__ +__device__ static +#elif defined(__SYCL_DEVICE_ONLY__) +inline constexpr +#else +static #endif -static double E12_data[16] = { +double E12_data[16] = { -0.03739021479220279500, 0.04272398606220957700, -0.13031820798497005440, 0.01441912402469889073, -0.00134617078051068022, 0.00010731029253063780, -0.00000742999951611943, 0.00000045377325690753, -0.00000002476417211390, 0.00000000122076581374, -0.00000000005485141480, 0.00000000000226362142, -0.00000000000008635897, 0.00000000000000306291, -0.00000000000000010148, @@ -186,9 +203,13 @@ static double E12_data[16] = { }; #if defined(__CUDACC__) || defined(__HIPCC__) -__device__ +__device__ static +#elif defined(__SYCL_DEVICE_ONLY__) +inline constexpr +#else +static #endif -static double AE13_data[25] = { +double AE13_data[25] = { -0.605773246640603460, -0.112535243483660900, 0.013432266247902779, -0.001926845187381145, 0.000309118337720603, -0.000053564132129618, 0.000009827812880247, -0.000001885368984916, 0.000000374943193568, -0.000000076823455870, 0.000000016143270567, -0.000000003466802211, 0.000000000758754209, -0.000000000168864333, 0.000000000038145706, @@ -197,9 +218,13 @@ static double AE13_data[25] = { }; #if defined(__CUDACC__) || defined(__HIPCC__) -__device__ +__device__ static +#elif defined(__SYCL_DEVICE_ONLY__) +inline constexpr +#else +static #endif -static double AE14_data[26] = { +double 
AE14_data[26] = { -0.18929180007530170, -0.08648117855259871, 0.00722410154374659, -0.00080975594575573, 0.00010999134432661, -0.00001717332998937, 0.00000298562751447, -0.00000056596491457, 0.00000011526808397, -0.00000002495030440, 0.00000000569232420, -0.00000000135995766, 0.00000000033846628, -0.00000000008737853, 0.00000000002331588, diff --git a/include/exchcxx/util/exchcxx_macros.hpp b/include/exchcxx/util/exchcxx_macros.hpp index da84dc5..2a6c5a7 100644 --- a/include/exchcxx/util/exchcxx_macros.hpp +++ b/include/exchcxx/util/exchcxx_macros.hpp @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
- * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -79,7 +79,7 @@ #define DEVICE_PARAMS sycl::queue* queue #define DEVICE_PARAMS_NOTYPE queue - #define SYCL_KERNEL_PARAMS sycl::id<1> idx + #define SYCL_KERNEL_PARAMS sycl::id<1> tid #endif @@ -159,7 +159,7 @@ RET_GENERATOR_DEVICE( LDA, EXC_VXC, func ) #define RET_LDA_VXC_FXC_GENERATOR_DEVICE(func) \ RET_GENERATOR_DEVICE( LDA, VXC_FXC, func ) - + #define LDA_EXC_GENERATOR_DEVICE(func) \ void RET_LDA_EXC_GENERATOR_DEVICE(func) @@ -186,7 +186,7 @@ RET_INC_GENERATOR_DEVICE( LDA, EXC_VXC, func ) #define RET_LDA_VXC_FXC_INC_GENERATOR_DEVICE(func) \ RET_INC_GENERATOR_DEVICE( LDA, VXC_FXC, func ) - + #define LDA_EXC_INC_GENERATOR_DEVICE(func) \ void RET_LDA_EXC_INC_GENERATOR_DEVICE(func) @@ -217,7 +217,7 @@ RET_GENERATOR_SYCL_KERNEL( LDA, EXC_VXC, func ) #define RET_LDA_VXC_FXC_GENERATOR_SYCL_KERNEL(func) \ RET_GENERATOR_SYCL_KERNEL( LDA, VXC_FXC, func ) - + #define LDA_EXC_GENERATOR_SYCL_KERNEL(func) \ void RET_LDA_EXC_GENERATOR_SYCL_KERNEL(func) @@ -231,7 +231,6 @@ void RET_LDA_EXC_VXC_GENERATOR_SYCL_KERNEL(func) #define LDA_VXC_FXC_GENERATOR_SYCL_KERNEL(func) \ void RET_LDA_VXC_FXC_GENERATOR_SYCL_KERNEL(func) - #define RET_LDA_EXC_INC_GENERATOR_SYCL_KERNEL(func) \ RET_INC_GENERATOR_SYCL_KERNEL( LDA, EXC, func ) @@ -245,7 +244,7 @@ RET_INC_GENERATOR_SYCL_KERNEL( LDA, EXC_VXC, func ) #define 
RET_LDA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL(func) \ RET_INC_GENERATOR_SYCL_KERNEL( LDA, VXC_FXC, func ) - + #define LDA_EXC_INC_GENERATOR_SYCL_KERNEL(func) \ void RET_LDA_EXC_INC_GENERATOR_SYCL_KERNEL(func) @@ -263,7 +262,7 @@ #endif -// GGA Generators +// GGA Generators #define RET_GGA_EXC_GENERATOR(func) RET_GENERATOR( GGA, EXC, func ) #define RET_GGA_VXC_GENERATOR(func) RET_GENERATOR( GGA, VXC, func ) @@ -580,6 +579,3 @@ } #endif - - - diff --git a/src/sycl/builtin_sycl.cxx b/src/sycl/builtin_sycl.cxx index 0225fab..1ff6f30 100644 --- a/src/sycl/builtin_sycl.cxx +++ b/src/sycl/builtin_sycl.cxx @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
- * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -56,296 +56,919 @@ namespace ExchCXX { namespace detail { +template class device_eval_exc_helper_unpolar_kernel_name; +template class device_eval_exc_helper_polar_kernel_name; +template class device_eval_exc_vxc_helper_unpolar_kernel_name; +template class device_eval_exc_vxc_helper_polar_kernel_name; +template class device_eval_fxc_helper_unpolar_kernel_name; +template class device_eval_fxc_helper_polar_kernel_name; +template class device_eval_vxc_fxc_helper_unpolar_kernel_name; +template class device_eval_vxc_fxc_helper_polar_kernel_name; +template class device_eval_exc_inc_helper_unpolar_kernel_name; +template class device_eval_exc_inc_helper_polar_kernel_name; +template class device_eval_exc_vxc_inc_helper_unpolar_kernel_name; +template class device_eval_exc_vxc_inc_helper_polar_kernel_name; +template class device_eval_fxc_inc_helper_unpolar_kernel_name; +template class device_eval_fxc_inc_helper_polar_kernel_name; +template class device_eval_vxc_fxc_inc_helper_unpolar_kernel_name; +template class device_eval_vxc_fxc_inc_helper_polar_kernel_name; + + template -inline LDA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { +__attribute__((always_inline)) LDA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { using traits = kernel_traits; + 
traits::eval_exc_unpolar( rho[tid], eps[tid] ); + +} - const double rho_use = sycl::max( rho[idx], 0. ); - traits::eval_exc_unpolar( rho_use, eps[idx] ); +template +__attribute__((always_inline)) LDA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + traits::eval_exc_polar( rho_i[0], rho_i[1], eps[tid] ); } template -inline LDA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { +__attribute__((always_inline)) LDA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_exc_vxc_unpolar( rho[tid], eps[tid], vxc[tid] ); - auto rho_i = rho + 2*idx; +} + +template +__attribute__((always_inline)) LDA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto vxc_i = vxc + 2*tid; - traits::eval_exc_polar( rho_a_use, rho_b_use, eps[idx] ); + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], eps[tid], + vxc_i[0], vxc_i[1] ); } + template -inline LDA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { +__attribute__((always_inline)) LDA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_fxc_unpolar( rho[tid], fxc[tid] ); + +} + +template +__attribute__((always_inline)) LDA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_polar_kernel ) { - const double rho_use = sycl::max( rho[idx], 0. 
); - traits::eval_exc_vxc_unpolar( rho_use, eps[idx], vxc[idx] ); + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto v2rho2_i = fxc + 3*tid; + + traits::eval_fxc_polar( rho_i[0], rho_i[1], v2rho2_i[0], + v2rho2_i[1], v2rho2_i[2] ); } template -inline LDA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { +__attribute__((always_inline)) LDA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_vxc_fxc_unpolar( rho[tid], vxc[tid], fxc[tid] ); + +} + +template +__attribute__((always_inline)) LDA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto vxc_i = vxc + 2*tid; + auto v2rho2_i = fxc + 3*tid; - auto rho_i = rho + 2*idx; - auto vxc_i = vxc + 2*idx; + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], vxc_i[0], vxc_i[1], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2] ); - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); +} + +template +__attribute__((always_inline)) LDA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { + + using traits = kernel_traits; - traits::eval_exc_vxc_polar( rho_a_use, rho_b_use, eps[idx], - vxc_i[0], vxc_i[1] ); + double e; + traits::eval_exc_unpolar( rho[tid], e ); + eps[tid] += scal_fact * e; } template -inline LDA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { +__attribute__((always_inline)) LDA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { using traits = kernel_traits; + auto rho_i = rho + 2*tid; double e; + traits::eval_exc_polar( rho_i[0], rho_i[1], e ); - const double rho_use = sycl::max( rho[idx], 0. 
); - traits::eval_exc_unpolar( rho_use, e ); - eps[idx] += scal_fact * e; + eps[tid] += scal_fact * e; } template -inline LDA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { +__attribute__((always_inline)) LDA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; - auto rho_i = rho + 2*idx; + double e,v; + traits::eval_exc_vxc_unpolar( rho[tid], e, v ); + eps[tid] += scal_fact * e; + vxc[tid] += scal_fact * v; + +} - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); +template +__attribute__((always_inline)) LDA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { - double e; - traits::eval_exc_polar( rho_a_use, rho_b_use, e ); + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto vxc_i = vxc + 2*tid; + + double v_a, v_b, e; + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], e, v_a, v_b); + eps[tid] += scal_fact * e; + vxc_i[0] += scal_fact * v_a; + vxc_i[1] += scal_fact * v_b; - eps[idx] += scal_fact * e; +} +template +__attribute__((always_inline)) LDA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + double f; + traits::eval_fxc_unpolar( rho[tid], f ); + fxc[tid] += scal_fact * f; } template -inline LDA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { +__attribute__((always_inline)) LDA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_polar_kernel ) { + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto fxc_i = fxc + 3*tid; + double f0, f1, f2; + traits::eval_fxc_polar( rho_i[0], rho_i[1], f0, f1, f2 ); + fxc_i[0] += scal_fact * f0; + fxc_i[1] += scal_fact * f1; + fxc_i[2] += scal_fact * f2; +} +template +__attribute__((always_inline)) LDA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; + double 
v, f; + traits::eval_vxc_fxc_unpolar( rho[tid], v, f ); + vxc[tid] += scal_fact * v; + fxc[tid] += scal_fact * f; +} - double e,v; +template +__attribute__((always_inline)) LDA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_polar_kernel ) { + using traits = kernel_traits; + auto rho_i = rho + 2*tid; + auto vxc_i = vxc + 2*tid; + auto fxc_i = fxc + 3*tid; + double v0, v1, f0, f1, f2; + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], v0, v1, f0, f1, f2 ); + vxc_i[0] += scal_fact * v0; + vxc_i[1] += scal_fact * v1; + fxc_i[0] += scal_fact * f0; + fxc_i[1] += scal_fact * f1; + fxc_i[2] += scal_fact * f2; +} + + + + + + +template +__attribute__((always_inline)) GGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + traits::eval_exc_unpolar( rho[tid], sigma[tid], eps[tid] ); + +} - const double rho_use = sycl::max( rho[idx], 0. ); - traits::eval_exc_vxc_unpolar( rho_use, e, v ); - eps[idx] += scal_fact * e; - vxc[idx] += scal_fact * v; +template +__attribute__((always_inline)) GGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], eps[tid] ); + +} + +template +__attribute__((always_inline)) GGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], eps[tid], + vrho[tid], vsigma[tid] ); + +} + +template +__attribute__((always_inline)) GGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], eps[tid], vrho_i[0], vrho_i[1], + 
vsigma_i[0], vsigma_i[1], vsigma_i[2] ); } template -inline LDA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { +__attribute__((always_inline)) GGA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_unpolar_kernel ) { using traits = kernel_traits; + traits::eval_fxc_unpolar( rho[tid], sigma[tid], v2rho2[tid], v2rhosigma[tid], v2sigma2[tid] ); - auto rho_i = rho + 2*idx; - auto vxc_i = vxc + 2*idx; +} - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); +template +__attribute__((always_inline)) GGA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_polar_kernel ) { - double v_a, v_b, e; - traits::eval_exc_vxc_polar( rho_a_use, rho_b_use, e, v_a, v_b); - eps[idx] += scal_fact * e; - vxc_i[0] += scal_fact * v_a; - vxc_i[1] += scal_fact * v_b; + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* v2rho2_i = v2rho2 + 3*tid; + auto* v2rhosigma_i = v2rhosigma + 6*tid; + auto* v2sigma2_i = v2sigma2 + 6*tid; + + + traits::eval_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2], + v2rhosigma_i[0], v2rhosigma_i[1], v2rhosigma_i[2], + v2rhosigma_i[3], v2rhosigma_i[4], v2rhosigma_i[5], + v2sigma2_i[0], v2sigma2_i[1], v2sigma2_i[2], + v2sigma2_i[3], v2sigma2_i[4], v2sigma2_i[5] ); + +} + +template +__attribute__((always_inline)) GGA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + traits::eval_vxc_fxc_unpolar( rho[tid], sigma[tid], vrho[tid], vsigma[tid], + v2rho2[tid], v2rhosigma[tid], v2sigma2[tid] ); } template -inline GGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { +__attribute__((always_inline)) GGA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* vrho_i = vrho + 2*tid; 
+ auto* vsigma_i = vsigma + 3*tid; + auto* v2rho2_i = v2rho2 + 3*tid; + auto* v2rhosigma_i = v2rhosigma + 6*tid; + auto* v2sigma2_i = v2sigma2 + 6*tid; + + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + vrho_i[0], vrho_i[1], vsigma_i[0], vsigma_i[1], vsigma_i[2], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2], + v2rhosigma_i[0], v2rhosigma_i[1], v2rhosigma_i[2], + v2rhosigma_i[3], v2rhosigma_i[4], v2rhosigma_i[5], + v2sigma2_i[0], v2sigma2_i[1], v2sigma2_i[2], + v2sigma2_i[3], v2sigma2_i[4], v2sigma2_i[5] ); + +} + + +template +__attribute__((always_inline)) GGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; - const double rho_use = sycl::max( rho[idx], 0. ); - const double sigma_use = sycl::max( sigma[idx], 1e-40 ); - traits::eval_exc_unpolar( rho_use, sigma_use, eps[idx] ); + double e; + traits::eval_exc_unpolar( rho[tid], sigma[tid], e ); + eps[tid] += scal_fact * e; + +} + +template +__attribute__((always_inline)) GGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + double e; + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], e ); + eps[tid] += scal_fact * e; } template -inline GGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { +__attribute__((always_inline)) GGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; - auto* rho_i = rho + 2*idx; - auto* sigma_i = sigma + 3*idx; + double e, vr, vs; + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], e, vr, vs ); + eps[tid] += scal_fact * e; + vrho[tid] += scal_fact * vr; + vsigma[tid] += scal_fact * vs; - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. 
); - const double sigma_aa_use = sycl::max( sigma_i[0], 1e-40 ); - const double sigma_bb_use = sycl::max( sigma_i[2], 1e-40 ); - const double sigma_ab_use = sycl::max( - sigma_i[1], -(sigma_i[0] + sigma_i[1]) / 2. - ); +} - traits::eval_exc_polar( rho_a_use, rho_b_use, sigma_aa_use, - sigma_ab_use, sigma_bb_use, eps[idx] ); +template +__attribute__((always_inline)) GGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + + double e, vra, vrb, vsaa,vsab,vsbb; + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], e, vra, vrb, vsaa, vsab, vsbb ); + + eps[tid] += scal_fact * e; + vrho_i[0] += scal_fact * vra; + vrho_i[1] += scal_fact * vrb; + vsigma_i[0] += scal_fact * vsaa; + vsigma_i[1] += scal_fact * vsab; + vsigma_i[2] += scal_fact * vsbb; } + template -inline GGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { +__attribute__((always_inline)) GGA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + double f2, f3, f4; + traits::eval_fxc_unpolar( rho[tid], sigma[tid], f2, f3, f4 ); + v2rho2[tid] += scal_fact * f2; + v2rhosigma[tid] += scal_fact * f3; + v2sigma2[tid] += scal_fact * f4; +} +template +__attribute__((always_inline)) GGA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_polar_kernel ) { using traits = kernel_traits; - const double rho_use = sycl::max( rho[idx], 0. 
); - const double sigma_use = sycl::max( sigma[idx], 1e-40 ); - traits::eval_exc_vxc_unpolar( rho_use, sigma_use, eps[idx], - vrho[idx], vsigma[idx] ); + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* v2rho2_i = v2rho2 + 3*tid; + auto* v2rhosigma_i = v2rhosigma + 6*tid; + auto* v2sigma2_i = v2sigma2 + 6*tid; + double f2[3], f3[6], f4[6]; + traits::eval_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + f2[0], f2[1], f2[2], + f3[0], f3[1], f3[2], f3[3], f3[4], f3[5], + f4[0], f4[1], f4[2], f4[3], f4[4], f4[5] ); + for(int i=0;i<3;++i) v2rho2_i[i] += scal_fact * f2[i]; + for(int i=0;i<6;++i) v2rhosigma_i[i] += scal_fact * f3[i]; + for(int i=0;i<6;++i) v2sigma2_i[i] += scal_fact * f4[i]; +} +template +__attribute__((always_inline)) GGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + double v, s, f2, f3, f4; + traits::eval_vxc_fxc_unpolar( rho[tid], sigma[tid], v, s, f2, f3, f4 ); + vrho[tid] += scal_fact * v; + vsigma[tid] += scal_fact * s; + v2rho2[tid] += scal_fact * f2; + v2rhosigma[tid] += scal_fact * f3; + v2sigma2[tid] += scal_fact * f4; } template -inline GGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { +__attribute__((always_inline)) GGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_polar_kernel ) { + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* v2rho2_i = v2rho2 + 3*tid; + auto* v2rhosigma_i = v2rhosigma + 6*tid; + auto* v2sigma2_i = v2sigma2 + 6*tid; + double v[2], s[3], f2[3], f3[6], f4[6]; + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + v[0], v[1], s[0], s[1], s[2], + f2[0], f2[1], f2[2], + f3[0], f3[1], f3[2], f3[3], f3[4], f3[5], + f4[0], f4[1], f4[2], f4[3], f4[4], f4[5] ); + for(int i=0;i<2;++i) vrho_i[i] += scal_fact * v[i]; + for(int 
i=0;i<3;++i) vsigma_i[i] += scal_fact * s[i]; + for(int i=0;i<3;++i) v2rho2_i[i] += scal_fact * f2[i]; + for(int i=0;i<6;++i) v2rhosigma_i[i] += scal_fact * f3[i]; + for(int i=0;i<6;++i) v2sigma2_i[i] += scal_fact * f4[i]; +} + + + + + + + + + + + + + + +template +__attribute__((always_inline)) MGGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_unpolar_kernel ) { using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + traits::eval_exc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], eps[tid] ); + +} - auto* rho_i = rho + 2*idx; - auto* sigma_i = sigma + 3*idx; - auto* vrho_i = vrho + 2*idx; - auto* vsigma_i = vsigma + 3*idx; - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); - const double sigma_aa_use = sycl::max( sigma_i[0], 1e-40 ); - const double sigma_bb_use = sycl::max( sigma_i[2], 1e-40 ); - const double sigma_ab_use = sycl::max( - sigma_i[1], -(sigma_i[0] + sigma_i[1]) / 2. - ); +template +__attribute__((always_inline)) MGGA_EXC_GENERATOR_SYCL_KERNEL( device_eval_exc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : nullptr; + auto* tau_i = tau + 2*tid; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; - traits::eval_exc_vxc_polar( rho_a_use, rho_b_use, sigma_aa_use, - sigma_ab_use, sigma_bb_use, eps[idx], vrho_i[0], vrho_i[1], - vsigma_i[0], vsigma_i[1], vsigma_i[2] ); + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], eps[tid] ); } +template +__attribute__((always_inline)) MGGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? 
lapl[tid] : 0.0; + + double dummy; + auto& vlapl_return = traits::needs_laplacian ? vlapl[tid] : dummy; + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + eps[tid], vrho[tid], vsigma[tid], vlapl_return, vtau[tid] ); + +} template -inline GGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { +__attribute__((always_inline)) MGGA_EXC_VXC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_helper_polar_kernel ) { using traits = kernel_traits; - double e; - const double rho_use = sycl::max( rho[idx], 0. ); - const double sigma_use = sycl::max( sigma[idx], 1e-40 ); + double dummy_vlapl[2]; + + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; + + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* vlapl_i = traits::needs_laplacian ? vlapl + 2*tid : dummy_vlapl; + auto* vtau_i = vtau + 2*tid; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], eps[tid], vrho_i[0], vrho_i[1], vsigma_i[0], vsigma_i[1], + vsigma_i[2], vlapl_i[0], vlapl_i[1], vtau_i[0], vtau_i[1] ); + +} + +template +__attribute__((always_inline)) MGGA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + double local_v2rholapl, local_v2sigmalapl, local_v2lapl2, local_v2lapltau; + + auto& v2rholapl_return = traits::needs_laplacian ? v2rholapl[tid] : local_v2rholapl; + auto& v2sigmalapl_return = traits::needs_laplacian ? v2sigmalapl[tid] : local_v2sigmalapl; + auto& v2lapl2_return = traits::needs_laplacian ? v2lapl2[tid] : local_v2lapl2; + auto& v2lapltau_return = traits::needs_laplacian ? 
v2lapltau[tid] : local_v2lapltau; + + traits::eval_fxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + v2rho2[tid], v2rhosigma[tid], v2rholapl_return, v2rhotau[tid], + v2sigma2[tid], v2sigmalapl_return, v2sigmatau[tid], + v2lapl2_return, v2lapltau_return, v2tau2[tid] ); - traits::eval_exc_unpolar( rho_use, sigma_use, e ); - eps[idx] += scal_fact * e; +} + +template +__attribute__((always_inline)) MGGA_FXC_GENERATOR_SYCL_KERNEL( device_eval_fxc_helper_polar_kernel ) { + using traits = kernel_traits; + double dummy_v2rholapl[4]; + double dummy_v2sigmalapl[6]; + double dummy_v2lapl2[3]; + double dummy_v2lapltau[4]; + + auto* rho_i = rho + 2 * tid; + auto* sigma_i = sigma + 3 * tid; + auto* tau_i = tau + 2 * tid; + auto* v2rho2_i = v2rho2 + 3 * tid; + auto* v2rhosigma_i = v2rhosigma + 6 * tid; + auto* v2rhotau_i = v2rhotau + 4 * tid; + auto* v2sigma2_i = v2sigma2 + 6 * tid; + auto* v2sigmatau_i = v2sigmatau + 6 * tid; + auto* v2tau2_i = v2tau2 + 3 * tid; + + auto* lapl_i = traits::needs_laplacian ? (lapl + 2 * tid) : lapl; + auto* v2rholapl_i = traits::needs_laplacian ? (v2rholapl + 4 * tid) : dummy_v2rholapl; + auto* v2sigmalapl_i = traits::needs_laplacian ? (v2sigmalapl + 6 * tid) : dummy_v2sigmalapl; + auto* v2lapl2_i = traits::needs_laplacian ? (v2lapl2 + 3 * tid) : dummy_v2lapl2; + auto* v2lapltau_i = traits::needs_laplacian ? (v2lapltau + 4 * tid) : dummy_v2lapltau; + + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? 
lapl_i[1] : 0.0; + + traits::eval_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + lapl_a_use, lapl_b_use, tau_i[0], tau_i[1], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2], + v2rhosigma_i[0], v2rhosigma_i[1], v2rhosigma_i[2], + v2rhosigma_i[3], v2rhosigma_i[4], v2rhosigma_i[5], + v2rholapl_i[0], v2rholapl_i[1], v2rholapl_i[2], v2rholapl_i[3], + v2rhotau_i[0], v2rhotau_i[1], v2rhotau_i[2], v2rhotau_i[3], + v2sigma2_i[0], v2sigma2_i[1], v2sigma2_i[2], + v2sigma2_i[3], v2sigma2_i[4], v2sigma2_i[5], + v2sigmalapl_i[0], v2sigmalapl_i[1], v2sigmalapl_i[2], + v2sigmalapl_i[3], v2sigmalapl_i[4], v2sigmalapl_i[5], + v2sigmatau_i[0], v2sigmatau_i[1], v2sigmatau_i[2], + v2sigmatau_i[3], v2sigmatau_i[4], v2sigmatau_i[5], + v2lapl2_i[0], v2lapl2_i[1], v2lapl2_i[2], + v2lapltau_i[0], v2lapltau_i[1], v2lapltau_i[2], v2lapltau_i[3], + v2tau2_i[0], v2tau2_i[1], v2tau2_i[2] ); } template -inline GGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { +__attribute__((always_inline)) MGGA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_unpolar_kernel ) { + + using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? lapl[tid] : 0.0; + double dummy_v2rholapl, dummy_v2sigmalapl, dummy_v2lapl2, dummy_v2lapltau, dummy_vlapl; + auto& vlapl_return = traits::needs_laplacian ? vlapl[tid] : dummy_vlapl; + auto& v2rholapl_return = traits::needs_laplacian ? v2rholapl[tid] : dummy_v2rholapl; + auto& v2sigmalapl_return = traits::needs_laplacian ? v2sigmalapl[tid] : dummy_v2sigmalapl; + auto& v2lapl2_return = traits::needs_laplacian ? v2lapl2[tid] : dummy_v2lapl2; + auto& v2lapltau_return = traits::needs_laplacian ? 
v2lapltau[tid] : dummy_v2lapltau; + + traits::eval_vxc_fxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + vrho[tid], vsigma[tid], vlapl_return, vtau[tid], + v2rho2[tid], v2rhosigma[tid], v2rholapl_return, + v2rhotau[tid], v2sigma2[tid], v2sigmalapl_return, + v2sigmatau[tid], v2lapl2_return, v2lapltau_return, + v2tau2[tid] ); + +} + +template +__attribute__((always_inline)) MGGA_VXC_FXC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_helper_polar_kernel ) { using traits = kernel_traits; + double dummy_vlapl[2]; + double dummy_v2rholapl[4]; + double dummy_v2sigmalapl[6]; + double dummy_v2lapl2[3]; + double dummy_v2lapltau[4]; + + auto* rho_i = rho + 2 * tid; + auto* sigma_i = sigma + 3 * tid; + auto* tau_i = tau + 2 * tid; + auto* vrho_i = vrho + 2 * tid; + auto* vsigma_i = vsigma + 3 * tid; + auto* vtau_i = vtau + 2 * tid; + + auto* v2rho2_i = v2rho2 + 3 * tid; + auto* v2rhosigma_i = v2rhosigma + 6 * tid; + auto* v2rhotau_i = v2rhotau + 4 * tid; + auto* v2sigma2_i = v2sigma2 + 6 * tid; + auto* v2sigmatau_i = v2sigmatau + 6 * tid; + auto* v2tau2_i = v2tau2 + 3 * tid; + + auto* lapl_i = traits::needs_laplacian ? (lapl + 2 * tid) : lapl; + auto* vlapl_i = traits::needs_laplacian ? (vlapl + 2 * tid) : dummy_vlapl; + auto* v2rholapl_i = traits::needs_laplacian ? (v2rholapl + 4 * tid) : dummy_v2rholapl; + auto* v2sigmalapl_i = traits::needs_laplacian ? (v2sigmalapl + 6 * tid) : dummy_v2sigmalapl; + auto* v2lapl2_i = traits::needs_laplacian ? (v2lapl2 + 3 * tid) : dummy_v2lapl2; + auto* v2lapltau_i = traits::needs_laplacian ? (v2lapltau + 4 * tid) : dummy_v2lapltau; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? 
lapl_i[1] : 0.0; + + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + lapl_a_use, lapl_b_use, tau_i[0], tau_i[1], + vrho_i[0], vrho_i[1], vsigma_i[0], vsigma_i[1], vsigma_i[2], + vlapl_i[0], vlapl_i[1], vtau_i[0], vtau_i[1], + v2rho2_i[0], v2rho2_i[1], v2rho2_i[2], + v2rhosigma_i[0], v2rhosigma_i[1], v2rhosigma_i[2], + v2rhosigma_i[3], v2rhosigma_i[4], v2rhosigma_i[5], + v2rholapl_i[0], v2rholapl_i[1], v2rholapl_i[2], v2rholapl_i[3], + v2rhotau_i[0], v2rhotau_i[1], v2rhotau_i[2], v2rhotau_i[3], + v2sigma2_i[0], v2sigma2_i[1], v2sigma2_i[2], + v2sigma2_i[3], v2sigma2_i[4], v2sigma2_i[5], + v2sigmalapl_i[0], v2sigmalapl_i[1], v2sigmalapl_i[2], + v2sigmalapl_i[3], v2sigmalapl_i[4], v2sigmalapl_i[5], + v2sigmatau_i[0], v2sigmatau_i[1], v2sigmatau_i[2], + v2sigmatau_i[3], v2sigmatau_i[4], v2sigmatau_i[5], + v2lapl2_i[0], v2lapl2_i[1], v2lapl2_i[2], + v2lapltau_i[0], v2lapltau_i[1], v2lapltau_i[2], v2lapltau_i[3], + v2tau2_i[0], v2tau2_i[1], v2tau2_i[2] ); +} - auto* rho_i = rho + 2*idx; - auto* sigma_i = sigma + 3*idx; +template +__attribute__((always_inline)) MGGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_unpolar_kernel ) { - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); - const double sigma_aa_use = sycl::max( sigma_i[0], 1e-40 ); - const double sigma_bb_use = sycl::max( sigma_i[2], 1e-40 ); - const double sigma_ab_use = sycl::max( - sigma_i[1], -(sigma_i[0] + sigma_i[1]) / 2. - ); + using traits = kernel_traits; double e; - traits::eval_exc_polar( rho_a_use, rho_b_use, sigma_aa_use, - sigma_ab_use, sigma_bb_use, e ); - eps[idx] += scal_fact * e; + + const double lapl_use = traits::needs_laplacian ? 
lapl[tid] : 0.0; + traits::eval_exc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], e ); + eps[tid] += scal_fact * e; } template -inline GGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { +__attribute__((always_inline)) MGGA_EXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_inc_helper_polar_kernel ) { + + using traits = kernel_traits; + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; + + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? lapl_i[1] : 0.0; + + double e; + traits::eval_exc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], e ); + eps[tid] += scal_fact * e; + +} + +template +__attribute__((always_inline)) MGGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_unpolar_kernel ) { using traits = kernel_traits; - double e, vr, vs; - const double rho_use = sycl::max( rho[idx], 0. ); - const double sigma_use = sycl::max( sigma[idx], 1e-40 ); + double e, vr, vs, vl, vt; - traits::eval_exc_vxc_unpolar( rho_use, sigma_use, e, vr, vs ); - eps[idx] += scal_fact * e; - vrho[idx] += scal_fact * vr; - vsigma[idx] += scal_fact * vs; + const double lapl_use = traits::needs_laplacian ? 
lapl[tid] : 0.0; + + traits::eval_exc_vxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + e, vr, vs, vl, vt ); + eps[tid] += scal_fact * e; + vrho[tid] += scal_fact * vr; + vsigma[tid] += scal_fact * vs; + vtau[tid] += scal_fact * vt; + if(traits::needs_laplacian) vlapl[tid] += scal_fact * vl; } template -inline GGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { +__attribute__((always_inline)) MGGA_EXC_VXC_INC_GENERATOR_SYCL_KERNEL( device_eval_exc_vxc_inc_helper_polar_kernel ) { using traits = kernel_traits; - auto* rho_i = rho + 2*idx; - auto* sigma_i = sigma + 3*idx; - auto* vrho_i = vrho + 2*idx; - auto* vsigma_i = vsigma + 3*idx; + double dummy_vlapl[2]; + + auto* rho_i = rho + 2*tid; + auto* sigma_i = sigma + 3*tid; + auto* lapl_i = traits::needs_laplacian ? (lapl + 2*tid) : lapl; + auto* tau_i = tau + 2*tid; - const double rho_a_use = sycl::max( rho_i[0], 0. ); - const double rho_b_use = sycl::max( rho_i[1], 0. ); - const double sigma_aa_use = sycl::max( sigma_i[0], 1e-40 ); - const double sigma_bb_use = sycl::max( sigma_i[2], 1e-40 ); - const double sigma_ab_use = sycl::max( - sigma_i[1], -(sigma_i[0] + sigma_i[1]) / 2. - ); + auto* vrho_i = vrho + 2*tid; + auto* vsigma_i = vsigma + 3*tid; + auto* vlapl_i = traits::needs_laplacian ? vlapl + 2*tid : dummy_vlapl; + auto* vtau_i = vtau + 2*tid; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? 
lapl_i[1] : 0.0; - double e, vra, vrb, vsaa,vsab,vsbb; - traits::eval_exc_vxc_polar( rho_a_use, rho_b_use, sigma_aa_use, - sigma_ab_use, sigma_bb_use, e, vra, vrb, vsaa, vsab, vsbb ); - eps[idx] += scal_fact * e; + double e, vra, vrb, vsaa,vsab,vsbb, vla, vlb, vta, vtb; + traits::eval_exc_vxc_polar( rho_i[0], rho_i[1], sigma_i[0], + sigma_i[1], sigma_i[2], lapl_a_use, lapl_b_use, tau_i[0], + tau_i[1], e, vra, vrb, vsaa, vsab, vsbb, vla, vlb, vta, vtb ); + + eps[tid] += scal_fact * e; vrho_i[0] += scal_fact * vra; vrho_i[1] += scal_fact * vrb; vsigma_i[0] += scal_fact * vsaa; vsigma_i[1] += scal_fact * vsab; vsigma_i[2] += scal_fact * vsbb; + vtau_i[0] += scal_fact * vta; + vtau_i[1] += scal_fact * vtb; + if(traits::needs_laplacian) { + vlapl_i[0] += scal_fact * vla; + vlapl_i[1] += scal_fact * vlb; + } } +template +__attribute__((always_inline)) MGGA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + const double lapl_use = traits::needs_laplacian ? 
lapl[tid] : 0.0; + double f_rho2, f_rhosigma, f_rholapl, f_rhotau, f_sigma2, f_sigmalapl, f_sigmatau, f_lapl2, f_lapltau, f_tau2; + traits::eval_fxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + f_rho2, f_rhosigma, f_rholapl, f_rhotau, + f_sigma2, f_sigmalapl, f_sigmatau, + f_lapl2, f_lapltau, f_tau2 ); + v2rho2[tid] += scal_fact * f_rho2; + v2rhosigma[tid] += scal_fact * f_rhosigma; + v2rhotau[tid] += scal_fact * f_rhotau; + v2sigma2[tid] += scal_fact * f_sigma2; + v2sigmatau[tid] += scal_fact * f_sigmatau; + v2tau2[tid] += scal_fact * f_tau2; + if(traits::needs_laplacian) { + v2rholapl[tid] += scal_fact * f_rholapl; + v2sigmalapl[tid] += scal_fact * f_sigmalapl; + v2lapl2[tid] += scal_fact * f_lapl2; + v2lapltau[tid] += scal_fact * f_lapltau; + } +} +template +__attribute__((always_inline)) MGGA_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_fxc_inc_helper_polar_kernel ) { + using traits = kernel_traits; + auto* rho_i = rho + 2 * tid; + auto* sigma_i = sigma + 3 * tid; + auto* tau_i = tau + 2 * tid; + auto* v2rho2_i = v2rho2 + 3 * tid; + auto* v2rhosigma_i = v2rhosigma + 6 * tid; + auto* v2rhotau_i = v2rhotau + 4 * tid; + auto* v2sigma2_i = v2sigma2 + 6 * tid; + auto* v2sigmatau_i = v2sigmatau + 6 * tid; + auto* v2tau2_i = v2tau2 + 3 * tid; + + auto* lapl_i = traits::needs_laplacian ? (lapl + 2 * tid) : lapl; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? 
lapl_i[1] : 0.0; + + double f_rho2[3], f_rhosigma[6], f_rholapl[4], f_rhotau[4], f_sigma2[6], f_sigmalapl[6], f_sigmatau[6], f_lapl2[3], f_lapltau[4], f_tau2[3]; + + traits::eval_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + lapl_a_use, lapl_b_use, tau_i[0], tau_i[1], + f_rho2[0], f_rho2[1], f_rho2[2], + f_rhosigma[0], f_rhosigma[1], f_rhosigma[2], f_rhosigma[3], f_rhosigma[4], f_rhosigma[5], + f_rholapl[0], f_rholapl[1], f_rholapl[2], f_rholapl[3], + f_rhotau[0], f_rhotau[1], f_rhotau[2], f_rhotau[3], + f_sigma2[0], f_sigma2[1], f_sigma2[2], f_sigma2[3], f_sigma2[4], f_sigma2[5], + f_sigmalapl[0], f_sigmalapl[1], f_sigmalapl[2], f_sigmalapl[3], f_sigmalapl[4], f_sigmalapl[5], + f_sigmatau[0], f_sigmatau[1], f_sigmatau[2], f_sigmatau[3], f_sigmatau[4], f_sigmatau[5], + f_lapl2[0], f_lapl2[1], f_lapl2[2], + f_lapltau[0], f_lapltau[1], f_lapltau[2], f_lapltau[3], + f_tau2[0], f_tau2[1], f_tau2[2] ); + + for(int i=0;i<3;++i) v2rho2_i[i] += scal_fact * f_rho2[i]; + for(int i=0;i<6;++i) v2rhosigma_i[i] += scal_fact * f_rhosigma[i]; + for(int i=0;i<4;++i) v2rhotau_i[i] += scal_fact * f_rhotau[i]; + for(int i=0;i<6;++i) v2sigma2_i[i] += scal_fact * f_sigma2[i]; + for(int i=0;i<6;++i) v2sigmatau_i[i] += scal_fact * f_sigmatau[i]; + for(int i=0;i<3;++i) v2tau2_i[i] += scal_fact * f_tau2[i]; + + if(traits::needs_laplacian) { + auto* v2rholapl_i = v2rholapl + 4 * tid; + auto* v2sigmalapl_i = v2sigmalapl + 6 * tid; + auto* v2lapl2_i = v2lapl2 + 3 * tid; + auto* v2lapltau_i = v2lapltau + 4 * tid; + for(int i=0;i<4;++i) v2rholapl_i[i] += scal_fact * f_rholapl[i]; + for(int i=0;i<6;++i) v2sigmalapl_i[i] += scal_fact * f_sigmalapl[i]; + for(int i=0;i<3;++i) v2lapl2_i[i] += scal_fact * f_lapl2[i]; + for(int i=0;i<4;++i) v2lapltau_i[i] += scal_fact * f_lapltau[i]; + } + +} +template +__attribute__((always_inline)) MGGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL( device_eval_vxc_fxc_inc_helper_unpolar_kernel ) { + using traits = kernel_traits; + const double lapl_use = 
traits::needs_laplacian ? lapl[tid] : 0.0; + double f_rho2, f_rhosigma, f_rholapl, f_rhotau, f_sigma2, f_sigmalapl, f_sigmatau, f_lapl2, f_lapltau, f_tau2; + double vr, vs, vl, vt; + traits::eval_vxc_fxc_unpolar( rho[tid], sigma[tid], lapl_use, tau[tid], + vr, vs, vl, vt, + f_rho2, f_rhosigma, f_rholapl, f_rhotau, + f_sigma2, f_sigmalapl, f_sigmatau, + f_lapl2, f_lapltau, f_tau2); + + vrho[tid] += scal_fact * vr; + vsigma[tid] += scal_fact * vs; + vtau[tid] += scal_fact * vt; + v2rho2[tid] += scal_fact * f_rho2; + v2rhosigma[tid] += scal_fact * f_rhosigma; + v2rhotau[tid] += scal_fact * f_rhotau; + v2sigma2[tid] += scal_fact * f_sigma2; + v2sigmatau[tid] += scal_fact * f_sigmatau; + v2tau2[tid] += scal_fact * f_tau2; + + if(traits::needs_laplacian) { + vlapl[tid] += scal_fact * vl; + v2rholapl[tid] += scal_fact * f_rholapl; + v2sigmalapl[tid] += scal_fact * f_sigmalapl; + v2lapl2[tid] += scal_fact * f_lapl2; + v2lapltau[tid] += scal_fact * f_lapltau; + } +} +template +__attribute__((always_inline)) MGGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL(device_eval_vxc_fxc_inc_helper_polar_kernel) { + using traits = kernel_traits; + auto* rho_i = rho + 2 * tid; + auto* sigma_i = sigma + 3 * tid; + auto* tau_i = tau + 2 * tid; + auto* vrho_i = vrho + 2 * tid; + auto* vsigma_i = vsigma + 3 * tid; + auto* vtau_i = vtau + 2 * tid; + + auto* v2rho2_i = v2rho2 + 3 * tid; + auto* v2rhosigma_i = v2rhosigma + 6 * tid; + auto* v2rhotau_i = v2rhotau + 4 * tid; + auto* v2sigma2_i = v2sigma2 + 6 * tid; + auto* v2sigmatau_i = v2sigmatau + 6 * tid; + auto* v2tau2_i = v2tau2 + 3 * tid; + + auto* lapl_i = traits::needs_laplacian ? (lapl + 2 * tid) : lapl; + const double lapl_a_use = traits::needs_laplacian ? lapl_i[0] : 0.0; + const double lapl_b_use = traits::needs_laplacian ? 
lapl_i[1] : 0.0; + + double frho[2], fsigma[3], flapl[2], ftau[2]; + double f_rho2[3], f_rhosigma[6], f_rholapl[4], f_rhotau[4], f_sigma2[6], f_sigmalapl[6], f_sigmatau[6], f_lapl2[3], f_lapltau[4], f_tau2[3]; + + traits::eval_vxc_fxc_polar( rho_i[0], rho_i[1], sigma_i[0], sigma_i[1], sigma_i[2], + lapl_a_use, lapl_b_use, tau_i[0], tau_i[1], + frho[0], frho[1], fsigma[0], fsigma[1], fsigma[2], + flapl[0], flapl[1], ftau[0], ftau[1], + f_rho2[0], f_rho2[1], f_rho2[2], + f_rhosigma[0], f_rhosigma[1], f_rhosigma[2], + f_rhosigma[3], f_rhosigma[4], f_rhosigma[5], + f_rholapl[0], f_rholapl[1], f_rholapl[2], f_rholapl[3], + f_rhotau[0], f_rhotau[1], f_rhotau[2], f_rhotau[3], + f_sigma2[0], f_sigma2[1], f_sigma2[2], + f_sigma2[3], f_sigma2[4], f_sigma2[5], + f_sigmalapl[0], f_sigmalapl[1], f_sigmalapl[2], + f_sigmalapl[3], f_sigmalapl[4], f_sigmalapl[5], + f_sigmatau[0], f_sigmatau[1], f_sigmatau[2], + f_sigmatau[3], f_sigmatau[4], f_sigmatau[5], + f_lapl2[0], f_lapl2[1], f_lapl2[2], + f_lapltau[0], f_lapltau[1], f_lapltau[2], f_lapltau[3], + f_tau2[0], f_tau2[1], f_tau2[2] ); + + for(int i=0;i<2;++i) vrho_i[i] += scal_fact * frho[i]; + for(int i=0;i<3;++i) vsigma_i[i] += scal_fact * fsigma[i]; + for(int i=0;i<2;++i) vtau_i[i] += scal_fact * ftau[i]; + + for(int i=0;i<3;++i) v2rho2_i[i] += scal_fact * f_rho2[i]; + for(int i=0;i<6;++i) v2rhosigma_i[i] += scal_fact * f_rhosigma[i]; + for(int i=0;i<4;++i) v2rhotau_i[i] += scal_fact * f_rhotau[i]; + for(int i=0;i<6;++i) v2sigma2_i[i] += scal_fact * f_sigma2[i]; + for(int i=0;i<6;++i) v2sigmatau_i[i] += scal_fact * f_sigmatau[i]; + for(int i=0;i<3;++i) v2tau2_i[i] += scal_fact * f_tau2[i]; + + if(traits::needs_laplacian) { + auto* vlapl_i = vlapl + 2 * tid; + auto* v2rholapl_i = v2rholapl + 4 * tid; + auto* v2sigmalapl_i = v2sigmalapl + 6 * tid; + auto* v2lapl2_i = v2lapl2 + 3 * tid; + auto* v2lapltau_i = v2lapltau + 4 * tid; + for(int i=0;i<2;++i) vlapl_i[i] += scal_fact * flapl[i]; + for(int i=0;i<4;++i) v2rholapl_i[i] += 
scal_fact * f_rholapl[i]; + for(int i=0;i<6;++i) v2sigmalapl_i[i] += scal_fact * f_sigmalapl[i]; + for(int i=0;i<3;++i) v2lapl2_i[i] += scal_fact * f_lapl2[i]; + for(int i=0;i<4;++i) v2lapltau_i[i] += scal_fact * f_lapltau[i]; + } -template class lda_eval_exc_unpolar; -template class lda_eval_exc_polar; -template class lda_eval_exc_vxc_unpolar; -template class lda_eval_exc_vxc_polar; -template class lda_eval_exc_inc_unpolar; -template class lda_eval_exc_inc_polar; -template class lda_eval_exc_vxc_inc_unpolar; -template class lda_eval_exc_vxc_inc_polar; +} -template class gga_eval_exc_unpolar; -template class gga_eval_exc_polar; -template class gga_eval_exc_vxc_unpolar; -template class gga_eval_exc_vxc_polar; -template class gga_eval_exc_inc_unpolar; -template class gga_eval_exc_inc_polar; -template class gga_eval_exc_vxc_inc_unpolar; -template class gga_eval_exc_vxc_inc_polar; @@ -356,11 +979,9 @@ template class gga_eval_exc_vxc_inc_polar; template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), - [=](sycl::id<1> idx) { - device_eval_exc_helper_unpolar_kernel( - N, rho, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_unpolar_kernel( + N, rho, eps, tid); }); } @@ -368,11 +989,9 @@ LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_helper_polar_kernel( - N, rho, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_polar_kernel( + N, rho, eps, tid); }); } @@ -380,11 +999,9 @@ LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_helper_unpolar_kernel( - N, rho, eps, vxc, 
idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_unpolar_kernel( + N, rho, eps, vxc, tid); }); } @@ -392,31 +1009,58 @@ LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_helper_polar_kernel( - N, rho, eps, vxc, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_polar_kernel( + N, rho, eps, vxc, tid); }); } +template +LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_unpolar_kernel( + N, rho, fxc, tid); + }); +} + +template +LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_polar_kernel( + N, rho, fxc, tid); + }); +} +template +LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_unpolar_kernel( + N, rho, vxc, fxc, tid); + }); +} +template +LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_polar_kernel( + N, rho, vxc, fxc, tid); + }); +} template LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_inc_helper_unpolar_kernel( - scal_fact, N, rho, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_unpolar_kernel( + scal_fact, N, rho, eps, tid); }); } @@ -424,11 +1068,9 @@ LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { template LDA_EXC_INC_GENERATOR_DEVICE( 
device_eval_exc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_inc_helper_polar_kernel( - scal_fact, N, rho, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_polar_kernel( + scal_fact, N, rho, eps, tid); }); } @@ -436,11 +1078,9 @@ LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_inc_helper_unpolar_kernel( - scal_fact, N, rho, eps, vxc, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, eps, vxc, tid); }); } @@ -448,20 +1088,52 @@ LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_inc_helper_polar_kernel( - scal_fact, N, rho, eps, vxc, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_polar_kernel( + scal_fact, N, rho, eps, vxc, tid); + }); + +} + +template +LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, fxc, tid); }); } +template +LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, fxc, tid); + }); +} +template +LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_unpolar_kernel( + 
scal_fact, N, rho, vxc, fxc, tid); + }); + +} +template +LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, vxc, fxc, tid); + }); +} @@ -469,11 +1141,9 @@ LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_helper_unpolar_kernel( - N, rho, sigma, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_unpolar_kernel( + N, rho, sigma, eps, tid); }); } @@ -481,11 +1151,9 @@ GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_helper_polar_kernel( - N, rho, sigma, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_polar_kernel( + N, rho, sigma, eps, tid); }); } @@ -493,11 +1161,9 @@ GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { template GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_helper_unpolar_kernel( - N, rho, sigma, eps, vrho, vsigma, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_unpolar_kernel( + N, rho, sigma, eps, vrho, vsigma, tid); }); } @@ -505,31 +1171,59 @@ GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { template GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_helper_polar_kernel( - N, rho, sigma, eps, vrho, vsigma, idx - ); + queue->parallel_for>( 
sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_polar_kernel( + N, rho, sigma, eps, vrho, vsigma, tid); }); } +template +GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_unpolar_kernel( + N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); + +} + +template +GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_polar_kernel( + N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} +template +GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_unpolar_kernel( + N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} +template +GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_polar_kernel( + N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_inc_helper_unpolar_kernel( - scal_fact, N, rho, sigma, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, eps, tid); }); } @@ -537,11 +1231,9 @@ GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_inc_helper_polar_kernel( - scal_fact, N, rho, sigma, eps, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, 
eps, tid); }); } @@ -549,11 +1241,9 @@ GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_inc_helper_unpolar_kernel( - scal_fact, N, rho, sigma, eps, vrho, vsigma, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, eps, vrho, vsigma, tid); }); } @@ -561,15 +1251,234 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>( N ), - [=](sycl::id<1> idx) { - device_eval_exc_vxc_inc_helper_polar_kernel( - scal_fact, N, rho, sigma, eps, vrho, vsigma, idx - ); + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, eps, vrho, vsigma, tid); + }); + +} + + +template +GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} + +template +GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} + +template +GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); +} + +template +GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { + + 
queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); + }); + +} + +template +MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_unpolar_kernel( + N, rho, sigma, lapl, tau, eps, tid); + }); + +} + +template +MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_helper_polar_kernel( + N, rho, sigma, lapl, tau, eps, tid); + }); + +} + +template +MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_unpolar_kernel( + N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); + }); + +} + +template +MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_helper_polar_kernel( + N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); + }); + +} + +template +MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_unpolar_kernel( + N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_helper_polar_kernel( + N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), 
[=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_unpolar_kernel( + N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_helper_polar_kernel( + N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, lapl, tau, eps, tid); + }); + +} + +template +MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, lapl, tau, eps, tid); + }); + +} + +template +MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); + }); + +} + +template +MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_exc_vxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); + }); + +} + +template +MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, lapl, tau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, 
+ v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, lapl, tau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + +template +MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_unpolar_kernel( + scal_fact, N, rho, sigma, lapl, tau, + vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); }); } +template +MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { + + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + device_eval_vxc_fxc_inc_helper_polar_kernel( + scal_fact, N, rho, sigma, lapl, tau, + vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, + v2lapl2, v2lapltau, v2tau2, tid); + }); + +} + + #define LDA_GENERATE_DEVICE_HELPERS(KERN) \ template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ); \ @@ -579,7 +1488,15 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ); \ template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ); \ template LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ); \ - template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); + template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); \ + template LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ); \ + template LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ); \ + 
template LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ); \ + template LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ); \ + template LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ); \ + template LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ); \ + template LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ); \ + template LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ); #define GGA_GENERATE_DEVICE_HELPERS(KERN) \ template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ); \ @@ -589,11 +1506,38 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ); \ template GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ); \ template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ); \ - template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); + template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); \ + template GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ); \ + template GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ); \ + template GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ); \ + template GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ); \ + template GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ); \ + template GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ); \ + template GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ); \ + template GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ); + +#define MGGA_GENERATE_DEVICE_HELPERS(KERN) \ + template MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ); \ + template MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ); \ + template MGGA_EXC_INC_GENERATOR_DEVICE( 
device_eval_exc_inc_helper_unpolar ); \ + template MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar );\ + template MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ); \ + template MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ); \ + template MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ); \ + template MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ); \ + template MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ); \ + template MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ); \ + template MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ); \ + template MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ); \ + template MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ); \ + template MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ); \ + template MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ); \ + template MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ); LDA_GENERATE_DEVICE_HELPERS( BuiltinSlaterExchange ); LDA_GENERATE_DEVICE_HELPERS( BuiltinVWN3 ); LDA_GENERATE_DEVICE_HELPERS( BuiltinVWN_RPA ); +LDA_GENERATE_DEVICE_HELPERS( BuiltinVWN ); LDA_GENERATE_DEVICE_HELPERS( BuiltinPW91_LDA ); LDA_GENERATE_DEVICE_HELPERS( BuiltinPW91_LDA_MOD ); LDA_GENERATE_DEVICE_HELPERS( BuiltinPW91_LDA_RPA ); @@ -605,12 +1549,93 @@ GGA_GENERATE_DEVICE_HELPERS( BuiltinLYP ); GGA_GENERATE_DEVICE_HELPERS( BuiltinPBE_X ); GGA_GENERATE_DEVICE_HELPERS( BuiltinRevPBE_X ); GGA_GENERATE_DEVICE_HELPERS( BuiltinPBE_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinB97_D ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinITYH_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinITYH_X_033 ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinITYH_X_015 ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinP86_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinP86VWN_FT_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinPW91_C ); 
+GGA_GENERATE_DEVICE_HELPERS( BuiltinPBE_SOL_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinBMK_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinN12_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinN12_SX_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinSOGGA11_X_C ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinPW91_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinMPW91_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinOPTX_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinRPBE_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinSOGGA11_X_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinPW86_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97X_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97X_V_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97X_D_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWB97X_D3_XC ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinHJS_PBE_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinLCwPBE_wPBEh_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinLRCwPBE_HJS_PBE_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinLRCwPBEh_HJS_PBE_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinWPBEh_X_default0 ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinHSE03_wPBEh_X ); +GGA_GENERATE_DEVICE_HELPERS( BuiltinHSE06_wPBEh_X ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCAN_X ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCAN_C ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCAN_X ); MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCAN_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinFT98_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM062X_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM062X_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPKZB_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPKZB_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinTPSS_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRevTPSS_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_L_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_HF_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRevM06_L_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_SX_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_L_C ); 
+MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_HF_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRevM06_L_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM06_SX_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM05_2X_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM05_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM08_HX_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM08_SO_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinCF22D_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM11_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN12_L_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN12_SX_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN15_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN15_L_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinTPSS_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRevTPSS_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRSCAN_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinBC95_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMBEEF_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinRSCAN_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinBMK_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM08_HX_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM08_SO_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN12_L_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN15_L_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN15_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinCF22D_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinMN12_SX_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM11_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM05_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinM05_2X_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPC07_K ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinPC07OPT_K ); + +MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCANL_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinSCANL_X ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCANL_C ); +MGGA_GENERATE_DEVICE_HELPERS( BuiltinR2SCANL_X ); LDA_GENERATE_DEVICE_HELPERS( BuiltinEPC17_1 ) LDA_GENERATE_DEVICE_HELPERS( BuiltinEPC17_2 ) diff --git a/src/sycl/exchcxx_sycl.cmake b/src/sycl/exchcxx_sycl.cmake index 
c1f80f5..9a1bd5f 100644 --- a/src/sycl/exchcxx_sycl.cmake +++ b/src/sycl/exchcxx_sycl.cmake @@ -13,9 +13,12 @@ list( APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ) find_package( SYCL REQUIRED ) target_link_libraries( exchcxx PUBLIC SYCL::SYCL ) +target_compile_options(exchcxx PRIVATE $<$:-ffp-model=precise>) +target_link_options(exchcxx PRIVATE -fsycl-max-parallel-link-jobs=20) + include(CheckCXXCompilerFlag) check_cxx_compiler_flag("-fno-sycl-id-queries-fit-in-int" EXCHCXX_SYCL_ID_QUERIES_FIT_IN_INT ) -check_cxx_compiler_flag("-fsycl-device-code-split=per_kernel" EXCHCXX_SYCL_DEVICE_CODE_SPLIT_PER_KERNEL ) +check_cxx_compiler_flag("-fsycl-device-code-split=per_source" EXCHCXX_SYCL_DEVICE_CODE_SPLIT_PER_SOURCE ) check_cxx_compiler_flag("-fno-sycl-early-optimizations" EXCHCXX_SYCL_HAS_NO_EARLY_OPTIMIZATIONS ) @@ -25,9 +28,9 @@ if( EXCHCXX_SYCL_ID_QUERIES_FIT_IN_INT ) ) endif() -if( EXCHCXX_SYCL_DEVICE_CODE_SPLIT_PER_KERNEL ) +if( EXCHCXX_SYCL_DEVICE_CODE_SPLIT_PER_SOURCE ) target_compile_options( exchcxx PRIVATE - $<$: -fsycl-device-code-split=per_kernel> + $<$: -fsycl-device-code-split=per_source> ) endif() diff --git a/src/sycl/libxc_device.cxx b/src/sycl/libxc_device.cxx index 89102ba..a19f6b8 100644 --- a/src/sycl/libxc_device.cxx +++ b/src/sycl/libxc_device.cxx @@ -2,23 +2,23 @@ * ExchCXX Copyright (c) 2020-2022, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of * any required approvals from the U.S. Dept. of Energy). All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
- * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -30,7 +30,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -51,33 +51,31 @@ -void recv_from_device( void* dest, const void* src, const size_t len, +void recv_from_device( void* dest, const void* src, const size_t len, sycl::queue* queue ) { try { queue->memcpy( dest, src, len ); - } catch( sycl::exception const &ex ) { - throw( std::runtime_error( "recv failed + " + std::string(ex.what()) ) ); + } catch( sycl::exception const &ex ) { + throw( std::runtime_error( "recv failed + " + std::string(ex.what()) ) ); } } -void send_to_device( void* dest, const void* src, const size_t len, +void send_to_device( void* dest, const void* src, const size_t len, sycl::queue* queue ) { try { queue->memcpy( dest, src, len ); - } catch( sycl::exception const &ex ) { - throw( std::runtime_error( "send failed + " + std::string(ex.what()) ) ); + } catch( sycl::exception const &ex ) { + 
throw( std::runtime_error( "send failed + " + std::string(ex.what()) ) ); } } void queue_sync( sycl::queue* queue ) { - queue->wait_and_throw(); - } @@ -142,6 +140,56 @@ LDA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { } +LDA_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT LDA", is_lda() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_fxc = this->v2rho2_buffer_len(N); + + size_t len_rho = sz_rho*sizeof(double); + size_t len_fxc = sz_fxc*sizeof(double); + + std::vector rho_host( sz_rho ), fxc_host( sz_fxc ); + + recv_from_device( rho_host.data(), rho, len_rho, queue ); + + queue_sync( queue ); + xc_lda_fxc( &kernel_, N, rho_host.data(), fxc_host.data() ); + + send_to_device( fxc, fxc_host.data(), len_fxc, queue ); + queue_sync( queue ); // Lifetime of host vectors + +} + + +LDA_VXC_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_vxc_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT LDA", is_lda() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_vxc = this->vrho_buffer_len(N); + size_t sz_fxc = this->v2rho2_buffer_len(N); + + size_t len_rho = sz_rho*sizeof(double); + size_t len_vxc = sz_vxc*sizeof(double); + size_t len_fxc = sz_fxc*sizeof(double); + + std::vector rho_host( sz_rho ), vxc_host( sz_vxc ), fxc_host( sz_fxc ); + + recv_from_device( rho_host.data(), rho, len_rho, queue ); + + queue_sync( queue ); + xc_lda_vxc_fxc( &kernel_, N, rho_host.data(), vxc_host.data(), fxc_host.data() ); + + send_to_device( vxc, vxc_host.data(), len_vxc, queue ); + send_to_device( fxc, fxc_host.data(), len_fxc, queue ); + queue_sync( queue ); // Lifetime of host vectors + +} + // TODO: LDA kxc interfaces // GGA interface @@ -189,14 +237,14 @@ GGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { size_t len_vsigma = sz_vsigma*sizeof(double); size_t len_eps = sz_eps *sizeof(double); - 
std::vector rho_host( sz_rho ), eps_host( sz_eps ), + std::vector rho_host( sz_rho ), eps_host( sz_eps ), sigma_host( sz_sigma ), vrho_host( sz_vrho ), vsigma_host( sz_vsigma ); recv_from_device( rho_host.data(), rho, len_rho , queue ); recv_from_device( sigma_host.data(), sigma, len_sigma, queue ); - + queue_sync( queue ); - xc_gga_exc_vxc( &kernel_, N, rho_host.data(), sigma_host.data(), eps_host.data(), + xc_gga_exc_vxc( &kernel_, N, rho_host.data(), sigma_host.data(), eps_host.data(), vrho_host.data(), vsigma_host.data() ); send_to_device( eps, eps_host.data(), len_eps , queue); @@ -206,9 +254,89 @@ GGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { } -// TODO: GGA kxc interfaces - - + GGA_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT GGA", is_gga() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_sigma = this->sigma_buffer_len(N); + size_t sz_v2rho2 = this->v2rho2_buffer_len(N); + size_t sz_v2rhosigma = this->v2rhosigma_buffer_len(N); + size_t sz_v2sigma2 = this->v2sigma2_buffer_len(N); + + size_t len_rho = sz_rho * sizeof(double); + size_t len_sigma = sz_sigma * sizeof(double); + size_t len_v2rho2 = sz_v2rho2 * sizeof(double); + size_t len_v2rhosigma = sz_v2rhosigma * sizeof(double); + size_t len_v2sigma2 = sz_v2sigma2 * sizeof(double); + + std::vector rho_host(sz_rho), sigma_host(sz_sigma), + v2rho2_host(sz_v2rho2), v2rhosigma_host(sz_v2rhosigma), + v2sigma2_host(sz_v2sigma2); + + recv_from_device(rho_host.data(), rho, len_rho, queue); + recv_from_device(sigma_host.data(), sigma, len_sigma, queue); + + queue_sync(queue); + xc_gga_fxc(&kernel_, N, rho_host.data(), sigma_host.data(), + v2rho2_host.data(), v2rhosigma_host.data(), v2sigma2_host.data()); + + send_to_device(v2rho2, v2rho2_host.data(), len_v2rho2, queue); + send_to_device(v2rhosigma, v2rhosigma_host.data(), len_v2rhosigma, queue); + send_to_device(v2sigma2, 
v2sigma2_host.data(), len_v2sigma2, queue); + + queue_sync(queue); // Lifetime of host vectors +} + +GGA_VXC_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_vxc_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT GGA", is_gga() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_sigma = this->sigma_buffer_len(N); + size_t sz_vrho = this->vrho_buffer_len(N); + size_t sz_vsigma = this->vsigma_buffer_len(N); + + size_t sz_v2rho2 = this->v2rho2_buffer_len(N); + size_t sz_v2rhosigma = this->v2rhosigma_buffer_len(N); + size_t sz_v2sigma2 = this->v2sigma2_buffer_len(N); + + size_t len_rho = sz_rho * sizeof(double); + size_t len_sigma = sz_sigma * sizeof(double); + size_t len_vrho = sz_vrho * sizeof(double); + size_t len_vsigma = sz_vsigma * sizeof(double); + + size_t len_v2rho2 = sz_v2rho2 * sizeof(double); + size_t len_v2rhosigma = sz_v2rhosigma * sizeof(double); + size_t len_v2sigma2 = sz_v2sigma2 * sizeof(double); + + std::vector rho_host(sz_rho), sigma_host(sz_sigma), + vrho_host(sz_vrho), vsigma_host(sz_vsigma), + v2rho2_host(sz_v2rho2), v2rhosigma_host(sz_v2rhosigma), + v2sigma2_host(sz_v2sigma2); + + recv_from_device(rho_host.data(), rho, len_rho, queue); + recv_from_device(sigma_host.data(), sigma, len_sigma, queue); + + queue_sync(queue); + xc_gga_vxc_fxc(&kernel_, N, rho_host.data(), sigma_host.data(), + vrho_host.data(), vsigma_host.data(), + v2rho2_host.data(), v2rhosigma_host.data(), + v2sigma2_host.data()); + + send_to_device(vrho, vrho_host.data(), len_vrho, queue); + send_to_device(vsigma, vsigma_host.data(), len_vsigma, queue); + send_to_device(v2rho2, v2rho2_host.data(), len_v2rho2, queue); + send_to_device(v2rhosigma, v2rhosigma_host.data(), len_v2rhosigma, queue); + send_to_device(v2sigma2, v2sigma2_host.data(), len_v2sigma2, queue); + queue_sync(queue); // Lifetime of host vectors +} + +// TODO: GGA kxc interfaces + + // mGGA interface MGGA_EXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_device_ ) const { @@ 
-227,8 +355,8 @@ MGGA_EXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_device_ ) const { size_t len_tau = sz_tau *sizeof(double); size_t len_eps = sz_eps *sizeof(double); - std::vector rho_host( sz_rho ), eps_host( sz_eps ), - sigma_host( sz_sigma ), lapl_host( sz_lapl ), + std::vector rho_host( sz_rho ), eps_host( sz_eps ), + sigma_host( sz_sigma ), lapl_host( sz_lapl ), tau_host( sz_tau ); recv_from_device( rho_host.data(), rho, len_rho , queue ); @@ -237,7 +365,7 @@ MGGA_EXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_device_ ) const { recv_from_device( tau_host.data(), tau, len_tau , queue ); queue_sync( queue ); - xc_mgga_exc( &kernel_, N, rho_host.data(), sigma_host.data(), lapl_host.data(), + xc_mgga_exc( &kernel_, N, rho_host.data(), sigma_host.data(), lapl_host.data(), tau_host.data(), eps_host.data() ); send_to_device( eps, eps_host.data(), len_eps, queue ); @@ -271,9 +399,9 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { size_t len_vlapl = sz_vlapl *sizeof(double); size_t len_vtau = sz_vtau *sizeof(double); - std::vector rho_host( sz_rho ), eps_host( sz_eps ), sigma_host( sz_sigma ), + std::vector rho_host( sz_rho ), eps_host( sz_eps ), sigma_host( sz_sigma ), lapl_host( sz_lapl ), tau_host( sz_tau ); - std::vector vrho_host( sz_vrho ), vsigma_host( sz_vsigma ), + std::vector vrho_host( sz_vrho ), vsigma_host( sz_vsigma ), vlapl_host( sz_vlapl ), vtau_host( sz_vtau ); recv_from_device( rho_host.data(), rho, len_rho , queue ); @@ -282,8 +410,8 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { recv_from_device( tau_host.data(), tau, len_tau , queue ); queue_sync( queue ); - xc_mgga_exc_vxc( &kernel_, N, rho_host.data(), sigma_host.data(), - lapl_host.data(), tau_host.data(), eps_host.data(), vrho_host.data(), + xc_mgga_exc_vxc( &kernel_, N, rho_host.data(), sigma_host.data(), + lapl_host.data(), tau_host.data(), eps_host.data(), vrho_host.data(), vsigma_host.data(), vlapl_host.data(), 
vtau_host.data() ); send_to_device( eps, eps_host.data(), len_eps , queue ); @@ -295,19 +423,199 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_exc_vxc_device_ ) const { } +MGGA_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT MGGA", is_mgga() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_sigma = this->sigma_buffer_len(N); + size_t sz_lapl = this->lapl_buffer_len(N) ; + size_t sz_tau = this->tau_buffer_len(N) ; + + size_t sz_v2rho2 = this->v2rho2_buffer_len(N); + size_t sz_v2rhosigma = this->v2rhosigma_buffer_len(N); + size_t sz_v2rholapl = this->v2rholapl_buffer_len(N); + size_t sz_v2rhotau = this->v2rhotau_buffer_len(N); + size_t sz_v2sigma2 = this->v2sigma2_buffer_len(N); + size_t sz_v2sigmalapl = this->v2sigmalapl_buffer_len(N); + size_t sz_v2sigmatau = this->v2sigmatau_buffer_len(N); + size_t sz_v2lapl2 = this->v2lapl2_buffer_len(N); + size_t sz_v2lapltau = this->v2lapltau_buffer_len(N); + size_t sz_v2tau2 = this->v2tau2_buffer_len(N); + + size_t len_rho = sz_rho * sizeof(double); + size_t len_sigma = sz_sigma * sizeof(double); + size_t len_lapl = sz_lapl * sizeof(double); + size_t len_tau = sz_tau * sizeof(double); + + size_t len_v2rho2 = sz_v2rho2 * sizeof(double); + size_t len_v2rhosigma = sz_v2rhosigma * sizeof(double); + size_t len_v2rholapl = sz_v2rholapl * sizeof(double); + size_t len_v2rhotau = sz_v2rhotau * sizeof(double); + size_t len_v2sigma2 = sz_v2sigma2 * sizeof(double); + size_t len_v2sigmalapl = sz_v2sigmalapl * sizeof(double); + size_t len_v2sigmatau = sz_v2sigmatau * sizeof(double); + size_t len_v2lapl2 = sz_v2lapl2 * sizeof(double); + size_t len_v2lapltau = sz_v2lapltau * sizeof(double); + size_t len_v2tau2 = sz_v2tau2 * sizeof(double); + + std::vector rho_host(sz_rho), sigma_host(sz_sigma), + lapl_host(sz_lapl), tau_host(sz_tau); + + std::vector v2rho2_host(sz_v2rho2), v2rhosigma_host(sz_v2rhosigma), + v2rholapl_host(sz_v2rholapl), 
v2rhotau_host(sz_v2rhotau), + v2sigma2_host(sz_v2sigma2), v2sigmalapl_host(sz_v2sigmalapl), + v2sigmatau_host(sz_v2sigmatau), v2lapl2_host(sz_v2lapl2), + v2lapltau_host(sz_v2lapltau), v2tau2_host(sz_v2tau2); + + recv_from_device(rho_host.data(), rho, len_rho, queue); + recv_from_device(sigma_host.data(), sigma, len_sigma, queue); + recv_from_device(lapl_host.data(), lapl, len_lapl, queue); + recv_from_device(tau_host.data(), tau, len_tau, queue); + + queue_sync(queue); + xc_mgga_fxc(&kernel_, N, rho_host.data(), sigma_host.data(), + lapl_host.data(), tau_host.data(), + v2rho2_host.data(), v2rhosigma_host.data(), v2rholapl_host.data(), + v2rhotau_host.data(), v2sigma2_host.data(), v2sigmalapl_host.data(), + v2sigmatau_host.data(), v2lapl2_host.data(), v2lapltau_host.data(), + v2tau2_host.data()); + + send_to_device(v2rho2, v2rho2_host.data(), len_v2rho2, queue); + send_to_device(v2rhosigma, v2rhosigma_host.data(), len_v2rhosigma, queue); + send_to_device(v2rholapl, v2rholapl_host.data(), len_v2rholapl, queue); + send_to_device(v2rhotau, v2rhotau_host.data(), len_v2rhotau, queue); + send_to_device(v2sigma2, v2sigma2_host.data(), len_v2sigma2, queue); + send_to_device(v2sigmalapl, v2sigmalapl_host.data(), len_v2sigmalapl, queue); + send_to_device(v2sigmatau, v2sigmatau_host.data(), len_v2sigmatau, queue); + send_to_device(v2lapl2, v2lapl2_host.data(), len_v2lapl2, queue); + send_to_device(v2lapltau, v2lapltau_host.data(), len_v2lapltau, queue); + send_to_device(v2tau2, v2tau2_host.data(), len_v2tau2, queue); + + queue_sync(queue); // Lifetime of host vectors +} + + +MGGA_VXC_FXC_GENERATOR_DEVICE( LibxcKernelImpl::eval_vxc_fxc_device_ ) const { + + throw_if_uninitialized(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT MGGA", is_mgga() ); + + size_t sz_rho = this->rho_buffer_len(N); + size_t sz_sigma = this->sigma_buffer_len(N); + size_t sz_lapl = this->lapl_buffer_len(N); + size_t sz_tau = this->tau_buffer_len(N) ; + + size_t sz_vrho = this->vrho_buffer_len(N); + size_t 
sz_vsigma = this->vsigma_buffer_len(N); + size_t sz_vlapl = this->vlapl_buffer_len(N); + size_t sz_vtau = this->vtau_buffer_len(N) ; + + size_t sz_v2rho2 = this->v2rho2_buffer_len(N); + size_t sz_v2rhosigma = this->v2rhosigma_buffer_len(N); + size_t sz_v2rholapl = this->v2rholapl_buffer_len(N); + size_t sz_v2rhotau = this->v2rhotau_buffer_len(N); + size_t sz_v2sigma2 = this->v2sigma2_buffer_len(N); + size_t sz_v2sigmalapl = this->v2sigmalapl_buffer_len(N); + size_t sz_v2sigmatau = this->v2sigmatau_buffer_len(N); + size_t sz_v2lapl2 = this->v2lapl2_buffer_len(N); + size_t sz_v2lapltau = this->v2lapltau_buffer_len(N); + size_t sz_v2tau2 = this->v2tau2_buffer_len(N); + + size_t len_rho = sz_rho * sizeof(double); + size_t len_sigma = sz_sigma * sizeof(double); + size_t len_lapl = sz_lapl * sizeof(double); + size_t len_tau = sz_tau * sizeof(double); + + size_t len_vrho = sz_vrho * sizeof(double); + size_t len_vsigma = sz_vsigma * sizeof(double); + size_t len_vlapl = sz_vlapl * sizeof(double); + size_t len_vtau = sz_vtau * sizeof(double); + + size_t len_v2rho2 = sz_v2rho2 * sizeof(double); + size_t len_v2rhosigma = sz_v2rhosigma * sizeof(double); + size_t len_v2rholapl = sz_v2rholapl * sizeof(double); + size_t len_v2rhotau = sz_v2rhotau * sizeof(double); + size_t len_v2sigma2 = sz_v2sigma2 * sizeof(double); + size_t len_v2sigmalapl = sz_v2sigmalapl * sizeof(double); + size_t len_v2sigmatau = sz_v2sigmatau * sizeof(double); + size_t len_v2lapl2 = sz_v2lapl2 * sizeof(double); + size_t len_v2lapltau = sz_v2lapltau * sizeof(double); + size_t len_v2tau2 = sz_v2tau2 * sizeof(double); + + std::vector rho_host(sz_rho), sigma_host(sz_sigma), + lapl_host(sz_lapl), tau_host(sz_tau); + + std::vector vrho_host(sz_vrho), vsigma_host(sz_vsigma), + vlapl_host(sz_vlapl), vtau_host(sz_vtau); + + std::vector v2rho2_host(sz_v2rho2), v2rhosigma_host(sz_v2rhosigma), + v2rholapl_host(sz_v2rholapl), v2rhotau_host(sz_v2rhotau), + v2sigma2_host(sz_v2sigma2), v2sigmalapl_host(sz_v2sigmalapl), + 
v2sigmatau_host(sz_v2sigmatau), v2lapl2_host(sz_v2lapl2), + v2lapltau_host(sz_v2lapltau), v2tau2_host(sz_v2tau2); + + recv_from_device(rho_host.data(), rho, len_rho, queue); + recv_from_device(sigma_host.data(), sigma, len_sigma, queue); + recv_from_device(lapl_host.data(), lapl, len_lapl, queue); + recv_from_device(tau_host.data(), tau, len_tau, queue); + + queue_sync(queue); + xc_mgga_vxc_fxc(&kernel_, N, rho_host.data(), sigma_host.data(), + lapl_host.data(), tau_host.data(), + vrho_host.data(), vsigma_host.data(), vlapl_host.data(), vtau_host.data(), + v2rho2_host.data(), v2rhosigma_host.data(), v2rholapl_host.data(), + v2rhotau_host.data(), v2sigma2_host.data(), v2sigmalapl_host.data(), + v2sigmatau_host.data(), v2lapl2_host.data(), v2lapltau_host.data(), + v2tau2_host.data()); + + send_to_device(vrho, vrho_host.data(), len_vrho, queue); + send_to_device(vsigma, vsigma_host.data(), len_vsigma, queue); + send_to_device(vlapl, vlapl_host.data(), len_vlapl, queue); + send_to_device(vtau, vtau_host.data(), len_vtau, queue); + + send_to_device(v2rho2, v2rho2_host.data(), len_v2rho2, queue); + send_to_device(v2rhosigma, v2rhosigma_host.data(), len_v2rhosigma, queue); + send_to_device(v2rholapl, v2rholapl_host.data(), len_v2rholapl, queue); + send_to_device(v2rhotau, v2rhotau_host.data(), len_v2rhotau, queue); + send_to_device(v2sigma2, v2sigma2_host.data(), len_v2sigma2, queue); + send_to_device(v2sigmalapl, v2sigmalapl_host.data(), len_v2sigmalapl, queue); + send_to_device(v2sigmatau, v2sigmatau_host.data(), len_v2sigmatau, queue); + send_to_device(v2lapl2, v2lapl2_host.data(), len_v2lapl2, queue); + send_to_device(v2lapltau, v2lapltau_host.data(), len_v2lapltau, queue); + send_to_device(v2tau2, v2tau2_host.data(), len_v2tau2, queue); + + queue_sync(queue); // Lifetime of host vectors +} + -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, EXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, EXC, LibxcKernelImpl::eval_exc_inc_device_, const ) 
-UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, EXC_VXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, EXC_VXC, LibxcKernelImpl::eval_exc_vxc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, EXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, EXC, LibxcKernelImpl::eval_exc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, EXC_VXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, EXC_VXC, LibxcKernelImpl::eval_exc_vxc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, EXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, EXC, LibxcKernelImpl::eval_exc_inc_device_, const ) -UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, EXC_VXC, +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, EXC_VXC, LibxcKernelImpl::eval_exc_vxc_inc_device_, const ) + +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, FXC, + LibxcKernelImpl::eval_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, FXC, + LibxcKernelImpl::eval_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, FXC, + LibxcKernelImpl::eval_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( LDA, VXC_FXC, + LibxcKernelImpl::eval_vxc_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( GGA, VXC_FXC, + LibxcKernelImpl::eval_vxc_fxc_inc_device_, const ) +UNUSED_DEVICE_INC_INTERFACE_GENERATOR( MGGA, VXC_FXC, + LibxcKernelImpl::eval_vxc_fxc_inc_device_, const ) + } } diff --git a/src/sycl/xc_functional_device.cxx b/src/sycl/xc_functional_device.cxx index ac36f75..e862dd6 100644 --- a/src/sycl/xc_functional_device.cxx +++ b/src/sycl/xc_functional_device.cxx @@ -2,23 +2,23 @@ * ExchCXX Copyright (c) 2020-2022, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of * any required approvals from the U.S. Dept. of Energy). All rights reserved. 
- * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -30,7 +30,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -172,6 +172,81 @@ LDA_EXC_VXC_GENERATOR_DEVICE( XCFunctional::eval_exc_vxc_device ) const { } +LDA_FXC_GENERATOR_DEVICE( XCFunctional::eval_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT LDA", is_lda() ); + + const size_t len_fxc_buffer = v2rho2_buffer_len(N); + + double* fxc_scr = nullptr; + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) + fxc_scr = safe_sycl_malloc( len_fxc_buffer, queue ); + + safe_zero( len_fxc_buffer, fxc, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + if (use_inc) { + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, fxc, queue + ); + } else { + double* fxc_eval = i ? fxc_scr : fxc; + kernels_[i].second.eval_fxc_device(N, rho, fxc_eval, queue); + + if( i ) + add_scal_device( len_fxc_buffer, kernels_[i].first, fxc_eval, fxc, queue ); + else + scal_device( len_fxc_buffer, kernels_[i].first, fxc_eval, fxc, queue ); + } + } + + if( fxc_scr ) sycl::free( fxc_scr, *queue ); +} + +LDA_VXC_FXC_GENERATOR_DEVICE( XCFunctional::eval_vxc_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT LDA", is_lda() ); + + const size_t len_vxc_buffer = vrho_buffer_len(N); + const size_t len_fxc_buffer = v2rho2_buffer_len(N); + + double* vxc_scr(nullptr), *fxc_scr(nullptr); + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + vxc_scr = safe_sycl_malloc( len_vxc_buffer, queue ); + fxc_scr = safe_sycl_malloc( len_fxc_buffer, queue ); + } + + safe_zero( len_vxc_buffer, vxc, queue ); + safe_zero( len_fxc_buffer, fxc, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + if (use_inc) { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, vxc, fxc, 
queue + ); + } else { + double* vxc_eval = i ? vxc_scr : vxc; + double* fxc_eval = i ? fxc_scr : fxc; + kernels_[i].second.eval_vxc_fxc_device(N, rho, vxc_eval, fxc_eval, queue); + + if( i ) { + add_scal_device( len_vxc_buffer, kernels_[i].first, vxc_eval, vxc, queue ); + add_scal_device( len_fxc_buffer, kernels_[i].first, fxc_eval, fxc, queue ); + } else { + scal_device( len_vxc_buffer, kernels_[i].first, vxc_eval, vxc, queue ); + scal_device( len_fxc_buffer, kernels_[i].first, fxc_eval, fxc, queue ); + } + } + } + + if( vxc_scr ) sycl::free( vxc_scr, *queue ); + if( fxc_scr ) sycl::free( fxc_scr, *queue ); +} + // GGA Interfaces @@ -293,8 +368,151 @@ GGA_EXC_VXC_GENERATOR_DEVICE( XCFunctional::eval_exc_vxc_device ) const { if( vsigma_scr ) sycl::free( vsigma_scr, *queue ); } +GGA_FXC_GENERATOR_DEVICE( XCFunctional::eval_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT GGA", is_gga() ); + + const size_t len_v2rho2_buffer = v2rho2_buffer_len(N); + const size_t len_v2rhosigma_buffer = v2rhosigma_buffer_len(N); + const size_t len_v2sigma2_buffer = v2sigma2_buffer_len(N); + + double* v2rho2_scr(nullptr), *v2rhosigma_scr(nullptr), *v2sigma2_scr(nullptr); + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + v2rho2_scr = safe_sycl_malloc( len_v2rho2_buffer, queue ); + v2rhosigma_scr = safe_sycl_malloc( len_v2rhosigma_buffer, queue ); + v2sigma2_scr = safe_sycl_malloc( len_v2sigma2_buffer, queue ); + } + + safe_zero( len_v2rho2_buffer, v2rho2, queue ); + safe_zero( len_v2rhosigma_buffer, v2rhosigma, queue ); + safe_zero( len_v2sigma2_buffer, v2sigma2, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + if (use_inc) { + if( kernels_[i].second.is_gga() ) + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, queue + ); + else + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, v2rho2, queue + ); + } else { + double* 
v2rho2_eval = i ? v2rho2_scr : v2rho2; + double* v2rhosigma_eval = i ? v2rhosigma_scr : v2rhosigma; + double* v2sigma2_eval = i ? v2sigma2_scr : v2sigma2; + + if( kernels_[i].second.is_gga() ) + kernels_[i].second.eval_fxc_device(N, rho, sigma, v2rho2_eval, + v2rhosigma_eval, v2sigma2_eval, queue ); + else + kernels_[i].second.eval_fxc_device(N, rho, v2rho2_eval, queue); + + if( i ) { + add_scal_device( len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue ); + if( kernels_[i].second.is_gga() ){ + add_scal_device( len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue ); + add_scal_device( len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue ); + } + + } else { + scal_device( len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue ); + if( kernels_[i].second.is_gga() ){ + scal_device( len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue ); + scal_device( len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue ); + } + } + } + } + + if( v2rho2_scr ) sycl::free( v2rho2_scr, *queue); + if( v2rhosigma_scr ) sycl::free( v2rhosigma_scr, *queue); + if( v2sigma2_scr ) sycl::free( v2sigma2_scr, *queue); +} + +GGA_VXC_FXC_GENERATOR_DEVICE( XCFunctional::eval_vxc_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT GGA", is_gga() ); + + const size_t len_vrho_buffer = vrho_buffer_len(N); + const size_t len_vsigma_buffer = vsigma_buffer_len(N); + const size_t len_v2rho2_buffer = v2rho2_buffer_len(N); + const size_t len_v2rhosigma_buffer = v2rhosigma_buffer_len(N); + const size_t len_v2sigma2_buffer = v2sigma2_buffer_len(N); + + double* vrho_scr(nullptr), *vsigma_scr(nullptr); + double* v2rho2_scr(nullptr), *v2rhosigma_scr(nullptr), *v2sigma2_scr(nullptr); + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + vrho_scr = safe_sycl_malloc( len_vrho_buffer, queue ); + vsigma_scr = safe_sycl_malloc( len_vsigma_buffer, queue 
); + v2rho2_scr = safe_sycl_malloc( len_v2rho2_buffer, queue ); + v2rhosigma_scr = safe_sycl_malloc( len_v2rhosigma_buffer, queue ); + v2sigma2_scr = safe_sycl_malloc( len_v2sigma2_buffer, queue ); + } + + safe_zero( len_vrho_buffer, vrho, queue ); + safe_zero( len_vsigma_buffer, vsigma, queue ); + safe_zero( len_v2rho2_buffer, v2rho2, queue ); + safe_zero( len_v2rhosigma_buffer, v2rhosigma, queue ); + safe_zero( len_v2sigma2_buffer, v2sigma2, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + if (use_inc) { + if (kernels_[i].second.is_gga()) { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, queue + ); + } else { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, vrho, v2rho2, queue + ); + } + } else { + double* vrho_eval = i ? vrho_scr : vrho; + double* vsigma_eval = i ? vsigma_scr : vsigma; + double* v2rho2_eval = i ? v2rho2_scr : v2rho2; + double* v2rhosigma_eval = i ? v2rhosigma_scr : v2rhosigma; + double* v2sigma2_eval = i ? 
v2sigma2_scr : v2sigma2; + + if (kernels_[i].second.is_gga()) { + kernels_[i].second.eval_vxc_fxc_device( + N, rho, sigma, vrho_eval, vsigma_eval, v2rho2_eval, v2rhosigma_eval, v2sigma2_eval, queue); + } else { + kernels_[i].second.eval_vxc_fxc_device(N, rho, vrho_eval, v2rho2_eval, queue); + } + + if (i) { + add_scal_device(len_vrho_buffer, kernels_[i].first, vrho_eval, vrho, queue); + add_scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if (kernels_[i].second.is_gga()) { + add_scal_device(len_vsigma_buffer, kernels_[i].first, vsigma_eval, vsigma, queue); + add_scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + add_scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + } else { + scal_device(len_vrho_buffer, kernels_[i].first, vrho_eval, vrho, queue); + scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + if (kernels_[i].second.is_gga()) { + scal_device(len_vsigma_buffer, kernels_[i].first, vsigma_eval, vsigma, queue); + scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + } + } + } + if( vrho_scr ) sycl::free( vrho_scr, *queue); + if( vsigma_scr ) sycl::free( vsigma_scr, *queue); + if( v2rho2_scr ) sycl::free( v2rho2_scr, *queue); + if( v2rhosigma_scr ) sycl::free( v2rhosigma_scr, *queue); + if( v2sigma2_scr ) sycl::free( v2sigma2_scr, *queue); +} // mGGA Interfaces @@ -453,4 +671,329 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( XCFunctional::eval_exc_vxc_device ) const { if( vtau_scr ) sycl::free( vtau_scr, *queue ); } +MGGA_FXC_GENERATOR_DEVICE( XCFunctional::eval_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT MGGA", is_mgga() ); + + const size_t len_v2rho2_buffer = v2rho2_buffer_len(N); + const size_t len_v2rhosigma_buffer = v2rhosigma_buffer_len(N); + const size_t 
len_v2rholapl_buffer = v2rholapl_buffer_len(N); + const size_t len_v2rhotau_buffer = v2rhotau_buffer_len(N); + const size_t len_v2sigma2_buffer = v2sigma2_buffer_len(N); + const size_t len_v2sigmalapl_buffer = v2sigmalapl_buffer_len(N); + const size_t len_v2sigmatau_buffer = v2sigmatau_buffer_len(N); + const size_t len_v2lapl2_buffer = v2lapl2_buffer_len(N); + const size_t len_v2lapltau_buffer = v2lapltau_buffer_len(N); + const size_t len_v2tau2_buffer = v2tau2_buffer_len(N); + + double* v2rho2_scr(nullptr), *v2rhosigma_scr(nullptr), *v2rholapl_scr(nullptr), *v2rhotau_scr(nullptr), + *v2sigma2_scr(nullptr), *v2sigmalapl_scr(nullptr), *v2sigmatau_scr(nullptr), *v2lapl2_scr(nullptr), + *v2lapltau_scr(nullptr), *v2tau2_scr(nullptr); + + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + v2rho2_scr = safe_sycl_malloc( len_v2rho2_buffer, queue ); + v2rhosigma_scr = safe_sycl_malloc( len_v2rhosigma_buffer, queue ); + v2rholapl_scr = safe_sycl_malloc( len_v2rholapl_buffer, queue ); + v2rhotau_scr = safe_sycl_malloc( len_v2rhotau_buffer, queue ); + v2sigma2_scr = safe_sycl_malloc( len_v2sigma2_buffer, queue ); + v2sigmalapl_scr = safe_sycl_malloc( len_v2sigmalapl_buffer, queue ); + v2sigmatau_scr = safe_sycl_malloc( len_v2sigmatau_buffer, queue ); + v2lapl2_scr = safe_sycl_malloc( len_v2lapl2_buffer, queue ); + v2lapltau_scr = safe_sycl_malloc( len_v2lapltau_buffer, queue ); + v2tau2_scr = safe_sycl_malloc( len_v2tau2_buffer, queue ); + } + + safe_zero( len_v2rho2_buffer, v2rho2, queue ); + safe_zero( len_v2rhosigma_buffer, v2rhosigma, queue ); + safe_zero( len_v2rholapl_buffer, v2rholapl, queue ); + safe_zero( len_v2rhotau_buffer, v2rhotau, queue ); + safe_zero( len_v2sigma2_buffer, v2sigma2, queue ); + safe_zero( len_v2sigmalapl_buffer, v2sigmalapl, queue ); + safe_zero( len_v2sigmatau_buffer, v2sigmatau, queue ); + safe_zero( len_v2lapl2_buffer, v2lapl2, queue ); + safe_zero( len_v2lapltau_buffer, v2lapltau, queue ); + safe_zero( 
len_v2tau2_buffer, v2tau2, queue ); + + for( auto i = 0ul; i < kernels_.size(); ++i ) { + + if( use_inc ) { + if( kernels_[i].second.is_mgga() ) + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, queue + ); + else if( kernels_[i].second.is_gga() ) + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, queue + ); + else + kernels_[i].second.eval_fxc_inc_device( + kernels_[i].first, N, rho, v2rho2, queue + ); + } else { + double* v2rho2_eval = i ? v2rho2_scr : v2rho2; + double* v2rhosigma_eval = i ? v2rhosigma_scr : v2rhosigma; + double* v2rholapl_eval = i ? v2rholapl_scr : v2rholapl; + double* v2rhotau_eval = i ? v2rhotau_scr : v2rhotau; + double* v2sigma2_eval = i ? v2sigma2_scr : v2sigma2; + double* v2sigmalapl_eval = i ? v2sigmalapl_scr : v2sigmalapl; + double* v2sigmatau_eval = i ? v2sigmatau_scr : v2sigmatau; + double* v2lapl2_eval = i ? v2lapl2_scr : v2lapl2; + double* v2lapltau_eval = i ? v2lapltau_scr : v2lapltau; + double* v2tau2_eval = i ? 
v2tau2_scr : v2tau2; + + if( kernels_[i].second.is_mgga() ) + kernels_[i].second.eval_fxc_device(N, rho, sigma, lapl, tau, v2rho2_eval, + v2rhosigma_eval, v2rholapl_eval, v2rhotau_eval, v2sigma2_eval, v2sigmalapl_eval, + v2sigmatau_eval, v2lapl2_eval, v2lapltau_eval, v2tau2_eval, queue); + else if( kernels_[i].second.is_gga() ) + kernels_[i].second.eval_fxc_device(N, rho, sigma, v2rho2_eval, v2rhosigma_eval, v2sigma2_eval, queue); + else + kernels_[i].second.eval_fxc_device(N, rho, v2rho2_eval, queue); + + if (i) { + add_scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if( kernels_[i].second.is_gga() or kernels_[i].second.is_mgga() ){ + add_scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + add_scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + + if( kernels_[i].second.needs_laplacian() ) { + add_scal_device(len_v2rholapl_buffer, kernels_[i].first, v2rholapl_eval, v2rholapl, queue); + add_scal_device(len_v2sigmalapl_buffer, kernels_[i].first, v2sigmalapl_eval, v2sigmalapl, queue); + add_scal_device(len_v2lapl2_buffer, kernels_[i].first, v2lapl2_eval, v2lapl2, queue); + } + + if( kernels_[i].second.is_mgga() ) { + add_scal_device(len_v2rhotau_buffer, kernels_[i].first, v2rhotau_eval, v2rhotau, queue); + add_scal_device(len_v2sigmatau_buffer, kernels_[i].first, v2sigmatau_eval, v2sigmatau, queue); + add_scal_device(len_v2tau2_buffer, kernels_[i].first, v2tau2_eval, v2tau2, queue); + } + + if ( kernels_[i].second.needs_laplacian() && kernels_[i].second.is_mgga() ) { + add_scal_device(len_v2lapltau_buffer, kernels_[i].first, v2lapltau_eval, v2lapltau, queue); + } + + } else{ + + scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if (kernels_[i].second.is_gga() or kernels_[i].second.is_mgga()) { + scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + scal_device(len_v2sigma2_buffer, 
kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + + if (kernels_[i].second.needs_laplacian()) { + scal_device(len_v2rholapl_buffer, kernels_[i].first, v2rholapl_eval, v2rholapl, queue); + scal_device(len_v2sigmalapl_buffer, kernels_[i].first, v2sigmalapl_eval, v2sigmalapl, queue); + scal_device(len_v2lapl2_buffer, kernels_[i].first, v2lapl2_eval, v2lapl2, queue); + } + + if (kernels_[i].second.is_mgga()) { + scal_device(len_v2rhotau_buffer, kernels_[i].first, v2rhotau_eval, v2rhotau, queue); + scal_device(len_v2sigmatau_buffer, kernels_[i].first, v2sigmatau_eval, v2sigmatau, queue); + scal_device(len_v2tau2_buffer, kernels_[i].first, v2tau2_eval, v2tau2, queue); + } + + if (kernels_[i].second.needs_laplacian() && kernels_[i].second.is_mgga()) { + scal_device(len_v2lapltau_buffer, kernels_[i].first, v2lapltau_eval, v2lapltau, queue); + } + } + } + } + + if( v2rho2_scr ) sycl::free( v2rho2_scr, *queue); + if( v2rhosigma_scr ) sycl::free( v2rhosigma_scr, *queue); + if( v2rholapl_scr ) sycl::free( v2rholapl_scr, *queue); + if( v2rhotau_scr ) sycl::free( v2rhotau_scr, *queue); + if( v2sigma2_scr ) sycl::free( v2sigma2_scr, *queue); + if( v2sigmalapl_scr ) sycl::free( v2sigmalapl_scr, *queue); + if( v2sigmatau_scr ) sycl::free( v2sigmatau_scr, *queue); + if( v2lapl2_scr ) sycl::free( v2lapl2_scr, *queue); + if( v2lapltau_scr ) sycl::free( v2lapltau_scr, *queue); + if( v2tau2_scr ) sycl::free( v2tau2_scr, *queue); +} + +MGGA_VXC_FXC_GENERATOR_DEVICE( XCFunctional::eval_vxc_fxc_device ) const { + + throw_if_not_sane(); + EXCHCXX_BOOL_CHECK("KERNEL IS NOT MGGA", is_mgga() ); + + const size_t len_vrho_buffer = vrho_buffer_len(N); + const size_t len_vsigma_buffer = vsigma_buffer_len(N); + const size_t len_vlapl_buffer = vlapl_buffer_len(N); + const size_t len_vtau_buffer = vtau_buffer_len(N); + const size_t len_v2rho2_buffer = v2rho2_buffer_len(N); + const size_t len_v2rhosigma_buffer = v2rhosigma_buffer_len(N); + const size_t len_v2rholapl_buffer = 
v2rholapl_buffer_len(N); + const size_t len_v2rhotau_buffer = v2rhotau_buffer_len(N); + const size_t len_v2sigma2_buffer = v2sigma2_buffer_len(N); + const size_t len_v2sigmalapl_buffer = v2sigmalapl_buffer_len(N); + const size_t len_v2sigmatau_buffer = v2sigmatau_buffer_len(N); + const size_t len_v2lapl2_buffer = v2lapl2_buffer_len(N); + const size_t len_v2lapltau_buffer = v2lapltau_buffer_len(N); + const size_t len_v2tau2_buffer = v2tau2_buffer_len(N); + + double* vrho_scr(nullptr), *vsigma_scr(nullptr), *vlapl_scr(nullptr), *vtau_scr(nullptr); + double* v2rho2_scr(nullptr), *v2rhosigma_scr(nullptr), *v2rholapl_scr(nullptr), *v2rhotau_scr(nullptr), + *v2sigma2_scr(nullptr), *v2sigmalapl_scr(nullptr), *v2sigmatau_scr(nullptr), *v2lapl2_scr(nullptr), + *v2lapltau_scr(nullptr), *v2tau2_scr(nullptr); + + bool use_inc = supports_inc_interface(); + if( kernels_.size() > 1 && !use_inc ) { + vrho_scr = safe_sycl_malloc( len_vrho_buffer, queue ); + vsigma_scr = safe_sycl_malloc( len_vsigma_buffer, queue ); + vlapl_scr = safe_sycl_malloc( len_vlapl_buffer, queue ); + vtau_scr = safe_sycl_malloc( len_vtau_buffer, queue ); + v2rho2_scr = safe_sycl_malloc( len_v2rho2_buffer, queue ); + v2rhosigma_scr = safe_sycl_malloc( len_v2rhosigma_buffer, queue ); + v2rholapl_scr = safe_sycl_malloc( len_v2rholapl_buffer, queue ); + v2rhotau_scr = safe_sycl_malloc(len_v2rhotau_buffer, queue); + v2sigma2_scr = safe_sycl_malloc(len_v2sigma2_buffer, queue); + v2sigmalapl_scr = safe_sycl_malloc(len_v2sigmalapl_buffer, queue); + v2sigmatau_scr = safe_sycl_malloc(len_v2sigmatau_buffer, queue); + v2lapl2_scr = safe_sycl_malloc(len_v2lapl2_buffer, queue); + v2lapltau_scr = safe_sycl_malloc(len_v2lapltau_buffer, queue); + v2tau2_scr = safe_sycl_malloc(len_v2tau2_buffer, queue); + } + + safe_zero(len_vrho_buffer, vrho, queue); + safe_zero(len_vsigma_buffer, vsigma, queue); + safe_zero(len_vlapl_buffer, vlapl, queue); + safe_zero(len_vtau_buffer, vtau, queue); + safe_zero(len_v2rho2_buffer, v2rho2, 
queue); + safe_zero(len_v2rhosigma_buffer, v2rhosigma, queue); + safe_zero(len_v2rholapl_buffer, v2rholapl, queue); + safe_zero(len_v2rhotau_buffer, v2rhotau, queue); + safe_zero(len_v2sigma2_buffer, v2sigma2, queue); + safe_zero(len_v2sigmalapl_buffer, v2sigmalapl, queue); + safe_zero(len_v2sigmatau_buffer, v2sigmatau, queue); + safe_zero(len_v2lapl2_buffer, v2lapl2, queue); + safe_zero(len_v2lapltau_buffer, v2lapltau, queue); + safe_zero(len_v2tau2_buffer, v2tau2, queue); + + for (auto i = 0ul; i < kernels_.size(); ++i) { + if( use_inc ) { + if (kernels_[i].second.is_mgga()) { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, + v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, + v2lapltau, v2tau2, queue); + } else if (kernels_[i].second.is_gga()) { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, + v2sigma2, queue); + } else { + kernels_[i].second.eval_vxc_fxc_inc_device( + kernels_[i].first, N, rho, vrho, v2rho2, queue); + } + } else { + double* vrho_eval = i ? vrho_scr : vrho; + double* vsigma_eval = i ? vsigma_scr : vsigma; + double* vlapl_eval = i ? vlapl_scr : vlapl; + double* vtau_eval = i ? vtau_scr : vtau; + double* v2rho2_eval = i ? v2rho2_scr : v2rho2; + double* v2rhosigma_eval = i ? v2rhosigma_scr : v2rhosigma; + double* v2rholapl_eval = i ? v2rholapl_scr : v2rholapl; + double* v2rhotau_eval = i ? v2rhotau_scr : v2rhotau; + double* v2sigma2_eval = i ? v2sigma2_scr : v2sigma2; + double* v2sigmalapl_eval = i ? v2sigmalapl_scr : v2sigmalapl; + double* v2sigmatau_eval = i ? v2sigmatau_scr : v2sigmatau; + double* v2lapl2_eval = i ? v2lapl2_scr : v2lapl2; + double* v2lapltau_eval = i ? v2lapltau_scr : v2lapltau; + double* v2tau2_eval = i ? 
v2tau2_scr : v2tau2; + + if (kernels_[i].second.is_mgga()) { + kernels_[i].second.eval_vxc_fxc_device( + N, rho, sigma, lapl, tau, vrho_eval, vsigma_eval, vlapl_eval, vtau_eval, + v2rho2_eval, v2rhosigma_eval, v2rholapl_eval, v2rhotau_eval, + v2sigma2_eval, v2sigmalapl_eval, v2sigmatau_eval, v2lapl2_eval, + v2lapltau_eval, v2tau2_eval, queue); + } else if (kernels_[i].second.is_gga()) { + kernels_[i].second.eval_vxc_fxc_device( + N, rho, sigma, vrho_eval, vsigma_eval, v2rho2_eval, v2rhosigma_eval, + v2sigma2_eval, queue); + } else { + kernels_[i].second.eval_vxc_fxc_device(N, rho, vrho_eval, v2rho2_eval, queue); + } + + if (i) { + add_scal_device(len_vrho_buffer, kernels_[i].first, vrho_eval, vrho, queue); + add_scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if (kernels_[i].second.is_gga() || kernels_[i].second.is_mgga()) { + add_scal_device(len_vsigma_buffer, kernels_[i].first, vsigma_eval, vsigma, queue); + add_scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + add_scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + + if (kernels_[i].second.needs_laplacian()) { + add_scal_device(len_vlapl_buffer, kernels_[i].first, vlapl_eval, vlapl, queue); + add_scal_device(len_v2rholapl_buffer, kernels_[i].first, v2rholapl_eval, v2rholapl, queue); + add_scal_device(len_v2sigmalapl_buffer, kernels_[i].first, v2sigmalapl_eval, v2sigmalapl, queue); + add_scal_device(len_v2lapl2_buffer, kernels_[i].first, v2lapl2_eval, v2lapl2, queue); + } + + if (kernels_[i].second.is_mgga()) { + add_scal_device(len_vtau_buffer, kernels_[i].first, vtau_eval, vtau, queue); + add_scal_device(len_v2rhotau_buffer, kernels_[i].first, v2rhotau_eval, v2rhotau, queue); + add_scal_device(len_v2sigmatau_buffer, kernels_[i].first, v2sigmatau_eval, v2sigmatau, queue); + add_scal_device(len_v2tau2_buffer, kernels_[i].first, v2tau2_eval, v2tau2, queue); + } + + if 
(kernels_[i].second.needs_laplacian() && kernels_[i].second.is_mgga()) { + add_scal_device(len_v2lapltau_buffer, kernels_[i].first, v2lapltau_eval, v2lapltau, queue); + } + } else { + scal_device(len_vrho_buffer, kernels_[i].first, vrho_eval, vrho, queue); + scal_device(len_v2rho2_buffer, kernels_[i].first, v2rho2_eval, v2rho2, queue); + + if (kernels_[i].second.is_gga() || kernels_[i].second.is_mgga()) { + scal_device(len_vsigma_buffer, kernels_[i].first, vsigma_eval, vsigma, queue); + scal_device(len_v2rhosigma_buffer, kernels_[i].first, v2rhosigma_eval, v2rhosigma, queue); + scal_device(len_v2sigma2_buffer, kernels_[i].first, v2sigma2_eval, v2sigma2, queue); + } + + if (kernels_[i].second.needs_laplacian()) { + scal_device(len_vlapl_buffer, kernels_[i].first, vlapl_eval, vlapl, queue); + scal_device(len_v2rholapl_buffer, kernels_[i].first, v2rholapl_eval, v2rholapl, queue); + scal_device(len_v2sigmalapl_buffer, kernels_[i].first, v2sigmalapl_eval, v2sigmalapl, queue); + scal_device(len_v2lapl2_buffer, kernels_[i].first, v2lapl2_eval, v2lapl2, queue); + } + + if (kernels_[i].second.is_mgga()) { + scal_device(len_vtau_buffer, kernels_[i].first, vtau_eval, vtau, queue); + scal_device(len_v2rhotau_buffer, kernels_[i].first, v2rhotau_eval, v2rhotau, queue); + scal_device(len_v2sigmatau_buffer, kernels_[i].first, v2sigmatau_eval, v2sigmatau, queue); + scal_device(len_v2tau2_buffer, kernels_[i].first, v2tau2_eval, v2tau2, queue); + } + + if (kernels_[i].second.needs_laplacian() && kernels_[i].second.is_mgga()) { + scal_device(len_v2lapltau_buffer, kernels_[i].first, v2lapltau_eval, v2lapltau, queue); + } + } + } + } + + if( vrho_scr ) sycl::free( vrho_scr, *queue); + if( vsigma_scr ) sycl::free( vsigma_scr, *queue); + if( vlapl_scr ) sycl::free( vlapl_scr, *queue); + if( vtau_scr ) sycl::free( vtau_scr, *queue); + if( v2rho2_scr ) sycl::free( v2rho2_scr, *queue); + if( v2rhosigma_scr ) sycl::free( v2rhosigma_scr, *queue); + if( v2rholapl_scr ) sycl::free( 
v2rholapl_scr, *queue); + if( v2rhotau_scr ) sycl::free( v2rhotau_scr, *queue); + if( v2sigma2_scr ) sycl::free( v2sigma2_scr, *queue); + if( v2sigmalapl_scr ) sycl::free( v2sigmalapl_scr, *queue); + if( v2sigmatau_scr ) sycl::free( v2sigmatau_scr, *queue); + if( v2lapl2_scr ) sycl::free( v2lapl2_scr, *queue); + if( v2lapltau_scr ) sycl::free( v2lapltau_scr, *queue); + if( v2tau2_scr ) sycl::free( v2tau2_scr, *queue); +} + + } diff --git a/test/xc_kernel_test.cxx b/test/xc_kernel_test.cxx index cb3eb9b..9a90b6a 100644 --- a/test/xc_kernel_test.cxx +++ b/test/xc_kernel_test.cxx @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
- * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -462,7 +462,7 @@ void kernel_test( TestInterface interface, Backend backend, Kernel kern, const double fill_val_vs = 50.; const double fill_val_vl = 3.; const double fill_val_vt = 5.; - + const double fill_val_v2rho2 = 10.; const double fill_val_v2rhosigma = 11.; const double fill_val_v2rholapl = 12.; @@ -476,7 +476,7 @@ void kernel_test( TestInterface interface, Backend backend, Kernel kern, const bool use_ref_values = (interface != TestInterface::EXC_INC) and - (interface != TestInterface::EXC_VXC_INC) and + (interface != TestInterface::EXC_VXC_INC) and (interface != TestInterface::FXC_INC) and (interface != TestInterface::VXC_FXC_INC); @@ -692,7 +692,7 @@ void kernel_test( TestInterface interface, Backend backend, Kernel kern, auto ref_vals = use_ref_values ? 
load_mgga_reference_values( kern, polar, func.needs_laplacian() ) : gen_mgga_reference_values( backend,kern, polar ); - //auto ref_vals = + //auto ref_vals = // gen_mgga_reference_values( backend,kern, polar ); size_t npts = ref_vals.npts; @@ -937,7 +937,7 @@ TEST_CASE( "Libxc Correctness Check", "[xc-libxc]" ) { kernel_test( TestInterface::EXC_VXC, Backend::libxc, Kernel::SCAN_X, Spin::Polarized ); } - + SECTION( "R2SCANL Unpolarized: EXC" ) { kernel_test( TestInterface::EXC, Backend::libxc, Kernel::R2SCANL_X, @@ -984,7 +984,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, const int len_sigma = func_libxc.sigma_buffer_len( npts ); const int len_lapl = func_libxc.lapl_buffer_len( npts ); const int len_tau = func_libxc.tau_buffer_len( npts ); - + const int len_v2rho2 = func_libxc.v2rho2_buffer_len( npts ); const int len_v2rhosigma = func_libxc.v2rhosigma_buffer_len( npts ); const int len_v2rholapl = func_libxc.v2rholapl_buffer_len( npts ); @@ -1029,14 +1029,14 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, tau_use = tau_zero; } - + std::vector exc_libxc( func_builtin.exc_buffer_len(npts) ); std::vector vrho_libxc( func_builtin.vrho_buffer_len(npts) ); std::vector vsigma_libxc( func_builtin.vsigma_buffer_len(npts) ); std::vector vlapl_libxc( func_builtin.vlapl_buffer_len(npts) ); std::vector vtau_libxc( func_builtin.vtau_buffer_len(npts) ); - + std::vector v2rho2_libxc ( len_v2rho2 ); std::vector v2rhosigma_libxc ( len_v2rhosigma ); std::vector v2rholapl_libxc ( len_v2rholapl ); @@ -1053,7 +1053,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, std::vector vsigma_builtin( func_builtin.vsigma_buffer_len(npts) ); std::vector vlapl_builtin( func_builtin.vlapl_buffer_len(npts) ); std::vector vtau_builtin( func_builtin.vtau_buffer_len(npts) ); - + std::vector v2rho2_builtin ( func_builtin.v2rho2_buffer_len(npts) ); std::vector v2rhosigma_builtin ( func_builtin.v2rhosigma_buffer_len(npts) ); 
std::vector v2rholapl_builtin ( func_builtin.v2rholapl_buffer_len(npts) ); @@ -1080,15 +1080,15 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, vrho_builtin.data() ); } else if( interface == TestInterface::FXC ) { - + func_libxc.eval_fxc( npts, rho_use.data(), v2rho2_libxc.data() ); func_builtin.eval_fxc( npts, rho_use.data(), v2rho2_builtin.data() ); - + } else if( interface == TestInterface::VXC_FXC ) { - + func_libxc.eval_vxc_fxc( npts, rho_use.data(), vrho_libxc.data(), v2rho2_libxc.data() ); func_builtin.eval_vxc_fxc( npts, rho_use.data(), vrho_builtin.data(), v2rho2_builtin.data() ); - + } } else if( func_libxc.is_gga() ) { @@ -1108,19 +1108,19 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, exc_builtin.data(), vrho_builtin.data(), vsigma_builtin.data() ); } else if( interface == TestInterface::FXC ) { - + func_libxc.eval_fxc( npts, rho_use.data(), sigma_use.data(), v2rho2_libxc.data(), v2rhosigma_libxc.data(), v2sigma2_libxc.data() ); func_builtin.eval_fxc( npts, rho_use.data(), sigma_use.data(), v2rho2_builtin.data(), v2rhosigma_builtin.data(), v2sigma2_builtin.data() ); - + } else if( interface == TestInterface::VXC_FXC ) { - + func_libxc.eval_vxc_fxc( npts, rho_use.data(), sigma_use.data(), vrho_libxc.data(), vsigma_libxc.data(), v2rho2_libxc.data(), v2rhosigma_libxc.data(), v2sigma2_libxc.data() ); func_builtin.eval_vxc_fxc( npts, rho_use.data(), sigma_use.data(), vrho_builtin.data(), vsigma_builtin.data(), v2rho2_builtin.data(), v2rhosigma_builtin.data(), v2sigma2_builtin.data() ); - + } } else if( func_libxc.is_mgga() ) { @@ -1140,7 +1140,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, lapl_use.data(), tau_use.data(), exc_builtin.data(), vrho_builtin.data(), vsigma_builtin.data(), vlapl_builtin.data(), vtau_builtin.data() ); } else if( interface == TestInterface::FXC ) { - + func_libxc.eval_fxc( npts, rho_use.data(), sigma_use.data(), lapl_use.data(), tau_use.data(), 
v2rho2_libxc.data(), v2rhosigma_libxc.data(), v2rholapl_libxc.data(), @@ -1153,9 +1153,9 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, v2rhotau_builtin.data(), v2sigma2_builtin.data(), v2sigmalapl_builtin.data(), v2sigmatau_builtin.data(), v2lapl2_builtin.data(), v2lapltau_builtin.data(), v2tau2_builtin.data() ); - + } else if( interface == TestInterface::VXC_FXC ) { - + func_libxc.eval_vxc_fxc( npts, rho_use.data(), sigma_use.data(), lapl_use.data(), tau_use.data(), vrho_libxc.data(), vsigma_libxc.data(), vlapl_libxc.data(), vtau_libxc.data(), @@ -1170,7 +1170,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, v2rhotau_builtin.data(), v2sigma2_builtin.data(), v2sigmalapl_builtin.data(), v2sigmatau_builtin.data(), v2lapl2_builtin.data(), v2lapltau_builtin.data(), v2tau2_builtin.data() ); - + } } @@ -1201,7 +1201,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, CHECK( vtau_builtin[i] == Approx(vtau_libxc[i]) ); } } - + if( interface == TestInterface::FXC || interface == TestInterface::VXC_FXC ) { for( auto i = 0ul; i < len_v2rho2; ++i ) { INFO( "V2RHO2 Fails: Kernel is " << kern << ", builtin = " << v2rho2_builtin[i] << ", libxc = " << v2rho2_libxc[i] ); @@ -1264,7 +1264,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC, EvalType::Regular, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Regular Eval : EXC + VXC" ) { @@ -1272,7 +1272,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC_VXC, EvalType::Regular, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Regular Eval : FXC" ) { @@ -1281,7 +1281,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::FXC, EvalType::Regular, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized 
Regular Eval : VXC + FXC" ) { @@ -1290,14 +1290,14 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::VXC_FXC, EvalType::Regular, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Small Eval : EXC" ) { for( auto kern : builtin_supported_kernels ) { if(is_unstable_small(kern)) continue; if(is_epc(kern)) continue; - + compare_libxc_builtin( TestInterface::EXC, EvalType::Small, kern, Spin::Unpolarized ); } @@ -1337,7 +1337,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC, EvalType::Zero, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Zero Eval : EXC + VXC" ) { @@ -1345,7 +1345,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC_VXC, EvalType::Zero, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Zero Eval : FXC" ) { @@ -1354,7 +1354,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::FXC, EvalType::Zero, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Zero Eval : VXC + FXC" ) { @@ -1363,7 +1363,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::VXC_FXC, EvalType::Zero, kern, Spin::Unpolarized ); - } + } } @@ -1383,7 +1383,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC_VXC, EvalType::Regular, kern, Spin::Polarized ); - } + } } SECTION( "Polarized Regular Eval : FXC" ) { @@ -1401,7 +1401,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::VXC_FXC, EvalType::Regular, kern, Spin::Polarized ); - } + } } SECTION( "Polarized Small Eval : EXC" ) { @@ -1449,7 +1449,7 @@ TEST_CASE( "Builtin Corectness 
Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC, EvalType::Zero, kern, Spin::Polarized ); - } + } } SECTION( "Polarized Zero Eval : EXC + VXC" ) { @@ -1466,7 +1466,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::FXC, EvalType::Zero, kern, Spin::Polarized ); - } + } } SECTION( "Polarized Zero Eval : VXC + FXC" ) { @@ -1523,7 +1523,7 @@ TEST_CASE( "Scale and Increment Interface", "[xc-inc]" ) { } SECTION( "Builtin Polarized EXC + VXC" ) { - for( auto kern : builtin_supported_kernels ) + for( auto kern : builtin_supported_kernels ) kernel_test( TestInterface::EXC_VXC_INC, Backend::builtin, kern, Spin::Polarized ); } @@ -1623,7 +1623,7 @@ void device_synchronize() { } -void test_cuda_hip_interface( TestInterface interface, EvalType evaltype, +void test_device_interface( TestInterface interface, EvalType evaltype, Backend backend, Kernel kern, Spin polar ) { size_t npts_lda, npts_gga, npts_mgga, npts_lapl; @@ -1861,7 +1861,7 @@ void test_cuda_hip_interface( TestInterface interface, EvalType evaltype, safe_cuda_cpy( v2lapltau_device, v2lapltau.data(), len_v2lapltau ); } - + // Evaluate functional on device cudaStream_t stream = 0; if( interface == TestInterface::EXC ) { @@ -1907,7 +1907,7 @@ void test_cuda_hip_interface( TestInterface interface, EvalType evaltype, exc_device, vrho_device, vsigma_device, stream ); else if( func.is_mgga() ) func.eval_exc_vxc_inc_device( alpha, npts, rho_device, sigma_device, - lapl_device, tau_device, exc_device, vrho_device, vsigma_device, + lapl_device, tau_device, exc_device, vrho_device, vsigma_device, vlapl_device, vtau_device, stream ); } else if( interface == TestInterface::FXC ) { @@ -2144,1332 +2144,1122 @@ void test_cuda_hip_interface( TestInterface interface, EvalType evaltype, } +#endif // EXCHCXX_ENABLE_CUDA/HIP -TEST_CASE( "CUDA Interfaces", "[xc-device]" ) { - - SECTION( "Libxc Functionals" ) { - - 
SECTION( "LDA Functionals: EXC Regular Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } +#ifdef EXCHCXX_ENABLE_SYCL - SECTION( "LDA Functionals: EXC + VXC Regular Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } +inline sycl::queue q{ sycl::default_selector_v, + sycl::property_list{sycl::property::queue::in_order{}} }; - SECTION( "LDA Functionals: FXC Regular Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +template +T* safe_sycl_malloc( size_t n, sycl::queue& q ) { + if( n ) { + T* ptr = sycl::malloc_device(n, q); + return ptr; + } else return nullptr; +} - SECTION( "LDA Functionals: VXC + FXC Regular Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +template +void safe_sycl_cpy( T* dest, const T* src, size_t len, sycl::queue& q ) { - SECTION( "GGA Functionals: EXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } + q.memcpy( (void*)dest, (const void*)src, len*sizeof(T) ); - SECTION( "GGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } +} - SECTION( "GGA Functionals: FXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( 
TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +void sycl_free_all(sycl::queue&){ } +template +void sycl_free_all( sycl::queue& q, T* ptr, Args&&... args ) { - SECTION( "GGA Functionals: VXC + FXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if( ptr ) { + sycl::free( (void*)ptr, q ); + } - SECTION( "MGGA Functionals: EXC Regular Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } + sycl_free_all( q, std::forward(args)... ); - SECTION( "MGGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } +} - SECTION( "MGGA Functionals: FXC Regular Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +void device_synchronize( sycl::queue& q ) { +q.wait_and_throw(); +} - SECTION( "MGGA Functionals: VXC + FXC Regular Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized ); - } - } +void test_device_interface( TestInterface interface, EvalType evaltype, + Backend backend, Kernel kern, Spin polar) { + size_t npts_lda, npts_gga, npts_mgga, npts_lapl; + std::vector ref_rho, ref_sigma, ref_lapl, ref_tau; + std::tie(npts_lda, ref_rho ) = load_reference_density( polar ); + std::tie(npts_gga, ref_sigma) = load_reference_sigma ( polar ); + std::tie(npts_lapl, ref_lapl) = 
load_reference_lapl ( polar ); + std::tie(npts_mgga, ref_tau) = load_reference_tau ( polar ); - SECTION( "LDA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + REQUIRE( npts_lda == npts_gga ); + REQUIRE( npts_lda == npts_mgga ); + REQUIRE( npts_lda == npts_lapl ); - SECTION( "LDA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + const int npts = 1;//npts_lda; - SECTION( "LDA Functionals: FXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if (polar == Spin::Unpolarized && !supports_unpolarized(kern)){ + CHECK_THROWS( XCKernel( backend, kern, polar ) ); + return; + } + XCKernel func( backend, kern, polar ); - SECTION( "LDA Functionals: VXC + FXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + size_t len_rho_buffer = func.rho_buffer_len(npts); + size_t len_sigma_buffer = func.sigma_buffer_len(npts); + size_t len_lapl_buffer = func.lapl_buffer_len(npts); + size_t len_tau_buffer = func.tau_buffer_len(npts); + size_t len_exc_buffer = func.exc_buffer_len(npts); + size_t len_vrho_buffer = func.vrho_buffer_len(npts); + size_t len_vsigma_buffer = func.vsigma_buffer_len(npts); + size_t len_vlapl_buffer = func.vlapl_buffer_len(npts); + size_t len_vtau_buffer = 
func.vtau_buffer_len(npts); - SECTION( "GGA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + size_t len_v2rho2 = func.v2rho2_buffer_len(npts); + size_t len_v2rhosigma = func.v2rhosigma_buffer_len(npts); + size_t len_v2rholapl = func.v2rholapl_buffer_len(npts); + size_t len_v2rhotau = func.v2rhotau_buffer_len(npts); + size_t len_v2sigma2 = func.v2sigma2_buffer_len(npts); + size_t len_v2sigmalapl = func.v2sigmalapl_buffer_len(npts); + size_t len_v2sigmatau = func.v2sigmatau_buffer_len(npts); + size_t len_v2lapl2 = func.v2lapl2_buffer_len(npts); + size_t len_v2lapltau = func.v2lapltau_buffer_len(npts); + size_t len_v2tau2 = func.v2tau2_buffer_len(npts); - SECTION( "GGA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + std::vector rho_small(len_rho_buffer, 1e-13); + std::vector sigma_small(len_sigma_buffer, 1e-14); + std::vector lapl_small(len_lapl_buffer, 1e-14); + std::vector tau_small(len_tau_buffer, 1e-14); - SECTION( "GGA Functionals: FXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + std::vector rho_zero(len_rho_buffer, 0.); + std::vector sigma_zero(len_sigma_buffer, 0.); + std::vector lapl_zero(len_lapl_buffer, 0.); + std::vector tau_zero(len_tau_buffer, 0.); - SECTION( "GGA Functionals: VXC + FXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( 
TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + std::vector rho, sigma, lapl, tau; - SECTION( "MGGA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if( evaltype == EvalType::Regular ) { + rho = ref_rho; + sigma = ref_sigma; + lapl = ref_lapl; + tau = ref_tau; + } - SECTION( "MGGA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if( evaltype == EvalType::Small ) { + rho = rho_small; + sigma = sigma_small; + lapl = lapl_small; + tau = tau_small; + } - SECTION( "MGGA Functionals: FXC Small Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; + if( evaltype == EvalType::Zero ) { + rho = rho_zero; + sigma = sigma_zero; + lapl = lapl_zero; + tau = tau_zero; + } - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + // Get Reference Values + std::vector + exc_ref( len_exc_buffer ), + vrho_ref( len_vrho_buffer ), + vsigma_ref( len_vsigma_buffer ), + vlapl_ref( len_vlapl_buffer ), + vtau_ref( len_vtau_buffer ); - SECTION( "MGGA Functionals: VXC + FXC Small Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + std::vector + v2rho2_ref( len_v2rho2 ), + v2rhosigma_ref( len_v2rhosigma ), + v2rholapl_ref( len_v2rholapl ), + v2rhotau_ref( len_v2rhotau ), + v2sigma2_ref( len_v2sigma2 ), + v2sigmalapl_ref( len_v2sigmalapl ), + 
v2sigmatau_ref( len_v2sigmatau ), + v2lapl2_ref( len_v2lapl2 ), + v2lapltau_ref( len_v2lapltau ), + v2tau2_ref( len_v2tau2 ); - SECTION( "LDA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } + if( interface == TestInterface::EXC or interface == TestInterface::EXC_INC ) { - SECTION( "LDA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } + if( func.is_lda() ) + func.eval_exc( npts, rho.data(), exc_ref.data() ); + else if( func.is_gga() ) + func.eval_exc( npts, rho.data(), sigma.data(), exc_ref.data() ); + else if( func.is_mgga() ) + func.eval_exc( npts, rho.data(), sigma.data(), lapl.data(), tau.data(), exc_ref.data() ); - SECTION( "LDA Functionals: FXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + } else if( interface == TestInterface::EXC_VXC or interface == TestInterface::EXC_VXC_INC ) { - SECTION( "LDA Functionals: VXC + FXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + if( func.is_lda() ) + func.eval_exc_vxc( npts, rho.data(), exc_ref.data(), vrho_ref.data() ); + else if( func.is_gga() ) + func.eval_exc_vxc( npts, rho.data(), sigma.data(), exc_ref.data(), + vrho_ref.data(), vsigma_ref.data() ); + else if( func.is_mgga() ) + func.eval_exc_vxc( npts, rho.data(), sigma.data(), lapl.data(), tau.data(), + exc_ref.data(), vrho_ref.data(), vsigma_ref.data(), vlapl_ref.data(), vtau_ref.data() ); - SECTION( "GGA Functionals: EXC Zero Eval 
Unpolarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } + } else if( interface == TestInterface::FXC or interface == TestInterface::FXC_INC ) { - SECTION( "GGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } + if( func.is_lda() ) + func.eval_fxc( npts, rho.data(), v2rho2_ref.data() ); + else if( func.is_gga() ) + func.eval_fxc( npts, rho.data(), sigma.data(), v2rho2_ref.data(), + v2rhosigma_ref.data(), v2sigma2_ref.data() ); + else if( func.is_mgga() ) + func.eval_fxc( npts, rho.data(), sigma.data(), lapl.data(), tau.data(), + v2rho2_ref.data(), v2rhosigma_ref.data(), v2rholapl_ref.data(), + v2rhotau_ref.data(), v2sigma2_ref.data(), v2sigmalapl_ref.data(), + v2sigmatau_ref.data(), v2lapl2_ref.data(), v2lapltau_ref.data(), + v2tau2_ref.data() ); + } else if( interface == TestInterface::VXC_FXC or interface == TestInterface::VXC_FXC_INC ) { + if( func.is_lda() ) + func.eval_vxc_fxc( npts, rho.data(), vrho_ref.data(), v2rho2_ref.data() ); + else if( func.is_gga() ) + func.eval_vxc_fxc( npts, rho.data(), sigma.data(), vrho_ref.data(), + vsigma_ref.data(), v2rho2_ref.data(), v2rhosigma_ref.data(), + v2sigma2_ref.data() ); + else if( func.is_mgga() ) + func.eval_vxc_fxc( npts, rho.data(), sigma.data(), lapl.data(), tau.data(), + vrho_ref.data(), vsigma_ref.data(), vlapl_ref.data(), vtau_ref.data(), + v2rho2_ref.data(), v2rhosigma_ref.data(), v2rholapl_ref.data(), + v2rhotau_ref.data(), v2sigma2_ref.data(), v2sigmalapl_ref.data(), + v2sigmatau_ref.data(), v2lapl2_ref.data(), v2lapltau_ref.data(), + v2tau2_ref.data() ); + } - SECTION( "GGA Functionals: FXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - 
Backend::libxc, kern, Spin::Unpolarized ); - } - } - SECTION( "GGA Functionals: VXC + FXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } - SECTION( "MGGA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - SECTION( "MGGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - SECTION( "MGGA Functionals: FXC Zero Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } - SECTION( "MGGA Functionals: VXC + FXC Zero Eval Unpolarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized ); - } - } + // Allocate device memory + double* rho_device = safe_sycl_malloc( len_rho_buffer , q); + double* sigma_device = safe_sycl_malloc( len_sigma_buffer , q); + double* lapl_device = safe_sycl_malloc( len_lapl_buffer , q); + double* tau_device = safe_sycl_malloc( len_tau_buffer , q); + double* exc_device = safe_sycl_malloc( len_exc_buffer , q); + double* vrho_device = safe_sycl_malloc( len_vrho_buffer , q); + double* vsigma_device = safe_sycl_malloc( len_vsigma_buffer , q); + double* vlapl_device = safe_sycl_malloc( len_vlapl_buffer , q); + double* vtau_device = safe_sycl_malloc( len_vtau_buffer , q); + + double* v2rho2_device = safe_sycl_malloc( len_v2rho2 , q); + double* v2rhosigma_device = safe_sycl_malloc( len_v2rhosigma , q); + double* 
v2rholapl_device = safe_sycl_malloc( len_v2rholapl , q); + double* v2rhotau_device = safe_sycl_malloc( len_v2rhotau , q); + double* v2sigma2_device = safe_sycl_malloc( len_v2sigma2 , q); + double* v2sigmalapl_device = safe_sycl_malloc( len_v2sigmalapl , q); + double* v2sigmatau_device = safe_sycl_malloc( len_v2sigmatau , q); + double* v2lapl2_device = safe_sycl_malloc( len_v2lapl2 , q); + double* v2lapltau_device = safe_sycl_malloc( len_v2lapltau , q); + double* v2tau2_device = safe_sycl_malloc( len_v2tau2 , q); + // H2D Copy of rho / sigma + safe_sycl_cpy( rho_device, rho.data(), len_rho_buffer, q); + if( func.is_gga() or func.is_mgga() ) + safe_sycl_cpy( sigma_device, sigma.data(), len_sigma_buffer, q); + if( func.is_mgga() ) + safe_sycl_cpy( tau_device, tau.data(), len_tau_buffer, q); + if( func.needs_laplacian() ) + safe_sycl_cpy( lapl_device, lapl.data(), len_lapl_buffer, q); + const double alpha = 3.14; + const double fill_val_e = 0.1; + const double fill_val_vr = 1.; + const double fill_val_vs = 2.; + const double fill_val_vl = 3.; + const double fill_val_vt = 4.; + const double fill_val_v2rho2 = 10.; + const double fill_val_v2rhosigma = 11.; + const double fill_val_v2rholapl = 12.; + const double fill_val_v2rhotau = 13.; + const double fill_val_v2sigma2 = 14.; + const double fill_val_v2sigmalapl = 15.; + const double fill_val_v2sigmatau = 16.; + const double fill_val_v2lapl2 = 17.; + const double fill_val_v2lapltau = 18.; + const double fill_val_v2tau2 = 19.; + std::vector + exc( len_exc_buffer, fill_val_e ), vrho( len_vrho_buffer, fill_val_vr ), + vsigma( len_vsigma_buffer, fill_val_vs ), vlapl(len_vlapl_buffer, fill_val_vl), + vtau(len_vtau_buffer, fill_val_vt); + std::vector + v2rho2( len_v2rho2, fill_val_v2rho2 ), + v2rhosigma( len_v2rhosigma, fill_val_v2rhosigma ), + v2rholapl( len_v2rholapl, fill_val_v2rholapl ), + v2rhotau( len_v2rhotau, fill_val_v2rhotau ), + v2sigma2( len_v2sigma2, fill_val_v2sigma2 ), + v2sigmalapl( len_v2sigmalapl, 
fill_val_v2sigmalapl ), + v2sigmatau( len_v2sigmatau, fill_val_v2sigmatau ), + v2lapl2( len_v2lapl2, fill_val_v2lapl2 ), + v2lapltau( len_v2lapltau, fill_val_v2lapltau ), + v2tau2( len_v2tau2, fill_val_v2tau2 ); + // H2D copy of initial values, tests clobber / increment + safe_sycl_cpy( exc_device, exc.data(), len_exc_buffer, q); + safe_sycl_cpy( vrho_device, vrho.data(), len_vrho_buffer, q); + safe_sycl_cpy( v2rho2_device, v2rho2.data(), len_v2rho2, q); + if( func.is_gga() or func.is_mgga() ){ + safe_sycl_cpy( vsigma_device, vsigma.data(), len_vsigma_buffer, q); + safe_sycl_cpy( v2rhosigma_device, v2rhosigma.data(), len_v2rhosigma, q); + safe_sycl_cpy( v2sigma2_device, v2sigma2.data(), len_v2sigma2, q); + } + if( func.is_mgga() ){ + safe_sycl_cpy( vtau_device, vtau.data(), len_vtau_buffer, q); + safe_sycl_cpy( v2rhotau_device, v2rhotau.data(), len_v2rhotau, q); + safe_sycl_cpy( v2sigmatau_device, v2sigmatau.data(), len_v2sigmatau, q); + safe_sycl_cpy( v2tau2_device, v2tau2.data(), len_v2tau2, q); + } + if( func.needs_laplacian() ){ + safe_sycl_cpy( vlapl_device, vlapl.data(), len_vlapl_buffer, q); + safe_sycl_cpy( v2rholapl_device, v2rholapl.data(), len_v2rholapl, q); + safe_sycl_cpy( v2sigmalapl_device, v2sigmalapl.data(), len_v2sigmalapl, q); + safe_sycl_cpy( v2lapl2_device, v2lapl2.data(), len_v2lapl2, q); + safe_sycl_cpy( v2lapltau_device, v2lapltau.data(), len_v2lapltau, q); + } - SECTION( "LDA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - SECTION( "LDA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + // Evaluate functional on device + if( interface == TestInterface::EXC ) { - SECTION( "LDA Functionals: FXC Regular Eval Polarized" ) { - for( auto kern : 
lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + if( func.is_lda() ) + func.eval_exc_device( npts, rho_device, exc_device, &q ); + else if( func.is_gga() ) + func.eval_exc_device( npts, rho_device, sigma_device, exc_device, + &q ); + else if( func.is_mgga() ) + func.eval_exc_device( npts, rho_device, sigma_device, lapl_device, tau_device, + exc_device, &q ); - SECTION( "LDA Functionals: VXC + FXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + } else if( interface == TestInterface::EXC_INC ) { - SECTION( "GGA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + if( func.is_lda() ) + func.eval_exc_inc_device( alpha, npts, rho_device, exc_device, &q ); + else if( func.is_gga() ) + func.eval_exc_inc_device( alpha, npts, rho_device, sigma_device, exc_device, + &q ); + else if( func.is_mgga() ) + func.eval_exc_inc_device( alpha, npts, rho_device, sigma_device, lapl_device, + tau_device, exc_device, &q ); - SECTION( "GGA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + } else if( interface == TestInterface::EXC_VXC ) { - SECTION( "GGA Functionals: FXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + if( func.is_lda() ) + func.eval_exc_vxc_device( npts, rho_device, exc_device, vrho_device, &q ); + else if( func.is_gga() ) 
+ func.eval_exc_vxc_device( npts, rho_device, sigma_device, exc_device, + vrho_device, vsigma_device, &q ); + else if( func.is_mgga() ) + func.eval_exc_vxc_device( npts, rho_device, sigma_device, lapl_device, tau_device, + exc_device, vrho_device, vsigma_device, vlapl_device, vtau_device, &q ); - SECTION( "GGA Functionals: VXC + FXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } + } else if( interface == TestInterface::EXC_VXC_INC ) { - SECTION( "MGGA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + if( func.is_lda() ) + func.eval_exc_vxc_inc_device( alpha, npts, rho_device, exc_device, + vrho_device, &q ); + else if( func.is_gga() ) + func.eval_exc_vxc_inc_device( alpha, npts, rho_device, sigma_device, + exc_device, vrho_device, vsigma_device, &q ); + else if( func.is_mgga() ) + func.eval_exc_vxc_inc_device( alpha, npts, rho_device, sigma_device, + lapl_device, tau_device, exc_device, vrho_device, vsigma_device, + vlapl_device, vtau_device, &q ); - SECTION( "MGGA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } + } else if( interface == TestInterface::FXC ) { - SECTION( "MGGA Functionals: FXC Regular Eval Polarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "MGGA Functionals: VXC + FXC Regular Eval Polarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, 
EvalType::Regular, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "LDA Functionals: EXC Small Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "LDA Functionals: EXC + VXC Small Eval Polarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "LDA Functionals: FXC Small Eval Polarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "LDA Functionals: VXC + FXC Small Eval Polarized" ) { - for( auto kern : lda_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - - SECTION( "GGA Functionals: EXC Small Eval Polarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "GGA Functionals: EXC + VXC Small Eval Polarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "GGA Functionals: FXC Small Eval Polarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "GGA 
Functionals: VXC + FXC Small Eval Polarized" ) { - for( auto kern : gga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "MGGA Functionals: EXC Small Eval Polarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "MGGA Functionals: EXC + VXC Small Eval Polarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "MGGA Functionals: FXC Small Eval Polarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "MGGA Functionals: VXC + FXC Small Eval Polarized" ) { - for( auto kern : mgga_kernels ) { - if(is_unstable_small(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "LDA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - - - SECTION( "LDA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - - SECTION( "LDA Functionals: FXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, 
EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "LDA Functionals: VXC + FXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "GGA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - - SECTION( "GGA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - - SECTION( "GGA Functionals: FXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "GGA Functionals: VXC + FXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "MGGA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - - SECTION( "MGGA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : mgga_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - - SECTION( "MGGA Functionals: FXC Zero Eval Polarized" ) { - for( auto kern : mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - } - - SECTION( "MGGA Functionals: VXC + FXC Zero Eval Polarized" ) { - for( auto kern : 
mgga_kernels ){ - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized ); - } - } + if( func.is_lda() ) + func.eval_fxc_device( npts, rho_device, v2rho2_device, &q ); + else if( func.is_gga() ) + func.eval_fxc_device( npts, rho_device, sigma_device, v2rho2_device, + v2rhosigma_device, v2sigma2_device, &q ); + else if( func.is_mgga() ) + func.eval_fxc_device( npts, rho_device, sigma_device, lapl_device, tau_device, + v2rho2_device, v2rhosigma_device, v2rholapl_device, v2rhotau_device, + v2sigma2_device, v2sigmalapl_device, v2sigmatau_device, + v2lapl2_device, v2lapltau_device, v2tau2_device, &q ); + } else if( interface == TestInterface::FXC_INC ) { + if( func.is_lda() ) + func.eval_fxc_inc_device( alpha, npts, rho_device, v2rho2_device, &q ); + else if( func.is_gga() ) + func.eval_fxc_inc_device( alpha, npts, rho_device, sigma_device, + v2rho2_device, v2rhosigma_device, v2sigma2_device, &q ); + else if( func.is_mgga() ) + func.eval_fxc_inc_device( alpha, npts, rho_device, sigma_device, + lapl_device, tau_device, v2rho2_device, v2rhosigma_device, + v2rholapl_device, v2rhotau_device, v2sigma2_device, + v2sigmalapl_device, v2sigmatau_device, v2lapl2_device, + v2lapltau_device, v2tau2_device, &q ); + } else if( interface == TestInterface::VXC_FXC ) { + if( func.is_lda() ) + func.eval_vxc_fxc_device( npts, rho_device, vrho_device, v2rho2_device, &q ); + else if( func.is_gga() ) + func.eval_vxc_fxc_device( npts, rho_device, sigma_device, vrho_device, + vsigma_device, v2rho2_device, v2rhosigma_device, v2sigma2_device, &q ); + else if( func.is_mgga() ) + func.eval_vxc_fxc_device( npts, rho_device, sigma_device, lapl_device, tau_device, + vrho_device, vsigma_device, vlapl_device, vtau_device, + v2rho2_device, v2rhosigma_device, v2rholapl_device, + v2rhotau_device, v2sigma2_device, v2sigmalapl_device, + v2sigmatau_device, v2lapl2_device, v2lapltau_device, + v2tau2_device, &q ); + } 
else if( interface == TestInterface::VXC_FXC_INC ) { + if( func.is_lda() ) + func.eval_vxc_fxc_inc_device( alpha, npts, rho_device, vrho_device, + v2rho2_device, &q ); + else if( func.is_gga() ) + func.eval_vxc_fxc_inc_device( alpha, npts, rho_device, sigma_device, + vrho_device, vsigma_device, v2rho2_device, v2rhosigma_device, + v2sigma2_device, &q ); + else if( func.is_mgga() ) + func.eval_vxc_fxc_inc_device( alpha, npts, rho_device, sigma_device, + lapl_device, tau_device, vrho_device, vsigma_device, + vlapl_device, vtau_device, v2rho2_device, v2rhosigma_device, + v2rholapl_device, v2rhotau_device, v2sigma2_device, + v2sigmalapl_device, v2sigmatau_device, v2lapl2_device, + v2lapltau_device, v2tau2_device, &q ); } - SECTION( "Builtin Functionals" ) { - - SECTION("EXC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("EXC + VXC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("FXC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + INC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("EXC + VXC + INC Regular: Unpolarized") { - for( auto kern : 
builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("FXC + INC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC + INC Regular: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + VXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("FXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_INC, 
EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + VXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("FXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("EXC + VXC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - - SECTION("FXC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("VXC + FXC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - } - - SECTION("EXC + INC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - 
test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } + device_synchronize( q ); - SECTION("EXC + VXC + INC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } + // D2H of results + safe_sycl_cpy( exc.data(), exc_device, len_exc_buffer, q); + safe_sycl_cpy( vrho.data(), vrho_device, len_vrho_buffer, q); + safe_sycl_cpy( v2rho2.data(), v2rho2_device, len_v2rho2, q); + if( func.is_gga() or func.is_mgga() ){ + safe_sycl_cpy( vsigma.data(), vsigma_device, len_vsigma_buffer, q); + safe_sycl_cpy( v2rhosigma.data(), v2rhosigma_device, len_v2rhosigma, q); + safe_sycl_cpy( v2sigma2.data(), v2sigma2_device, len_v2sigma2, q); + } + if( func.is_mgga() ){ + safe_sycl_cpy( vtau.data(), vtau_device, len_vtau_buffer, q); + safe_sycl_cpy( v2rhotau.data(), v2rhotau_device, len_v2rhotau, q); + safe_sycl_cpy( v2sigmatau.data(), v2sigmatau_device, len_v2sigmatau, q); + safe_sycl_cpy( v2tau2.data(), v2tau2_device, len_v2tau2, q); + } + if( func.needs_laplacian() ){ + safe_sycl_cpy( vlapl.data(), vlapl_device, len_vlapl_buffer, q); + safe_sycl_cpy( v2rholapl.data(), v2rholapl_device, len_v2rholapl, q); + safe_sycl_cpy( v2sigmalapl.data(), v2sigmalapl_device, len_v2sigmalapl, q); + safe_sycl_cpy( v2lapl2.data(), v2lapl2_device, len_v2lapl2, q); + safe_sycl_cpy( v2lapltau.data(), v2lapltau_device, len_v2lapltau, q); + } - SECTION("FXC + INC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - } + // Check correctness + if( interface == TestInterface::EXC_INC or interface == TestInterface::EXC_VXC_INC ) { + for( auto i = 0ul; i < len_exc_buffer; ++i ) + CHECK( exc[i] == Approx(fill_val_e + alpha * exc_ref[i]) ); + } 
else if( interface == TestInterface::EXC or interface == TestInterface::EXC_VXC ) { + for( auto i = 0ul; i < len_exc_buffer; ++i ) + CHECK( exc[i] == Approx(exc_ref[i]) ); + } - SECTION("VXC + FXC + INC Zero: Unpolarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized ); - } - } + if( interface == TestInterface::EXC_VXC_INC or interface == TestInterface::VXC_FXC_INC ) { + for( auto i = 0ul; i < len_vrho_buffer; ++i ) + CHECK( vrho[i] == Approx(fill_val_vr + alpha * vrho_ref[i]) ); + for( auto i = 0ul; i < len_vsigma_buffer; ++i ) + CHECK( vsigma[i] == Approx(fill_val_vs + alpha * vsigma_ref[i]) ); + for( auto i = 0ul; i < len_vlapl_buffer; ++i ) + CHECK( vlapl[i] == Approx(fill_val_vl + alpha * vlapl_ref[i]) ); + for( auto i = 0ul; i < len_vtau_buffer; ++i ) + CHECK( vtau[i] == Approx(fill_val_vt + alpha * vtau_ref[i]) ); + } else if(interface == TestInterface::EXC_VXC or interface == TestInterface::VXC_FXC) { - SECTION("EXC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_vrho_buffer; ++i ){ + INFO( "Kernel is " << kern ); + CHECK( vrho[i] == Approx(vrho_ref[i]) ); } - - SECTION("EXC + VXC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_vsigma_buffer; ++i ) { + INFO( "vsigma Fails: Kernel is " << kern << ", builtin device = " << vsigma[i] << ", builtin = " << vsigma_ref[i] ); + bool is_close = (vsigma[i] == Approx(vsigma_ref[i]) || vsigma[i] == Approx(vsigma_ref[i]).margin(1e-13)); + CHECK( is_close ); } - - SECTION("FXC Regular: Polarized") { - for( auto kern : builtin_supported_kernels 
) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_vlapl_buffer; ++i ) { + INFO( "Kernel is " << kern ); + CHECK( vlapl[i] == Approx(vlapl_ref[i]).margin(std::numeric_limits::epsilon()) ); } - - SECTION("VXC + FXC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_vtau_buffer; ++i ) { + INFO( "Kernel is " << kern << std::scientific << " " << vtau[i] << " " << vtau_ref[i] ); + CHECK( vtau[i] == Approx(vtau_ref[i]).margin(std::numeric_limits::epsilon()) ); } - SECTION("EXC + INC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } + } - SECTION("EXC + VXC + INC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); + if( interface == TestInterface::FXC or interface == TestInterface::VXC_FXC ) { + for( auto i = 0ul; i < len_v2rho2; ++i ) { + INFO( "V2RHO2 Fails: Kernel is " << kern << ", builtin device = " << v2rho2[i] << ", builtin = " << v2rho2_ref[i] ); + bool is_close = (v2rho2[i] == Approx(v2rho2_ref[i]) || v2rho2[i] == Approx(v2rho2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("FXC + INC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2rhosigma; ++i ) { + INFO( "V2RHOSIGMA Fails: Kernel is " << kern << ", builtin device = " << 
v2rhosigma[i] << ", builtin = " << v2rhosigma_ref[i] ); + bool is_close = (v2rhosigma[i] == Approx(v2rhosigma_ref[i]) || v2rhosigma[i] == Approx(v2rhosigma_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("VXC + FXC + INC Regular: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2rholapl; ++i ) { + INFO( "V2RHOLAPL Fails: Kernel is " << kern << ", builtin device = " << v2rholapl[i] << ", builtin = " << v2rholapl_ref[i] ); + bool is_close = (v2rholapl[i] == Approx(v2rholapl_ref[i]) || v2rholapl[i] == Approx(v2rholapl_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2rhotau; ++i ) { + INFO( "V2RHOTAU Fails: Kernel is " << kern << ", builtin device = " << v2rhotau[i] << ", builtin = " << v2rhotau_ref[i] ); + bool is_close = (v2rhotau[i] == Approx(v2rhotau_ref[i]) || v2rhotau[i] == Approx(v2rhotau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + VXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2sigma2; ++i ) { + INFO( "V2SIGMA2 Fails: Kernel is " << kern << ", builtin device = " << v2sigma2[i] << ", builtin = " << v2sigma2_ref[i] ); + bool is_close = (v2sigma2[i] == Approx(v2sigma2_ref[i]) || v2sigma2[i] == Approx(v2sigma2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("FXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - 
if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2sigmalapl; ++i ) { + INFO( "V2SIGMALAPL Fails: Kernel is " << kern << ", builtin device = " << v2sigmalapl[i] << ", builtin = " << v2sigmalapl_ref[i] ); + bool is_close = (v2sigmalapl[i] == Approx(v2sigmalapl_ref[i]) || v2sigmalapl[i] == Approx(v2sigmalapl_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("VXC + FXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2sigmatau; ++i ) { + INFO( "V2SIGMATAU Fails: Kernel is " << kern << ", builtin device = " << v2sigmatau[i] << ", builtin = " << v2sigmatau_ref[i] ); + bool is_close = (v2sigmatau[i] == Approx(v2sigmatau_ref[i]) || v2sigmatau[i] == Approx(v2sigmatau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2lapl2; ++i ) { + INFO( "V2LAPL2 Fails: Kernel is " << kern << ", builtin device = " << v2lapl2[i] << ", builtin = " << v2lapl2_ref[i] ); + bool is_close = (v2lapl2[i] == Approx(v2lapl2_ref[i]) || v2lapl2[i] == Approx(v2lapl2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + VXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small(kern)) continue; - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto 
i = 0ul; i < len_v2lapltau; ++i ) { + INFO( "V2LAPLTAU Fails: Kernel is " << kern << ", builtin device = " << v2lapltau[i] << ", builtin = " << v2lapltau_ref[i] ); + bool is_close = (v2lapltau[i] == Approx(v2lapltau_ref[i]) || v2lapltau[i] == Approx(v2lapltau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("FXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2tau2; ++i ) { + INFO( "V2TAU2 Fails: Kernel is " << kern << ", builtin device = " << v2tau2[i] << ", builtin = " << v2tau2_ref[i] ); + bool is_close = (v2tau2[i] == Approx(v2tau2_ref[i]) || v2tau2[i] == Approx(v2tau2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("VXC + FXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_unstable_small_2nd_deriv_device(kern)) continue; - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized ); - } + } else if( interface == TestInterface::FXC_INC or interface == TestInterface::VXC_FXC_INC ) { + for( auto i = 0ul; i < len_v2rho2; ++i ) { + INFO( "V2RHO2 Fails: Kernel is " << kern << ", builtin device = " << v2rho2[i] << ", builtin = " << v2rho2_ref[i] ); + bool is_close = (v2rho2[i] == Approx(fill_val_v2rho2 + alpha * v2rho2_ref[i]) || v2rho2[i] == Approx(fill_val_v2rho2 + alpha * v2rho2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2rhosigma; ++i ) { + INFO( "V2RHOSIGMA Fails: Kernel is " << kern << ", builtin device = " << v2rhosigma[i] << 
", builtin = " << v2rhosigma_ref[i] ); + bool is_close = (v2rhosigma[i] == Approx(fill_val_v2rhosigma + alpha * v2rhosigma_ref[i]) || v2rhosigma[i] == Approx(fill_val_v2rhosigma + alpha * v2rhosigma_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + VXC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2rholapl; ++i ) { + INFO( "V2RHOLAPL Fails: Kernel is " << kern << ", builtin device = " << v2rholapl[i] << ", builtin = " << v2rholapl_ref[i] ); + bool is_close = (v2rholapl[i] == Approx(fill_val_v2rholapl + alpha * v2rholapl_ref[i]) || v2rholapl[i] == Approx(fill_val_v2rholapl + alpha * v2rholapl_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("FXC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2rhotau; ++i ) { + INFO( "V2RHOTAU Fails: Kernel is " << kern << ", builtin device = " << v2rhotau[i] << ", builtin = " << v2rhotau_ref[i] ); + bool is_close = (v2rhotau[i] == Approx(fill_val_v2rhotau + alpha * v2rhotau_ref[i]) || v2rhotau[i] == Approx(fill_val_v2rhotau + alpha * v2rhotau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("VXC + FXC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::VXC_FXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2sigma2; ++i ) { + INFO( "V2SIGMA2 Fails: Kernel is " << kern << ", builtin device = " << v2sigma2[i] << ", builtin = " << v2sigma2_ref[i] ); + bool is_close = (v2sigma2[i] == Approx(fill_val_v2sigma2 + alpha * v2sigma2_ref[i]) || v2sigma2[i] == Approx(fill_val_v2sigma2 + alpha * 
v2sigma2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + INC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2sigmalapl; ++i ) { + INFO( "V2SIGMALAPL Fails: Kernel is " << kern << ", builtin device = " << v2sigmalapl[i] << ", builtin = " << v2sigmalapl_ref[i] ); + bool is_close = (v2sigmalapl[i] == Approx(fill_val_v2sigmalapl + alpha * v2sigmalapl_ref[i]) || v2sigmalapl[i] == Approx(fill_val_v2sigmalapl + alpha * v2sigmalapl_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("EXC + VXC + INC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_cuda_hip_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); + for( auto i = 0ul; i < len_v2sigmatau; ++i ) { + INFO( "V2SIGMATAU Fails: Kernel is " << kern << ", builtin device = " << v2sigmatau[i] << ", builtin = " << v2sigmatau_ref[i] ); + bool is_close = (v2sigmatau[i] == Approx(fill_val_v2sigmatau + alpha * v2sigmatau_ref[i]) || v2sigmatau[i] == Approx(fill_val_v2sigmatau + alpha * v2sigmatau_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("FXC + INC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - test_cuda_hip_interface( TestInterface::FXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2lapl2; ++i ) { + INFO( "V2LAPL2 Fails: Kernel is " << kern << ", builtin device = " << v2lapl2[i] << ", builtin = " << v2lapl2_ref[i] ); + bool is_close = (v2lapl2[i] == Approx(fill_val_v2lapl2 + alpha * v2lapl2_ref[i]) || v2lapl2[i] == Approx(fill_val_v2lapl2 + alpha * v2lapl2_ref[i]).margin(1e-11)); + CHECK( is_close ); } - - SECTION("VXC + FXC + INC Zero: Polarized") { - for( auto kern : builtin_supported_kernels ) { - if(is_deorbitalized(kern)) continue; - 
test_cuda_hip_interface( TestInterface::VXC_FXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized ); - } + for( auto i = 0ul; i < len_v2lapltau; ++i ) { + INFO( "V2LAPLTAU Fails: Kernel is " << kern << ", builtin device = " << v2lapltau[i] << ", builtin = " << v2lapltau_ref[i] ); + bool is_close = (v2lapltau[i] == Approx(fill_val_v2lapltau + alpha * v2lapltau_ref[i]) || v2lapltau[i] == Approx(fill_val_v2lapltau + alpha * v2lapltau_ref[i]).margin(1e-11)); + CHECK( is_close ); } + for( auto i = 0ul; i < len_v2tau2; ++i ) { + INFO( "V2TAU2 Fails: Kernel is " << kern << ", builtin device = " << v2tau2[i] << ", builtin = " << v2tau2_ref[i] ); + bool is_close = (v2tau2[i] == Approx(fill_val_v2tau2 + alpha * v2tau2_ref[i]) || v2tau2[i] == Approx(fill_val_v2tau2 + alpha * v2tau2_ref[i]).margin(1e-11)); + CHECK( is_close ); + } + } + // Free device memory + sycl_free_all( q, rho_device, sigma_device, exc_device, vrho_device, vsigma_device, lapl_device, tau_device, + vlapl_device, vtau_device, + v2rho2_device, v2rhosigma_device, v2rholapl_device, v2rhotau_device, + v2sigma2_device, v2sigmalapl_device, v2sigmatau_device, + v2lapl2_device, v2lapltau_device, v2tau2_device ); - } - - -} - -#endif - - - -#ifdef EXCHCXX_ENABLE_SYCL - -template -T* safe_sycl_malloc( size_t n, sycl::queue& q ) { - if( n ) { - T* ptr = sycl::malloc_device(n, q); - return ptr; - } else return nullptr; -} - -template -void safe_sycl_cpy( T* dest, const T* src, size_t len, sycl::queue& q ) { - - q.memcpy( (void*)dest, (const void*)src, len*sizeof(T) ); - -} - -void sycl_free_all(sycl::queue&){ } -template -void sycl_free_all( sycl::queue& q, T* ptr, Args&&... args ) { - - if( ptr ) { - sycl::free( (void*)ptr, q ); - } - - sycl_free_all( q, std::forward(args)... 
); - -} - -void device_synchronize( sycl::queue& q ) { -q.wait_and_throw(); } +#endif // EXCHCXX_ENABLE_SYCL -void test_sycl_interface( TestInterface interface, EvalType evaltype, - Backend backend, Kernel kern, Spin polar, sycl::queue& q ) { - - auto [npts_lda, ref_rho] = load_reference_density( polar ); - auto [npts_gga, ref_sigma] = load_reference_sigma ( polar ); - - REQUIRE( npts_lda == npts_gga ); - - const int npts = npts_lda; - - XCKernel func( backend, kern, polar ); - - size_t len_rho_buffer = func.rho_buffer_len(npts); - size_t len_sigma_buffer = func.sigma_buffer_len(npts); - size_t len_exc_buffer = func.exc_buffer_len(npts); - size_t len_vrho_buffer = func.vrho_buffer_len(npts); - size_t len_vsigma_buffer = func.vsigma_buffer_len(npts); - - - std::vector rho_small(len_rho_buffer, 1e-13); - std::vector sigma_small(len_sigma_buffer, 1e-14); - - std::vector rho_zero(len_rho_buffer, 0.); - std::vector sigma_zero(len_sigma_buffer, 0.); - - std::vector rho, sigma; +#ifdef EXCHCXX_ENABLE_DEVICE +TEST_CASE( "GPU Interfaces", "[xc-device]" ) { - if( evaltype == EvalType::Regular ) { - rho = ref_rho; - sigma = ref_sigma; - } + SECTION( "Libxc Functionals" ) { - if( evaltype == EvalType::Small ) { - rho = rho_small; - sigma = sigma_small; - } - if( evaltype == EvalType::Zero ) { - rho = rho_zero; - sigma = sigma_zero; - } + SECTION( "LDA Functionals: EXC Regular Eval Unpolarized" ) { + for( auto kern : lda_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } - // Get Reference Values - std::vector - exc_ref( len_exc_buffer ), - vrho_ref( len_vrho_buffer ), - vsigma_ref( len_vsigma_buffer ); + SECTION( "LDA Functionals: EXC + VXC Regular Eval Unpolarized" ) { + for( auto kern : lda_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } - if( interface == TestInterface::EXC or interface == TestInterface::EXC_INC ) { + 
SECTION( "LDA Functionals: FXC Regular Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( func.is_lda() ) - func.eval_exc( npts, rho.data(), exc_ref.data() ); - else if( func.is_gga() ) - func.eval_exc( npts, rho.data(), sigma.data(), exc_ref.data() ); + SECTION( "LDA Functionals: VXC + FXC Regular Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } else if( interface == TestInterface::EXC_VXC or interface == TestInterface::EXC_VXC_INC ) { + SECTION( "GGA Functionals: EXC Regular Eval Unpolarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } - if( func.is_lda() ) - func.eval_exc_vxc( npts, rho.data(), exc_ref.data(), vrho_ref.data() ); - else if( func.is_gga() ) - func.eval_exc_vxc( npts, rho.data(), sigma.data(), exc_ref.data(), - vrho_ref.data(), vsigma_ref.data() ); + SECTION( "GGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } - } + SECTION( "GGA Functionals: FXC Regular Eval Unpolarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } + SECTION( "GGA Functionals: VXC + FXC Regular Eval Unpolarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } + SECTION( "MGGA Functionals: EXC 
Regular Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + SECTION( "MGGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + SECTION( "MGGA Functionals: FXC Regular Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } + SECTION( "MGGA Functionals: VXC + FXC Regular Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - // Allocate device memory - double* rho_device = safe_sycl_malloc( len_rho_buffer , q ); - double* sigma_device = safe_sycl_malloc( len_sigma_buffer , q ); - double* exc_device = safe_sycl_malloc( len_exc_buffer , q ); - double* vrho_device = safe_sycl_malloc( len_vrho_buffer , q ); - double* vsigma_device = safe_sycl_malloc( len_vsigma_buffer, q ); - // H2D Copy of rho / sigma - safe_sycl_cpy( rho_device, rho.data(), len_rho_buffer, q ); - if( func.is_gga() ) - safe_sycl_cpy( sigma_device, sigma.data(), len_sigma_buffer, q ); - const double alpha = 3.14; - const double fill_val_e = 2.; - const double fill_val_vr = 10.; - const double fill_val_vs = 50.; + SECTION( "LDA Functionals: EXC Small Eval Unpolarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - std::vector - exc( len_exc_buffer, fill_val_e ), vrho( len_vrho_buffer, fill_val_vr ), - vsigma( len_vsigma_buffer, fill_val_vs ); + SECTION( "LDA Functionals: EXC 
+ VXC Small Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - // H2D copy of initial values, tests clobber / increment - safe_sycl_cpy( exc_device, exc.data(), len_exc_buffer, q ); - safe_sycl_cpy( vrho_device, vrho.data(), len_vrho_buffer, q ); - if( func.is_gga() ) - safe_sycl_cpy( vsigma_device, vsigma.data(), len_vsigma_buffer, q ); + SECTION( "LDA Functionals: FXC Small Eval Unpolarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - q.wait(); + SECTION( "LDA Functionals: VXC + FXC Small Eval Unpolarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - // Evaluate functional on device - if( interface == TestInterface::EXC ) { + SECTION( "GGA Functionals: EXC Small Eval Unpolarized" ) { + for( auto kern : gga_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( func.is_lda() ) - func.eval_exc_device( npts, rho_device, exc_device, &q ); - else if( func.is_gga() ) - func.eval_exc_device( npts, rho_device, sigma_device, exc_device, - &q ); + SECTION( "GGA Functionals: EXC + VXC Small Eval Unpolarized" ) { + for( auto kern : gga_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } else if( interface == TestInterface::EXC_INC ) { + SECTION( "GGA Functionals: FXC Small Eval Unpolarized" ) { + 
for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( func.is_lda() ) - func.eval_exc_inc_device( alpha, npts, rho_device, exc_device, &q ); - else if( func.is_gga() ) - func.eval_exc_inc_device( alpha, npts, rho_device, sigma_device, exc_device, - &q ); + SECTION( "GGA Functionals: VXC + FXC Small Eval Unpolarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } else if( interface == TestInterface::EXC_VXC ) { + SECTION( "MGGA Functionals: EXC Small Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( func.is_lda() ) - func.eval_exc_vxc_device( npts, rho_device, exc_device, vrho_device, &q ); - else if( func.is_gga() ) - func.eval_exc_vxc_device( npts, rho_device, sigma_device, exc_device, - vrho_device, vsigma_device, &q ); + SECTION( "MGGA Functionals: EXC + VXC Small Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } else if( interface == TestInterface::EXC_VXC_INC ) { + SECTION( "MGGA Functionals: FXC Small Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; - if( func.is_lda() ) - func.eval_exc_vxc_inc_device( alpha, npts, rho_device, exc_device, - vrho_device, &q ); - else if( func.is_gga() ) - func.eval_exc_vxc_inc_device( alpha, npts, rho_device, sigma_device, - exc_device, vrho_device, vsigma_device, &q ); + 
test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - } + SECTION( "MGGA Functionals: VXC + FXC Small Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - device_synchronize( q ); + SECTION( "LDA Functionals: EXC Zero Eval Unpolarized" ) { + for( auto kern : lda_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - // D2H of results - safe_sycl_cpy( exc.data(), exc_device, len_exc_buffer, q ); - safe_sycl_cpy( vrho.data(), vrho_device, len_vrho_buffer, q ); - if(func.is_gga()) - safe_sycl_cpy( vsigma.data(), vsigma_device, len_vsigma_buffer, q ); + SECTION( "LDA Functionals: EXC + VXC Zero Eval Unpolarized" ) { + for( auto kern : lda_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - device_synchronize( q ); - // Check correctness - if( interface == TestInterface::EXC_INC or interface == TestInterface::EXC_VXC_INC ) { - for( auto i = 0ul; i < len_exc_buffer; ++i ) - CHECK( exc[i] == Approx(fill_val_e + alpha * exc_ref[i]) ); - } else { - for( auto i = 0ul; i < len_exc_buffer; ++i ) - CHECK( exc[i] == Approx(exc_ref[i]) ); - } + SECTION( "LDA Functionals: FXC Zero Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - if( interface == TestInterface::EXC_VXC_INC ) { + SECTION( "LDA Functionals: VXC + FXC Zero Eval Unpolarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + 
} + } - for( auto i = 0ul; i < len_vrho_buffer; ++i ) - CHECK( vrho[i] == Approx(fill_val_vr + alpha * vrho_ref[i]) ); - for( auto i = 0ul; i < len_vsigma_buffer; ++i ) - CHECK( vsigma[i] == Approx(fill_val_vs + alpha * vsigma_ref[i]) ); + SECTION( "GGA Functionals: EXC Zero Eval Unpolarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - } else if(interface == TestInterface::EXC_VXC) { + SECTION( "GGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - for( auto i = 0ul; i < len_vrho_buffer; ++i ) - CHECK( vrho[i] == Approx(vrho_ref[i]) ); - for( auto i = 0ul; i < len_vsigma_buffer; ++i ) { - INFO( "Kernel is " << kern ); - CHECK( vsigma[i] == Approx(vsigma_ref[i]) ); + SECTION( "GGA Functionals: FXC Zero Eval Unpolarized" ) { + for( auto kern : gga_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } } - } + SECTION( "GGA Functionals: VXC + FXC Zero Eval Unpolarized" ) { + for( auto kern : gga_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - device_synchronize( q ); - sycl_free_all( q, rho_device, sigma_device, exc_device, vrho_device, - vsigma_device ); + SECTION( "MGGA Functionals: EXC Zero Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } - device_synchronize( q ); -} + SECTION( "MGGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + 
} + SECTION( "MGGA Functionals: FXC Zero Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } -#if 0 -struct SYCLTestFeature { - sycl::queue q; - SYCLTestFeature() : - q( sycl::gpu_selector_v, - sycl::property_list{sycl::property::queue::in_order{}} ) { } -}; -#else -struct SYCLTestFeature { - static sycl::queue q; + SECTION( "MGGA Functionals: VXC + FXC Zero Eval Unpolarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Unpolarized ); + } + } - SYCLTestFeature() {} -}; -sycl::queue SYCLTestFeature::q( - sycl::gpu_selector_v, - sycl::property_list{sycl::property::queue::in_order{}} ); -#endif -TEST_CASE_METHOD( SYCLTestFeature, "SYCL Interfaces", "[xc-device]" ) { - //std::cout << "Running on " - // << q.get_device().get_info() - // << "\n"; - SECTION( "Libxc Functionals" ) { - SECTION( "LDA Functionals: EXC Regular Eval Unpolarized" ) { + SECTION( "LDA Functionals: EXC Regular Eval Polarized" ) { for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } - - SECTION( "LDA Functionals: EXC + VXC Regular Eval Unpolarized" ) { + SECTION( "LDA Functionals: EXC + VXC Regular Eval Polarized" ) { for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, 
- Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "LDA Functionals: FXC Regular Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Regular Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "LDA Functionals: VXC + FXC Regular Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "LDA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "GGA Functionals: EXC Regular Eval Polarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } - - SECTION( "LDA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "GGA Functionals: EXC + VXC Regular Eval Polarized" ) { + for( auto kern : gga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "GGA Functionals: FXC Regular Eval Polarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, 
+ Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Small Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "GGA Functionals: VXC + FXC Regular Eval Polarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "LDA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "MGGA Functionals: EXC Regular Eval Polarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); } + SECTION( "MGGA Functionals: EXC + VXC Regular Eval Polarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } - SECTION( "LDA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "MGGA Functionals: FXC Regular Eval Polarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "MGGA Functionals: VXC + FXC Regular Eval Polarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, 
EvalType::Regular, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Zero Eval Unpolarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Unpolarized, q ); + SECTION( "LDA Functionals: EXC Small Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } + SECTION( "LDA Functionals: EXC + VXC Small Eval Polarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "LDA Functionals: FXC Small Eval Polarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "LDA Functionals: VXC + FXC Small Eval Polarized" ) { + for( auto kern : lda_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "GGA Functionals: EXC Small Eval Polarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "GGA Functionals: EXC + VXC Small Eval Polarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } + SECTION( "GGA Functionals: FXC Small Eval Polarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; 
+ if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } - SECTION( "LDA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "GGA Functionals: VXC + FXC Small Eval Polarized" ) { + for( auto kern : gga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } + SECTION( "MGGA Functionals: EXC Small Eval Polarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } + } - SECTION( "LDA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: EXC + VXC Small Eval Polarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: FXC Small Eval Polarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Regular Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( 
TestInterface::EXC_VXC, EvalType::Regular, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: VXC + FXC Small Eval Polarized" ) { + for( auto kern : mgga_kernels ) { + if(is_unstable_small(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "LDA Functionals: EXC Small Eval Polarized" ) { + SECTION( "LDA Functionals: EXC Zero Eval Polarized" ) { for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "LDA Functionals: EXC + VXC Small Eval Polarized" ) { + SECTION( "LDA Functionals: EXC + VXC Zero Eval Polarized" ) { for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC Small Eval Polarized" ) { + SECTION( "LDA Functionals: FXC Zero Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } + } + + SECTION( "LDA Functionals: VXC + FXC Zero Eval Polarized" ) { + for( auto kern : lda_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } + } + + SECTION( "GGA Functionals: EXC Zero Eval Polarized" ) { for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( 
"GGA Functionals: EXC + VXC Small Eval Polarized" ) { + SECTION( "GGA Functionals: EXC + VXC Zero Eval Polarized" ) { for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::libxc, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "LDA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "GGA Functionals: FXC Zero Eval Polarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } + } + + SECTION( "GGA Functionals: VXC + FXC Zero Eval Polarized" ) { + for( auto kern : gga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } + } + + SECTION( "MGGA Functionals: EXC Zero Eval Polarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - - SECTION( "LDA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : lda_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: EXC + VXC Zero Eval Polarized" ) { + for( auto kern : mgga_kernels ) + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); } - SECTION( "GGA Functionals: EXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: FXC Zero Eval Polarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) 
continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } } - SECTION( "GGA Functionals: EXC + VXC Zero Eval Polarized" ) { - for( auto kern : gga_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::libxc, kern, Spin::Polarized, q ); + SECTION( "MGGA Functionals: VXC + FXC Zero Eval Polarized" ) { + for( auto kern : mgga_kernels ){ + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::libxc, kern, Spin::Polarized ); + } } } @@ -3477,155 +3267,365 @@ TEST_CASE_METHOD( SYCLTestFeature, "SYCL Interfaces", "[xc-device]" ) { SECTION( "Builtin Functionals" ) { SECTION("EXC Regular: Unpolarized") { - //std::cout << "EXC Regular: Unpolarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); } SECTION("EXC + VXC Regular: Unpolarized") { - //std::cout << "EXC + VXC Regular: Unpolarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } + + SECTION("FXC Regular: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC Regular: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + INC 
Regular: Unpolarized") { - //std::cout << "EXC + INC Regular: Unpolarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); } SECTION("EXC + VXC + INC Regular: Unpolarized") { - //std::cout << "EXC + VXC + INC Regular: Unpolarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } + + SECTION("FXC + INC Regular: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC + INC Regular: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + VXC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + 
test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("FXC Small: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC Small: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + VXC + INC Small: Unpolarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Unpolarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("FXC + INC Small: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC + INC Small: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + 
if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC Zero: Unpolarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); } SECTION("EXC + VXC Zero: Unpolarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } + + SECTION("FXC Zero: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC Zero: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } } SECTION("EXC + INC Zero: Unpolarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); } SECTION("EXC + VXC + INC Zero: Unpolarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Unpolarized, q ); + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } + + SECTION("FXC + INC Zero: 
Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } + } + + SECTION("VXC + FXC + INC Zero: Unpolarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Unpolarized ); + } } + + SECTION("EXC Regular: Polarized") { - //std::cout << "EXC Regular: Polarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); } SECTION("EXC + VXC Regular: Polarized") { - //std::cout << "EXC + VXC Regular: Polarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } + + SECTION("FXC Regular: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC Regular: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + INC Regular: Polarized") { - //std::cout << "EXC + INC Regular: Polarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized, q ); + 
test_device_interface( TestInterface::EXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); } SECTION("EXC + VXC + INC Regular: Polarized") { - //std::cout << "EXC + VXC + INC Regular: Polarized" << std::endl; for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } + + SECTION("FXC + INC Regular: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC + INC Regular: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Regular, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + VXC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("FXC Small: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + 
if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC Small: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + VXC + INC Small: Polarized") { - for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Small, - Backend::builtin, kern, Spin::Polarized, q ); + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small(kern)) continue; + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("FXC + INC Small: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC + INC Small: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_unstable_small_2nd_deriv_device(kern)) continue; + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Small, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) - 
test_sycl_interface( TestInterface::EXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); } SECTION("EXC + VXC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } + + SECTION("FXC Zero: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC Zero: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } } SECTION("EXC + INC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); } SECTION("EXC + VXC + INC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) - test_sycl_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, - Backend::builtin, kern, Spin::Polarized, q ); + test_device_interface( TestInterface::EXC_VXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } + + SECTION("FXC + INC Zero: Polarized") { + for( auto kern : builtin_supported_kernels ) { + if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::FXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } + } + + SECTION("VXC + FXC + INC Zero: Polarized") { + for( auto kern : builtin_supported_kernels ) { + 
if(is_deorbitalized(kern)) continue; + test_device_interface( TestInterface::VXC_FXC_INC, EvalType::Zero, + Backend::builtin, kern, Spin::Polarized ); + } } } @@ -3633,4 +3633,4 @@ TEST_CASE_METHOD( SYCLTestFeature, "SYCL Interfaces", "[xc-device]" ) { } -#endif +#endif // EXCHCXX_ENABLE_DEVICE From 0d13be1ea1741c7c26c6f281bd0e45527850ba8b Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Tue, 23 Sep 2025 22:47:31 +0000 Subject: [PATCH 02/14] [SYCL] replace sycl::printf with printf --- .../impl/builtin/kernels/deorbitalized.hpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp index 8a3a233..025efd0 100644 --- a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp +++ b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp @@ -141,10 +141,8 @@ struct kernel_traits> { double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { - #if defined(__CUDACC__) || defined(__HIPCC__) + #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n"); - #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) - sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n"); #else unused(rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2); throw std::runtime_error("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels"); @@ -172,10 +170,8 @@ struct kernel_traits> { double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, 
double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) { - #if defined(__CUDACC__) || defined(__HIPCC__) + #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n"); - #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) - sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n"); #else unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, vrho_a, vrho_b, vsigma_aa, vsigma_ab, vsigma_bb, vlapl_a, vlapl_b, vtau_a, vtau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb); throw std::runtime_error("eval_vxc_fxc_polar not implemented for deorbitalized kernels"); @@ -188,10 +184,8 @@ struct kernel_traits> { double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { - #if defined(__CUDACC__) || defined(__HIPCC__) + #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); - #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) - 
sycl::ext::oneapi::experimental::printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); #else unused(rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2); throw std::runtime_error("eval_fxc_unpolar not implemented for deorbitalized kernels"); @@ -216,10 +210,8 @@ struct kernel_traits> { double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) { - #if defined(__CUDACC__) || defined(__HIPCC__) + #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) printf("eval_fxc_polar not implemented for deorbitalized kernels\n"); - #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) - sycl::ext::oneapi::experimental::printf("eval_fxc_polar not implemented for deorbitalized kernels\n"); #else unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb); throw std::runtime_error("eval_fxc_polar not implemented for deorbitalized kernels"); From a296a72e1a477e9d79184666bf7ff6209b9a3a1d Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Tue, 23 Sep 2025 23:01:17 +0000 Subject: 
[PATCH 03/14] Revert "[SYCL] replace sycl::printf with printf" This reverts commit 0d13be1ea1741c7c26c6f281bd0e45527850ba8b. --- .../impl/builtin/kernels/deorbitalized.hpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp index 025efd0..8a3a233 100644 --- a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp +++ b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp @@ -141,8 +141,10 @@ struct kernel_traits> { double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { - #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n"); + #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n"); #else unused(rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2); throw std::runtime_error("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels"); @@ -170,8 +172,10 @@ struct kernel_traits> { double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) { - #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n"); + #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + 
sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n"); #else unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, vrho_a, vrho_b, vsigma_aa, vsigma_ab, vsigma_bb, vlapl_a, vlapl_b, vtau_a, vtau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb); throw std::runtime_error("eval_vxc_fxc_polar not implemented for deorbitalized kernels"); @@ -184,8 +188,10 @@ struct kernel_traits> { double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { - #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); + #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + sycl::ext::oneapi::experimental::printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); #else unused(rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2); throw std::runtime_error("eval_fxc_unpolar not implemented for deorbitalized kernels"); @@ -210,8 +216,10 @@ struct kernel_traits> { double& v2lapl2_aa, double& 
v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) { - #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_fxc_polar not implemented for deorbitalized kernels\n"); + #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) + sycl::ext::oneapi::experimental::printf("eval_fxc_polar not implemented for deorbitalized kernels\n"); #else unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb); throw std::runtime_error("eval_fxc_polar not implemented for deorbitalized kernels"); From 71ecc1314a092732b02d4324765ade0e1e9c3107 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Wed, 24 Sep 2025 15:27:36 +0000 Subject: [PATCH 04/14] [SYCL] add AoT build options for SYCL backend --- CMakeLists.txt | 16 +++++++++++----- src/sycl/exchcxx_sycl.cmake | 13 +++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d604c3..9856aff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,12 @@ if( EXCHCXX_ENABLE_SYCL AND EXCHCXX_ENABLE_HIP ) endif() 
+if(EXCHCXX_ENABLE_SYCL) + # e.g. intel_gpu_pvc | nvidia_gpu_sm_80 | nvidia_gpu_sm_90 | amd_gpu_gfx90a | amd_gpu_gfx942 + set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Alias for -fsycl-targets (see Users Manual)") +endif() + + # Append local cmake directory to find CMAKE Modules if( CMAKE_MODULE_PATH ) list( APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") @@ -69,7 +75,7 @@ if( ${Libxc_FOUND} ) else() option( FETCHCONTENT_LIBXC_GIT_SHALLOW "Whether to use GIT_SHALLOW for FetchContent'ing libxc" ON ) - + FetchContent_Declare( libxc GIT_REPOSITORY https://gitlab.com/libxc/libxc.git @@ -85,8 +91,8 @@ else() FetchContent_MakeAvailable( libxc ) add_library( Libxc::xc ALIAS xc ) - target_include_directories( xc - PUBLIC + target_include_directories( xc + PUBLIC $ $ $ @@ -98,7 +104,7 @@ else() set_target_properties(xc PROPERTIES UNITY_BUILD OFF) message(STATUS "Will disable unity-build for Libxc::xc") endif() - + set( BUILD_TESTING ${OLD_BUILD_TESTING} CACHE BOOL "" FORCE ) endif() @@ -106,7 +112,7 @@ endif() else( EXCHCXX_ENABLE_LIBXC ) set( Libxc_FOUND FALSE ) endif( EXCHCXX_ENABLE_LIBXC ) - + add_subdirectory( src ) # Testing diff --git a/src/sycl/exchcxx_sycl.cmake b/src/sycl/exchcxx_sycl.cmake index 9a1bd5f..ec615ff 100644 --- a/src/sycl/exchcxx_sycl.cmake +++ b/src/sycl/exchcxx_sycl.cmake @@ -13,6 +13,19 @@ list( APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ) find_package( SYCL REQUIRED ) target_link_libraries( exchcxx PUBLIC SYCL::SYCL ) +# --- AoT-builds SYCL target alias pass-through --- +if(EXCHCXX_ENABLE_SYCL) + if(NOT EXCHCXX_SYCL_TARGET) + message(FATAL_ERROR + "EXCHCXX_SYCL_TARGET is required. 
Examples: " + "intel_gpu_pvc | nvidia_gpu_sm_80 | nvidia_gpu_sm_90 | amd_gpu_gfx90a | amd_gpu_gfx942") + endif() + + # Apply ONLY to this target (both compile & link) + target_compile_options(exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET}) + target_link_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET}) +endif() + target_compile_options(exchcxx PRIVATE $<$:-ffp-model=precise>) target_link_options(exchcxx PRIVATE -fsycl-max-parallel-link-jobs=20) From 4184d5dc3c03d1514803f6afb8441ce35192394a Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Thu, 25 Sep 2025 17:07:57 +0000 Subject: [PATCH 05/14] [SYCL] fix sycl kernel names --- src/sycl/builtin_sycl.cxx | 96 +++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/src/sycl/builtin_sycl.cxx b/src/sycl/builtin_sycl.cxx index 1ff6f30..d56b4a3 100644 --- a/src/sycl/builtin_sycl.cxx +++ b/src/sycl/builtin_sycl.cxx @@ -979,7 +979,7 @@ __attribute__((always_inline)) MGGA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL(device_eva template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_helper_unpolar_kernel( N, rho, eps, tid); }); @@ -989,7 +989,7 @@ LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { template LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_helper_polar_kernel( N, rho, eps, tid); }); @@ -999,7 +999,7 @@ LDA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_helper_unpolar_kernel( N, rho, eps, vxc, 
tid); }); @@ -1009,7 +1009,7 @@ LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { template LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_helper_polar_kernel( N, rho, eps, vxc, tid); }); @@ -1019,7 +1019,7 @@ LDA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { template LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_helper_unpolar_kernel( N, rho, fxc, tid); }); @@ -1029,7 +1029,7 @@ LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { template LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_helper_polar_kernel( N, rho, fxc, tid); }); @@ -1038,7 +1038,7 @@ LDA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { template LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_helper_unpolar_kernel( N, rho, vxc, fxc, tid); }); @@ -1048,7 +1048,7 @@ LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { template LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_helper_polar_kernel( N, rho, vxc, fxc, tid); }); @@ -1058,7 +1058,7 @@ LDA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { template LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { - queue->parallel_for>( 
sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_inc_helper_unpolar_kernel( scal_fact, N, rho, eps, tid); }); @@ -1068,7 +1068,7 @@ LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { template LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_inc_helper_polar_kernel( scal_fact, N, rho, eps, tid); }); @@ -1078,7 +1078,7 @@ LDA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_inc_helper_unpolar_kernel( scal_fact, N, rho, eps, vxc, tid); }); @@ -1088,7 +1088,7 @@ LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { template LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_inc_helper_polar_kernel( scal_fact, N, rho, eps, vxc, tid); }); @@ -1098,7 +1098,7 @@ LDA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_inc_helper_unpolar_kernel( scal_fact, N, rho, fxc, tid); }); @@ -1108,7 +1108,7 @@ LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { template LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( 
sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_inc_helper_polar_kernel( scal_fact, N, rho, fxc, tid); }); @@ -1118,7 +1118,7 @@ LDA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { template LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_inc_helper_unpolar_kernel( scal_fact, N, rho, vxc, fxc, tid); }); @@ -1128,7 +1128,7 @@ LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { template LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_inc_helper_polar_kernel( scal_fact, N, rho, vxc, fxc, tid); }); @@ -1141,7 +1141,7 @@ LDA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_helper_unpolar_kernel( N, rho, sigma, eps, tid); }); @@ -1151,7 +1151,7 @@ GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { template GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_helper_polar_kernel( N, rho, sigma, eps, tid); }); @@ -1161,7 +1161,7 @@ GGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { template GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_helper_unpolar_kernel( N, rho, sigma, eps, vrho, vsigma, 
tid); }); @@ -1171,7 +1171,7 @@ GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { template GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_helper_polar_kernel( N, rho, sigma, eps, vrho, vsigma, tid); }); @@ -1181,7 +1181,7 @@ GGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { template GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_helper_unpolar_kernel( N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); }); @@ -1191,7 +1191,7 @@ GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { template GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_helper_polar_kernel( N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); }); @@ -1201,7 +1201,7 @@ GGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { template GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_helper_unpolar_kernel( N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); }); @@ -1211,7 +1211,7 @@ GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { template GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_helper_polar_kernel( N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); }); @@ -1221,7 +1221,7 @@ 
GGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_inc_helper_unpolar_kernel( scal_fact, N, rho, sigma, eps, tid); }); @@ -1231,7 +1231,7 @@ GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { template GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_inc_helper_polar_kernel( scal_fact, N, rho, sigma, eps, tid); }); @@ -1241,7 +1241,7 @@ GGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_inc_helper_unpolar_kernel( scal_fact, N, rho, sigma, eps, vrho, vsigma, tid); }); @@ -1251,7 +1251,7 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { template GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_inc_helper_polar_kernel( scal_fact, N, rho, sigma, eps, vrho, vsigma, tid); }); @@ -1262,7 +1262,7 @@ GGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_inc_helper_unpolar_kernel( scal_fact, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); }); @@ -1271,7 
+1271,7 @@ GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { template GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_inc_helper_polar_kernel( scal_fact, N, rho, sigma, v2rho2, v2rhosigma, v2sigma2, tid); }); @@ -1280,7 +1280,7 @@ GGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { template GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_inc_helper_unpolar_kernel( scal_fact, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); }); @@ -1289,7 +1289,7 @@ GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { template GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_inc_helper_polar_kernel( scal_fact, N, rho, sigma, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, tid); }); @@ -1299,7 +1299,7 @@ GGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { template MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_helper_unpolar_kernel( N, rho, sigma, lapl, tau, eps, tid); }); @@ -1309,7 +1309,7 @@ MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_unpolar ) { template MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_helper_polar_kernel( N, rho, sigma, lapl, tau, 
eps, tid); }); @@ -1319,7 +1319,7 @@ MGGA_EXC_GENERATOR_DEVICE( device_eval_exc_helper_polar ) { template MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_helper_unpolar_kernel( N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); }); @@ -1329,7 +1329,7 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_unpolar ) { template MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_helper_polar_kernel( N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); }); @@ -1339,7 +1339,7 @@ MGGA_EXC_VXC_GENERATOR_DEVICE( device_eval_exc_vxc_helper_polar ) { template MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_helper_unpolar_kernel( N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, tid); @@ -1350,7 +1350,7 @@ MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_unpolar ) { template MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_helper_polar_kernel( N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, tid); @@ -1361,7 +1361,7 @@ MGGA_FXC_GENERATOR_DEVICE( device_eval_fxc_helper_polar ) { template MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + 
queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_helper_unpolar_kernel( N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, @@ -1374,7 +1374,7 @@ MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_unpolar ) { template MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_helper_polar_kernel( N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, @@ -1387,7 +1387,7 @@ MGGA_VXC_FXC_GENERATOR_DEVICE( device_eval_vxc_fxc_helper_polar ) { template MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_inc_helper_unpolar_kernel( scal_fact, N, rho, sigma, lapl, tau, eps, tid); }); @@ -1397,7 +1397,7 @@ MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_unpolar ) { template MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_inc_helper_polar_kernel( scal_fact, N, rho, sigma, lapl, tau, eps, tid); }); @@ -1407,7 +1407,7 @@ MGGA_EXC_INC_GENERATOR_DEVICE( device_eval_exc_inc_helper_polar ) { template MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_inc_helper_unpolar_kernel( scal_fact, N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); }); @@ -1417,7 +1417,7 @@ MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_unpolar ) { template 
MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_exc_vxc_inc_helper_polar_kernel( scal_fact, N, rho, sigma, lapl, tau, eps, vrho, vsigma, vlapl, vtau, tid); }); @@ -1427,7 +1427,7 @@ MGGA_EXC_VXC_INC_GENERATOR_DEVICE( device_eval_exc_vxc_inc_helper_polar ) { template MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_inc_helper_unpolar_kernel( scal_fact, N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, @@ -1440,7 +1440,7 @@ MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_unpolar ) { template MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_fxc_inc_helper_polar_kernel( scal_fact, N, rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, @@ -1453,7 +1453,7 @@ MGGA_FXC_INC_GENERATOR_DEVICE( device_eval_fxc_inc_helper_polar ) { template MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_inc_helper_unpolar_kernel( scal_fact, N, rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, @@ -1467,7 +1467,7 @@ MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_unpolar ) { template MGGA_VXC_FXC_INC_GENERATOR_DEVICE( device_eval_vxc_fxc_inc_helper_polar ) { - queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { + queue->parallel_for>( sycl::range<1>(N), [=](sycl::id<1> tid) { device_eval_vxc_fxc_inc_helper_polar_kernel( scal_fact, N, rho, sigma, lapl, 
tau, vrho, vsigma, vlapl, vtau, From 460737dcc9d4b1d0a0ea6b6b3bb6607bf3d06ee1 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Thu, 25 Sep 2025 17:19:17 +0000 Subject: [PATCH 06/14] [SYCL] AoT compilation options --- src/sycl/exchcxx_sycl.cmake | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/sycl/exchcxx_sycl.cmake b/src/sycl/exchcxx_sycl.cmake index ec615ff..c32265d 100644 --- a/src/sycl/exchcxx_sycl.cmake +++ b/src/sycl/exchcxx_sycl.cmake @@ -14,16 +14,29 @@ find_package( SYCL REQUIRED ) target_link_libraries( exchcxx PUBLIC SYCL::SYCL ) # --- AoT-builds SYCL target alias pass-through --- -if(EXCHCXX_ENABLE_SYCL) - if(NOT EXCHCXX_SYCL_TARGET) +set(_EXCHCXX_SYCL_ALLOWED + intel_gpu_pvc + nvidia_gpu_sm_80 + nvidia_gpu_sm_90 + amd_gpu_gfx90a + amd_gpu_gfx942) + +if(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "") + list(FIND _EXCHCXX_SYCL_ALLOWED "${EXCHCXX_SYCL_TARGET}" _exchcxx_sycl_idx) + if(_exchcxx_sycl_idx EQUAL -1) message(FATAL_ERROR - "EXCHCXX_SYCL_TARGET is required. Examples: " - "intel_gpu_pvc | nvidia_gpu_sm_80 | nvidia_gpu_sm_90 | amd_gpu_gfx90a | amd_gpu_gfx942") + "Invalid EXCHCXX_SYCL_TARGET='${EXCHCXX_SYCL_TARGET}'. 
" + "Allowed values: ${_EXCHCXX_SYCL_ALLOWED}") endif() # Apply ONLY to this target (both compile & link) - target_compile_options(exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET}) - target_link_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET}) + target_compile_options( exchcxx PRIVATE -fsycl-device-only -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) + target_link_options( exchcxx PRIVATE -fsycl-device-only -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) + message(STATUS "ExchCXX SYCL AoT enabled for target: ${EXCHCXX_SYCL_TARGET}") + + # target_compile_options( exchcxx PRIVATE -Wno-unused-parameter -Wno-unused-variable -fsycl-device-only -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" ) + # target_link_options( exchcxx PRIVATE -fsycl-device-only -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" ) + endif() target_compile_options(exchcxx PRIVATE $<$:-ffp-model=precise>) From 4102e6e29e0c232f0b1192761b7cab6d9ac98757 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Fri, 26 Sep 2025 17:13:01 +0000 Subject: [PATCH 07/14] [SYCL] remove fsycl-device-only flag --- src/sycl/exchcxx_sycl.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sycl/exchcxx_sycl.cmake b/src/sycl/exchcxx_sycl.cmake index c32265d..3b05354 100644 --- a/src/sycl/exchcxx_sycl.cmake +++ b/src/sycl/exchcxx_sycl.cmake @@ -30,8 +30,8 @@ if(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "") endif() # Apply ONLY to this target (both compile & link) - target_compile_options( exchcxx PRIVATE -fsycl-device-only -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) - target_link_options( exchcxx PRIVATE -fsycl-device-only -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) + target_compile_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) + target_link_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) message(STATUS "ExchCXX SYCL AoT enabled for target: ${EXCHCXX_SYCL_TARGET}") # target_compile_options( exchcxx PRIVATE 
-Wno-unused-parameter -Wno-unused-variable -fsycl-device-only -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" ) From 2105ab75f58e6c90bb495b3119b26b05717d2a3a Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 29 Sep 2025 16:40:17 +0000 Subject: [PATCH 08/14] [SYCL] fix PR comments --- README.md | 4 +- .../impl/builtin/kernels/deorbitalized.hpp | 36 +++--- include/exchcxx/impl/builtin/util.hpp | 83 ++++---------- include/exchcxx/util/exchcxx_macros.hpp | 32 +++--- src/sycl/builtin_sycl.cxx | 18 +-- src/sycl/libxc_device.cxx | 28 ++--- src/sycl/xc_functional_device.cxx | 14 +-- test/xc_kernel_test.cxx | 103 +++++++++--------- 8 files changed, 147 insertions(+), 171 deletions(-) diff --git a/README.md b/README.md index 4c9fc86..34617a3 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,8 @@ small subset of XC functionals which may be evaluated either on the host (CPU) or device (GPU, FPGA, etc). Currently GPU support is provided through the [CUDA](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html) for NVIDIA GPUs, [HIP](https://rocmdocs.amd.com/en/latest/Programming_Guides/HIP-GUIDE.html) for -AMD GPUs and [SYCL](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) (experimental) -for generic accelerator backends (including Intel GPUs). +AMD GPUs and [SYCL](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) (experimental, +supports only oneAPI implementation) for generic accelerator backends (including Intel GPUs). ExchCXX is a work in progress. Its development has been funded by the U.S. 
diff --git a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp index 8a3a233..e00bc23 100644 --- a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp +++ b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -49,7 +49,7 @@ * in binary and source code form. */ -#pragma once +#pragma once #include #include #include @@ -165,9 +165,9 @@ struct kernel_traits> { double& v2rhotau_a_a, double& v2rhotau_a_b, double& v2rhotau_b_a, double& v2rhotau_b_b, double& v2sigma2_aa_aa, double& v2sigma2_aa_ab, double& v2sigma2_aa_bb, double& v2sigma2_ab_ab, double& v2sigma2_ab_bb, double& v2sigma2_bb_bb, - double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a, + double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a, double& v2sigmalapl_ab_b, double& v2sigmalapl_bb_a, double& v2sigmalapl_bb_b, - double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a, + double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a, double& v2sigmatau_ab_b, double& v2sigmatau_bb_a, double& v2sigmatau_bb_b, double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, @@ -185,8 +185,8 @@ struct kernel_traits> { BUILTIN_KERNEL_EVAL_RETURN eval_fxc_unpolar( double rho, double sigma, double lapl, double tau, - double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, - double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, + double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, + double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { #if defined(__CUDACC__) || defined(__HIPCC__) printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); @@ -199,8 +199,8 @@ struct kernel_traits> { } BUILTIN_KERNEL_EVAL_RETURN - eval_fxc_polar( double rho_a, double rho_b, - double sigma_aa, double sigma_ab, double sigma_bb, + 
eval_fxc_polar( double rho_a, double rho_b, + double sigma_aa, double sigma_ab, double sigma_bb, double lapl_a, double lapl_b, double tau_a, double tau_b, double& v2rho2_aa, double& v2rho2_ab, double& v2rho2_bb, double& v2rhosigma_a_aa, double& v2rhosigma_a_ab, double& v2rhosigma_a_bb, @@ -209,9 +209,9 @@ struct kernel_traits> { double& v2rhotau_a_a, double& v2rhotau_a_b, double& v2rhotau_b_a, double& v2rhotau_b_b, double& v2sigma2_aa_aa, double& v2sigma2_aa_ab, double& v2sigma2_aa_bb, double& v2sigma2_ab_ab, double& v2sigma2_ab_bb, double& v2sigma2_bb_bb, - double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a, + double& v2sigmalapl_aa_a, double& v2sigmalapl_aa_b, double& v2sigmalapl_ab_a, double& v2sigmalapl_ab_b, double& v2sigmalapl_bb_a, double& v2sigmalapl_bb_b, - double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a, + double& v2sigmatau_aa_a, double& v2sigmatau_aa_b, double& v2sigmatau_ab_a, double& v2sigmatau_ab_b, double& v2sigmatau_bb_a, double& v2sigmatau_bb_b, double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, diff --git a/include/exchcxx/impl/builtin/util.hpp b/include/exchcxx/impl/builtin/util.hpp index 747226f..6d9e1a9 100644 --- a/include/exchcxx/impl/builtin/util.hpp +++ b/include/exchcxx/impl/builtin/util.hpp @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. 
- * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -56,6 +56,14 @@ #include #include +#if defined(__CUDACC__) || defined(__HIPCC__) +#define EXCHCXX_READONLY_TABLE static __device__ +#elif defined(__SYCL_DEVICE_ONLY__) +#define EXCHCXX_READONLY_TABLE inline constexpr +#else +#define EXCHCXX_READONLY_TABLE static +#endif + namespace ExchCXX { @@ -118,7 +126,7 @@ SAFE_INLINE(auto) erf( T x ) { return sm::erf(x); } template SAFE_INLINE(auto) pow( T x, U e ) { return sm::pow(x,e); } template -SAFE_INLINE(auto) xc_erfcx( T x ) { return sm::exp(x*x)*sm::erfc(x); } +SAFE_INLINE(auto) xc_erfcx( T x ) { return sm::exp(x*x)*sm::erfc(x); } @@ -139,16 +147,8 @@ SAFE_INLINE(auto) xc_cheb_eval(const double x, const double *cs, const int N) return 0.5*(b0 - b2); } - // The following data is taken from libxc -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ static -#elif defined(__SYCL_DEVICE_ONLY__) -inline constexpr -#else -static -#endif -double AE11_data[39] = { +EXCHCXX_READONLY_TABLE double AE11_data[39] = { 0.121503239716065790, -0.065088778513550150, 0.004897651357459670, -0.000649237843027216, 0.000093840434587471, 0.000000420236380882, -0.000008113374735904, 0.000002804247688663, 0.000000056487164441, -0.000000344809174450, 0.000000058209273578, 0.000000038711426349, -0.000000012453235014, -0.000000005118504888, 0.000000002148771527, @@ -159,14 +159,7 @@ double AE11_data[39] = { -0.000000000000000024, -0.000000000000000201, -0.000000000000000082, 0.000000000000000017 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ static -#elif defined(__SYCL_DEVICE_ONLY__) -inline constexpr -#else -static -#endif -double AE12_data[25] = { +EXCHCXX_READONLY_TABLE double AE12_data[25] = { 0.582417495134726740, -0.158348850905782750, -0.006764275590323141, 0.005125843950185725, 
0.000435232492169391, -0.000143613366305483, -0.000041801320556301, -0.000002713395758640, 0.000001151381913647, 0.000000420650022012, 0.000000066581901391, 0.000000000662143777, -0.000000002844104870, -0.000000000940724197, -0.000000000177476602, @@ -174,42 +167,21 @@ double AE12_data[25] = { 0.000000000000010707, -0.000000000000000537, -0.000000000000000716, -0.000000000000000244, -0.000000000000000058 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ static -#elif defined(__SYCL_DEVICE_ONLY__) -inline constexpr -#else -static -#endif -double E11_data[19] = { +EXCHCXX_READONLY_TABLE double E11_data[19] = { -16.11346165557149402600, 7.79407277874268027690, -1.95540581886314195070, 0.37337293866277945612, -0.05692503191092901938, 0.00721107776966009185, -0.00078104901449841593, 0.00007388093356262168, -0.00000620286187580820, 0.00000046816002303176, -0.00000003209288853329, 0.00000000201519974874, -0.00000000011673686816, 0.00000000000627627066, -0.00000000000031481541, 0.00000000000001479904, -0.00000000000000065457, 0.00000000000000002733, -0.00000000000000000108 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ static -#elif defined(__SYCL_DEVICE_ONLY__) -inline constexpr -#else -static -#endif -double E12_data[16] = { +EXCHCXX_READONLY_TABLE double E12_data[16] = { -0.03739021479220279500, 0.04272398606220957700, -0.13031820798497005440, 0.01441912402469889073, -0.00134617078051068022, 0.00010731029253063780, -0.00000742999951611943, 0.00000045377325690753, -0.00000002476417211390, 0.00000000122076581374, -0.00000000005485141480, 0.00000000000226362142, -0.00000000000008635897, 0.00000000000000306291, -0.00000000000000010148, 0.00000000000000000315 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ static -#elif defined(__SYCL_DEVICE_ONLY__) -inline constexpr -#else -static -#endif -double AE13_data[25] = { +EXCHCXX_READONLY_TABLE double AE13_data[25] = { -0.605773246640603460, -0.112535243483660900, 0.013432266247902779, 
-0.001926845187381145, 0.000309118337720603, -0.000053564132129618, 0.000009827812880247, -0.000001885368984916, 0.000000374943193568, -0.000000076823455870, 0.000000016143270567, -0.000000003466802211, 0.000000000758754209, -0.000000000168864333, 0.000000000038145706, @@ -217,14 +189,7 @@ double AE13_data[25] = { 0.000000000000006457, -0.000000000000001568, 0.000000000000000383, -0.000000000000000094, 0.000000000000000023 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ static -#elif defined(__SYCL_DEVICE_ONLY__) -inline constexpr -#else -static -#endif -double AE14_data[26] = { +EXCHCXX_READONLY_TABLE double AE14_data[26] = { -0.18929180007530170, -0.08648117855259871, 0.00722410154374659, -0.00080975594575573, 0.00010999134432661, -0.00001717332998937, 0.00000298562751447, -0.00000056596491457, 0.00000011526808397, -0.00000002495030440, 0.00000000569232420, -0.00000000135995766, 0.00000000033846628, -0.00000000008737853, 0.00000000002331588, diff --git a/include/exchcxx/util/exchcxx_macros.hpp b/include/exchcxx/util/exchcxx_macros.hpp index 2a6c5a7..7fbf0fa 100644 --- a/include/exchcxx/util/exchcxx_macros.hpp +++ b/include/exchcxx/util/exchcxx_macros.hpp @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
- * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -159,7 +159,7 @@ RET_GENERATOR_DEVICE( LDA, EXC_VXC, func ) #define RET_LDA_VXC_FXC_GENERATOR_DEVICE(func) \ RET_GENERATOR_DEVICE( LDA, VXC_FXC, func ) - + #define LDA_EXC_GENERATOR_DEVICE(func) \ void RET_LDA_EXC_GENERATOR_DEVICE(func) @@ -186,7 +186,7 @@ RET_INC_GENERATOR_DEVICE( LDA, EXC_VXC, func ) #define RET_LDA_VXC_FXC_INC_GENERATOR_DEVICE(func) \ RET_INC_GENERATOR_DEVICE( LDA, VXC_FXC, func ) - + #define LDA_EXC_INC_GENERATOR_DEVICE(func) \ void RET_LDA_EXC_INC_GENERATOR_DEVICE(func) @@ -217,7 +217,7 @@ RET_GENERATOR_SYCL_KERNEL( LDA, EXC_VXC, func ) #define RET_LDA_VXC_FXC_GENERATOR_SYCL_KERNEL(func) \ RET_GENERATOR_SYCL_KERNEL( LDA, VXC_FXC, func ) - + #define LDA_EXC_GENERATOR_SYCL_KERNEL(func) \ void RET_LDA_EXC_GENERATOR_SYCL_KERNEL(func) @@ -231,6 +231,7 @@ void 
RET_LDA_EXC_VXC_GENERATOR_SYCL_KERNEL(func) #define LDA_VXC_FXC_GENERATOR_SYCL_KERNEL(func) \ void RET_LDA_VXC_FXC_GENERATOR_SYCL_KERNEL(func) + #define RET_LDA_EXC_INC_GENERATOR_SYCL_KERNEL(func) \ RET_INC_GENERATOR_SYCL_KERNEL( LDA, EXC, func ) @@ -244,7 +245,7 @@ RET_INC_GENERATOR_SYCL_KERNEL( LDA, EXC_VXC, func ) #define RET_LDA_VXC_FXC_INC_GENERATOR_SYCL_KERNEL(func) \ RET_INC_GENERATOR_SYCL_KERNEL( LDA, VXC_FXC, func ) - + #define LDA_EXC_INC_GENERATOR_SYCL_KERNEL(func) \ void RET_LDA_EXC_INC_GENERATOR_SYCL_KERNEL(func) @@ -262,7 +263,7 @@ #endif -// GGA Generators +// GGA Generators #define RET_GGA_EXC_GENERATOR(func) RET_GENERATOR( GGA, EXC, func ) #define RET_GGA_VXC_GENERATOR(func) RET_GENERATOR( GGA, VXC, func ) @@ -579,3 +580,6 @@ } #endif + + + diff --git a/src/sycl/builtin_sycl.cxx b/src/sycl/builtin_sycl.cxx index d56b4a3..8631f1b 100644 --- a/src/sycl/builtin_sycl.cxx +++ b/src/sycl/builtin_sycl.cxx @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. 
of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your diff --git a/src/sycl/libxc_device.cxx b/src/sycl/libxc_device.cxx index a19f6b8..758485e 100644 --- a/src/sycl/libxc_device.cxx +++ b/src/sycl/libxc_device.cxx @@ -2,23 +2,23 @@ * ExchCXX Copyright (c) 2020-2022, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of * any required approvals from the U.S. Dept. of Energy). All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. 
of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -30,7 +30,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -51,31 +51,33 @@ -void recv_from_device( void* dest, const void* src, const size_t len, +void recv_from_device( void* dest, const void* src, const size_t len, sycl::queue* queue ) { try { queue->memcpy( dest, src, len ); - } catch( sycl::exception const &ex ) { - throw( std::runtime_error( "recv failed + " + std::string(ex.what()) ) ); + } catch( sycl::exception const &ex ) { + throw( std::runtime_error( "recv failed + " + std::string(ex.what()) ) ); } } -void send_to_device( void* dest, const void* src, const size_t len, +void send_to_device( void* dest, const void* src, const size_t len, sycl::queue* queue ) { try { queue->memcpy( dest, src, len ); - } catch( sycl::exception const &ex ) { - throw( std::runtime_error( "send failed + " + std::string(ex.what()) ) ); + } catch( sycl::exception const &ex ) { + throw( std::runtime_error( "send failed + " + std::string(ex.what()) ) ); } } void queue_sync( sycl::queue* queue ) { + queue->wait_and_throw(); + } diff --git a/src/sycl/xc_functional_device.cxx b/src/sycl/xc_functional_device.cxx index e862dd6..42cce84 100644 --- a/src/sycl/xc_functional_device.cxx +++ b/src/sycl/xc_functional_device.cxx 
@@ -2,23 +2,23 @@ * ExchCXX Copyright (c) 2020-2022, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of * any required approvals from the U.S. Dept. of Energy). All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -30,7 +30,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your diff --git a/test/xc_kernel_test.cxx b/test/xc_kernel_test.cxx index 9a90b6a..53998f6 100644 --- a/test/xc_kernel_test.cxx +++ b/test/xc_kernel_test.cxx @@ -1,30 +1,30 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of - * any required approvals from the U.S. Dept. of Energy). + * any required approvals from the U.S. Dept. of Energy). * * Portions Copyright (c) Microsoft Corporation. * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * (1) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * (2) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * (3) Neither the name of the University of California, Lawrence Berkeley * National Laboratory, U.S. Dept. of Energy nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
- * - * + * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -36,7 +36,7 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * + * * You are under no obligation whatsoever to provide any bug fixes, patches, * or upgrades to the features, functionality or performance of the source * code ("Enhancements") to anyone; however, if you choose to make your @@ -462,7 +462,7 @@ void kernel_test( TestInterface interface, Backend backend, Kernel kern, const double fill_val_vs = 50.; const double fill_val_vl = 3.; const double fill_val_vt = 5.; - + const double fill_val_v2rho2 = 10.; const double fill_val_v2rhosigma = 11.; const double fill_val_v2rholapl = 12.; @@ -476,7 +476,7 @@ void kernel_test( TestInterface interface, Backend backend, Kernel kern, const bool use_ref_values = (interface != TestInterface::EXC_INC) and - (interface != TestInterface::EXC_VXC_INC) and + (interface != TestInterface::EXC_VXC_INC) and (interface != TestInterface::FXC_INC) and (interface != TestInterface::VXC_FXC_INC); @@ -692,7 +692,7 @@ void kernel_test( TestInterface interface, Backend backend, Kernel kern, auto ref_vals = use_ref_values ? 
load_mgga_reference_values( kern, polar, func.needs_laplacian() ) : gen_mgga_reference_values( backend,kern, polar ); - //auto ref_vals = + //auto ref_vals = // gen_mgga_reference_values( backend,kern, polar ); size_t npts = ref_vals.npts; @@ -937,7 +937,7 @@ TEST_CASE( "Libxc Correctness Check", "[xc-libxc]" ) { kernel_test( TestInterface::EXC_VXC, Backend::libxc, Kernel::SCAN_X, Spin::Polarized ); } - + SECTION( "R2SCANL Unpolarized: EXC" ) { kernel_test( TestInterface::EXC, Backend::libxc, Kernel::R2SCANL_X, @@ -984,7 +984,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, const int len_sigma = func_libxc.sigma_buffer_len( npts ); const int len_lapl = func_libxc.lapl_buffer_len( npts ); const int len_tau = func_libxc.tau_buffer_len( npts ); - + const int len_v2rho2 = func_libxc.v2rho2_buffer_len( npts ); const int len_v2rhosigma = func_libxc.v2rhosigma_buffer_len( npts ); const int len_v2rholapl = func_libxc.v2rholapl_buffer_len( npts ); @@ -1029,14 +1029,14 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, tau_use = tau_zero; } - + std::vector exc_libxc( func_builtin.exc_buffer_len(npts) ); std::vector vrho_libxc( func_builtin.vrho_buffer_len(npts) ); std::vector vsigma_libxc( func_builtin.vsigma_buffer_len(npts) ); std::vector vlapl_libxc( func_builtin.vlapl_buffer_len(npts) ); std::vector vtau_libxc( func_builtin.vtau_buffer_len(npts) ); - + std::vector v2rho2_libxc ( len_v2rho2 ); std::vector v2rhosigma_libxc ( len_v2rhosigma ); std::vector v2rholapl_libxc ( len_v2rholapl ); @@ -1053,7 +1053,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, std::vector vsigma_builtin( func_builtin.vsigma_buffer_len(npts) ); std::vector vlapl_builtin( func_builtin.vlapl_buffer_len(npts) ); std::vector vtau_builtin( func_builtin.vtau_buffer_len(npts) ); - + std::vector v2rho2_builtin ( func_builtin.v2rho2_buffer_len(npts) ); std::vector v2rhosigma_builtin ( func_builtin.v2rhosigma_buffer_len(npts) ); 
std::vector v2rholapl_builtin ( func_builtin.v2rholapl_buffer_len(npts) ); @@ -1080,15 +1080,15 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, vrho_builtin.data() ); } else if( interface == TestInterface::FXC ) { - + func_libxc.eval_fxc( npts, rho_use.data(), v2rho2_libxc.data() ); func_builtin.eval_fxc( npts, rho_use.data(), v2rho2_builtin.data() ); - + } else if( interface == TestInterface::VXC_FXC ) { - + func_libxc.eval_vxc_fxc( npts, rho_use.data(), vrho_libxc.data(), v2rho2_libxc.data() ); func_builtin.eval_vxc_fxc( npts, rho_use.data(), vrho_builtin.data(), v2rho2_builtin.data() ); - + } } else if( func_libxc.is_gga() ) { @@ -1108,19 +1108,19 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, exc_builtin.data(), vrho_builtin.data(), vsigma_builtin.data() ); } else if( interface == TestInterface::FXC ) { - + func_libxc.eval_fxc( npts, rho_use.data(), sigma_use.data(), v2rho2_libxc.data(), v2rhosigma_libxc.data(), v2sigma2_libxc.data() ); func_builtin.eval_fxc( npts, rho_use.data(), sigma_use.data(), v2rho2_builtin.data(), v2rhosigma_builtin.data(), v2sigma2_builtin.data() ); - + } else if( interface == TestInterface::VXC_FXC ) { - + func_libxc.eval_vxc_fxc( npts, rho_use.data(), sigma_use.data(), vrho_libxc.data(), vsigma_libxc.data(), v2rho2_libxc.data(), v2rhosigma_libxc.data(), v2sigma2_libxc.data() ); func_builtin.eval_vxc_fxc( npts, rho_use.data(), sigma_use.data(), vrho_builtin.data(), vsigma_builtin.data(), v2rho2_builtin.data(), v2rhosigma_builtin.data(), v2sigma2_builtin.data() ); - + } } else if( func_libxc.is_mgga() ) { @@ -1140,7 +1140,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, lapl_use.data(), tau_use.data(), exc_builtin.data(), vrho_builtin.data(), vsigma_builtin.data(), vlapl_builtin.data(), vtau_builtin.data() ); } else if( interface == TestInterface::FXC ) { - + func_libxc.eval_fxc( npts, rho_use.data(), sigma_use.data(), lapl_use.data(), tau_use.data(), 
v2rho2_libxc.data(), v2rhosigma_libxc.data(), v2rholapl_libxc.data(), @@ -1153,9 +1153,9 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, v2rhotau_builtin.data(), v2sigma2_builtin.data(), v2sigmalapl_builtin.data(), v2sigmatau_builtin.data(), v2lapl2_builtin.data(), v2lapltau_builtin.data(), v2tau2_builtin.data() ); - + } else if( interface == TestInterface::VXC_FXC ) { - + func_libxc.eval_vxc_fxc( npts, rho_use.data(), sigma_use.data(), lapl_use.data(), tau_use.data(), vrho_libxc.data(), vsigma_libxc.data(), vlapl_libxc.data(), vtau_libxc.data(), @@ -1170,7 +1170,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, v2rhotau_builtin.data(), v2sigma2_builtin.data(), v2sigmalapl_builtin.data(), v2sigmatau_builtin.data(), v2lapl2_builtin.data(), v2lapltau_builtin.data(), v2tau2_builtin.data() ); - + } } @@ -1201,7 +1201,7 @@ void compare_libxc_builtin( TestInterface interface, EvalType evaltype, CHECK( vtau_builtin[i] == Approx(vtau_libxc[i]) ); } } - + if( interface == TestInterface::FXC || interface == TestInterface::VXC_FXC ) { for( auto i = 0ul; i < len_v2rho2; ++i ) { INFO( "V2RHO2 Fails: Kernel is " << kern << ", builtin = " << v2rho2_builtin[i] << ", libxc = " << v2rho2_libxc[i] ); @@ -1264,7 +1264,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC, EvalType::Regular, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Regular Eval : EXC + VXC" ) { @@ -1272,7 +1272,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC_VXC, EvalType::Regular, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Regular Eval : FXC" ) { @@ -1281,7 +1281,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::FXC, EvalType::Regular, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized 
Regular Eval : VXC + FXC" ) { @@ -1290,14 +1290,14 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::VXC_FXC, EvalType::Regular, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Small Eval : EXC" ) { for( auto kern : builtin_supported_kernels ) { if(is_unstable_small(kern)) continue; if(is_epc(kern)) continue; - + compare_libxc_builtin( TestInterface::EXC, EvalType::Small, kern, Spin::Unpolarized ); } @@ -1337,7 +1337,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC, EvalType::Zero, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Zero Eval : EXC + VXC" ) { @@ -1345,7 +1345,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC_VXC, EvalType::Zero, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Zero Eval : FXC" ) { @@ -1354,7 +1354,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::FXC, EvalType::Zero, kern, Spin::Unpolarized ); - } + } } SECTION( "Unpolarized Zero Eval : VXC + FXC" ) { @@ -1363,7 +1363,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::VXC_FXC, EvalType::Zero, kern, Spin::Unpolarized ); - } + } } @@ -1383,7 +1383,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC_VXC, EvalType::Regular, kern, Spin::Polarized ); - } + } } SECTION( "Polarized Regular Eval : FXC" ) { @@ -1401,7 +1401,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::VXC_FXC, EvalType::Regular, kern, Spin::Polarized ); - } + } } SECTION( "Polarized Small Eval : EXC" ) { @@ -1449,7 +1449,7 @@ TEST_CASE( "Builtin Corectness 
Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::EXC, EvalType::Zero, kern, Spin::Polarized ); - } + } } SECTION( "Polarized Zero Eval : EXC + VXC" ) { @@ -1466,7 +1466,7 @@ TEST_CASE( "Builtin Corectness Test", "[xc-builtin]" ) { if(is_epc(kern)) continue; compare_libxc_builtin( TestInterface::FXC, EvalType::Zero, kern, Spin::Polarized ); - } + } } SECTION( "Polarized Zero Eval : VXC + FXC" ) { @@ -1523,7 +1523,7 @@ TEST_CASE( "Scale and Increment Interface", "[xc-inc]" ) { } SECTION( "Builtin Polarized EXC + VXC" ) { - for( auto kern : builtin_supported_kernels ) + for( auto kern : builtin_supported_kernels ) kernel_test( TestInterface::EXC_VXC_INC, Backend::builtin, kern, Spin::Polarized ); } @@ -1861,7 +1861,7 @@ void test_device_interface( TestInterface interface, EvalType evaltype, safe_cuda_cpy( v2lapltau_device, v2lapltau.data(), len_v2lapltau ); } - + // Evaluate functional on device cudaStream_t stream = 0; if( interface == TestInterface::EXC ) { @@ -1907,7 +1907,7 @@ void test_device_interface( TestInterface interface, EvalType evaltype, exc_device, vrho_device, vsigma_device, stream ); else if( func.is_mgga() ) func.eval_exc_vxc_inc_device( alpha, npts, rho_device, sigma_device, - lapl_device, tau_device, exc_device, vrho_device, vsigma_device, + lapl_device, tau_device, exc_device, vrho_device, vsigma_device, vlapl_device, vtau_device, stream ); } else if( interface == TestInterface::FXC ) { @@ -2144,10 +2144,12 @@ void test_device_interface( TestInterface interface, EvalType evaltype, } -#endif // EXCHCXX_ENABLE_CUDA/HIP +#endif + + #ifdef EXCHCXX_ENABLE_SYCL inline sycl::queue q{ sycl::default_selector_v, @@ -2707,6 +2709,8 @@ void test_device_interface( TestInterface interface, EvalType evaltype, #endif // EXCHCXX_ENABLE_SYCL + + #ifdef EXCHCXX_ENABLE_DEVICE TEST_CASE( "GPU Interfaces", "[xc-device]" ) { @@ -3591,7 +3595,7 @@ TEST_CASE( "GPU Interfaces", "[xc-device]" ) { Backend::builtin, kern, 
Spin::Polarized ); } } - + SECTION("VXC + FXC Zero: Polarized") { for( auto kern : builtin_supported_kernels ) { if(is_deorbitalized(kern)) continue; @@ -3632,5 +3636,6 @@ TEST_CASE( "GPU Interfaces", "[xc-device]" ) { } - #endif // EXCHCXX_ENABLE_DEVICE + +#endif From 0dc84b0e395ce871761bd45296cd2953b8eea1a1 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 29 Sep 2025 17:01:59 +0000 Subject: [PATCH 09/14] [SYCL] restore white-spaces --- CMakeLists.txt | 10 +++++----- test/xc_kernel_test.cxx | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9856aff..f5604bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,7 @@ if( ${Libxc_FOUND} ) else() option( FETCHCONTENT_LIBXC_GIT_SHALLOW "Whether to use GIT_SHALLOW for FetchContent'ing libxc" ON ) - + FetchContent_Declare( libxc GIT_REPOSITORY https://gitlab.com/libxc/libxc.git @@ -91,8 +91,8 @@ else() FetchContent_MakeAvailable( libxc ) add_library( Libxc::xc ALIAS xc ) - target_include_directories( xc - PUBLIC + target_include_directories( xc + PUBLIC $ $ $ @@ -104,7 +104,7 @@ else() set_target_properties(xc PROPERTIES UNITY_BUILD OFF) message(STATUS "Will disable unity-build for Libxc::xc") endif() - + set( BUILD_TESTING ${OLD_BUILD_TESTING} CACHE BOOL "" FORCE ) endif() @@ -112,7 +112,7 @@ endif() else( EXCHCXX_ENABLE_LIBXC ) set( Libxc_FOUND FALSE ) endif( EXCHCXX_ENABLE_LIBXC ) - + add_subdirectory( src ) # Testing diff --git a/test/xc_kernel_test.cxx b/test/xc_kernel_test.cxx index 53998f6..71a099c 100644 --- a/test/xc_kernel_test.cxx +++ b/test/xc_kernel_test.cxx @@ -3637,5 +3637,3 @@ TEST_CASE( "GPU Interfaces", "[xc-device]" ) { } #endif // EXCHCXX_ENABLE_DEVICE - -#endif From 6d008d756fdb37f18e171b70038749a12f8fb7fe Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Mon, 29 Sep 2025 17:43:14 +0000 Subject: [PATCH 10/14] [SYCL] AoT builds --- CMakeLists.txt | 64 +++++++++++++++++++++++++++++++++++-- src/sycl/exchcxx_sycl.cmake | 20 
+----------- 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f5604bb..e11dfa3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,13 @@ option( EXCHCXX_ENABLE_SYCL "Enable Device Code (SYCL)" OFF ) option( EXCHCXX_ENABLE_LIBXC "Enable Libxc Backend" ON ) option( BUILD_SHARED_LIBS "Build Shared Libs" OFF ) +# --- SYCL Architecture Options (mutually exclusive) --- +option(EXCHCXX_SYCL_INTEL_PVC_ARCH "Target Intel PVC (Ponte Vecchio)" OFF) +option(EXCHCXX_SYCL_NV_SM_80_ARCH "Target NVIDIA SM_80 (A100)" OFF) +option(EXCHCXX_SYCL_NV_SM_90_ARCH "Target NVIDIA SM_90 (H100/H200)" OFF) +option(EXCHCXX_SYCL_AMD_GFX90A_ARCH "Target AMD gfx90a (MI200)" OFF) +option(EXCHCXX_SYCL_AMD_GFX942_ARCH "Target AMD gfx942 (MI300A/X)" OFF) + # Decided if we're compiling device bindings if( EXCHCXX_ENABLE_CUDA OR EXCHCXX_ENABLE_SYCL OR EXCHCXX_ENABLE_HIP ) @@ -31,9 +38,60 @@ endif() if(EXCHCXX_ENABLE_SYCL) - # e.g. intel_gpu_pvc | nvidia_gpu_sm_80 | nvidia_gpu_sm_90 | amd_gpu_gfx90a | amd_gpu_gfx942 - set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Alias for -fsycl-targets (see Users Manual)") -endif() + # Allowed alias strings (as consumed by -fsycl-targets) + set(_EXCHCXX_SYCL_ALLOWED + intel_gpu_pvc + nvidia_gpu_sm_80 + nvidia_gpu_sm_90 + amd_gpu_gfx90a + amd_gpu_gfx942 + ) + + # Collect selections from booleans + set(_EXCHCXX_SYCL_FROM_BOOLEANS "") + if(EXCHCXX_SYCL_INTEL_PVC_ARCH) + list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS intel_gpu_pvc) + endif() + if(EXCHCXX_SYCL_NV_SM_80_ARCH) + list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS nvidia_gpu_sm_80) + endif() + if(EXCHCXX_SYCL_NV_SM_90_ARCH) + list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS nvidia_gpu_sm_90) + endif() + if(EXCHCXX_SYCL_AMD_GFX90A_ARCH) + list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS amd_gpu_gfx90a) + endif() + if(EXCHCXX_SYCL_AMD_GFX942_ARCH) + list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS amd_gpu_gfx942) + endif() + + # Resolve the selected target (prefer booleans; fall back to user-provided 
EXCHCXX_SYCL_TARGET) + set(_EXCHCXX_SELECTED "") + list(LENGTH _EXCHCXX_SYCL_FROM_BOOLEANS _exchcxx_bool_count) + if(_exchcxx_bool_count GREATER 1) + message(FATAL_ERROR + "Multiple SYCL arch options enabled (${_EXCHCXX_SYCL_FROM_BOOLEANS}). Enable exactly one.") + elseif(_exchcxx_bool_count EQUAL 1) + list(GET _EXCHCXX_SYCL_FROM_BOOLEANS 0 _EXCHCXX_SELECTED) + elseif(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "") + set(_EXCHCXX_SELECTED "${EXCHCXX_SYCL_TARGET}") # backward compat + endif() + + # Validate & cache the resolved target (or leave empty for JIT) + if(NOT _EXCHCXX_SELECTED STREQUAL "") + list(FIND _EXCHCXX_SYCL_ALLOWED "${_EXCHCXX_SELECTED}" _exchcxx_sycl_idx) + if(_exchcxx_sycl_idx EQUAL -1) + message(FATAL_ERROR + "Invalid SYCL AoT target '${_EXCHCXX_SELECTED}'. " + "Allowed values: ${_EXCHCXX_SYCL_ALLOWED}") + endif() + set(EXCHCXX_SYCL_TARGET "${_EXCHCXX_SELECTED}" CACHE STRING "Resolved SYCL target alias" FORCE) + else() + set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Resolved SYCL target alias (empty => JIT)" FORCE) + message(STATUS "ExchCXX SYCL AoT disabled (no target selected) — will JIT at runtime.") + endif() + +endif(EXCHCXX_ENABLE_SYCL) # Append local cmake directory to find CMAKE Modules diff --git a/src/sycl/exchcxx_sycl.cmake b/src/sycl/exchcxx_sycl.cmake index 3b05354..fbd7a9e 100644 --- a/src/sycl/exchcxx_sycl.cmake +++ b/src/sycl/exchcxx_sycl.cmake @@ -13,32 +13,14 @@ list( APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ) find_package( SYCL REQUIRED ) target_link_libraries( exchcxx PUBLIC SYCL::SYCL ) -# --- AoT-builds SYCL target alias pass-through --- -set(_EXCHCXX_SYCL_ALLOWED - intel_gpu_pvc - nvidia_gpu_sm_80 - nvidia_gpu_sm_90 - amd_gpu_gfx90a - amd_gpu_gfx942) if(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "") - list(FIND _EXCHCXX_SYCL_ALLOWED "${EXCHCXX_SYCL_TARGET}" _exchcxx_sycl_idx) - if(_exchcxx_sycl_idx EQUAL -1) - message(FATAL_ERROR - "Invalid 
EXCHCXX_SYCL_TARGET='${EXCHCXX_SYCL_TARGET}'. " - "Allowed values: ${_EXCHCXX_SYCL_ALLOWED}") - endif() - - # Apply ONLY to this target (both compile & link) target_compile_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) target_link_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) message(STATUS "ExchCXX SYCL AoT enabled for target: ${EXCHCXX_SYCL_TARGET}") - - # target_compile_options( exchcxx PRIVATE -Wno-unused-parameter -Wno-unused-variable -fsycl-device-only -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" ) - # target_link_options( exchcxx PRIVATE -fsycl-device-only -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" ) - endif() + target_compile_options(exchcxx PRIVATE $<$:-ffp-model=precise>) target_link_options(exchcxx PRIVATE -fsycl-max-parallel-link-jobs=20) From ecb0556406d5f57f9c1ad1c2b9853b22f01764cf Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Tue, 30 Sep 2025 03:24:43 +0000 Subject: [PATCH 11/14] [SYCL] fix cmake changes --- CMakeLists.txt | 65 +------------------ .../impl/builtin/kernels/deorbitalized.hpp | 2 +- src/sycl/exchcxx_sycl.cmake | 13 ++++ 3 files changed, 17 insertions(+), 63 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e11dfa3..92eca2a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,14 +12,6 @@ option( EXCHCXX_ENABLE_SYCL "Enable Device Code (SYCL)" OFF ) option( EXCHCXX_ENABLE_LIBXC "Enable Libxc Backend" ON ) option( BUILD_SHARED_LIBS "Build Shared Libs" OFF ) -# --- SYCL Architecture Options (mutually exclusive) --- -option(EXCHCXX_SYCL_INTEL_PVC_ARCH "Target Intel PVC (Ponte Vecchio)" OFF) -option(EXCHCXX_SYCL_NV_SM_80_ARCH "Target NVIDIA SM_80 (A100)" OFF) -option(EXCHCXX_SYCL_NV_SM_90_ARCH "Target NVIDIA SM_90 (H100/H200)" OFF) -option(EXCHCXX_SYCL_AMD_GFX90A_ARCH "Target AMD gfx90a (MI200)" OFF) -option(EXCHCXX_SYCL_AMD_GFX942_ARCH "Target AMD gfx942 (MI300A/X)" OFF) - - # Decided if we're compiling device bindings if( EXCHCXX_ENABLE_CUDA 
OR EXCHCXX_ENABLE_SYCL OR EXCHCXX_ENABLE_HIP ) set( EXCHCXX_ENABLE_DEVICE TRUE CACHE BOOL "Enable Device Code" ) @@ -38,60 +30,9 @@ endif() if(EXCHCXX_ENABLE_SYCL) - # Allowed alias strings (as consumed by -fsycl-targets) - set(_EXCHCXX_SYCL_ALLOWED - intel_gpu_pvc - nvidia_gpu_sm_80 - nvidia_gpu_sm_90 - amd_gpu_gfx90a - amd_gpu_gfx942 - ) - - # Collect selections from booleans - set(_EXCHCXX_SYCL_FROM_BOOLEANS "") - if(EXCHCXX_SYCL_INTEL_PVC_ARCH) - list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS intel_gpu_pvc) - endif() - if(EXCHCXX_SYCL_NV_SM_80_ARCH) - list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS nvidia_gpu_sm_80) - endif() - if(EXCHCXX_SYCL_NV_SM_90_ARCH) - list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS nvidia_gpu_sm_90) - endif() - if(EXCHCXX_SYCL_AMD_GFX90A_ARCH) - list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS amd_gpu_gfx90a) - endif() - if(EXCHCXX_SYCL_AMD_GFX942_ARCH) - list(APPEND _EXCHCXX_SYCL_FROM_BOOLEANS amd_gpu_gfx942) - endif() - - # Resolve the selected target (prefer booleans; fall back to user-provided EXCHCXX_SYCL_TARGET) - set(_EXCHCXX_SELECTED "") - list(LENGTH _EXCHCXX_SYCL_FROM_BOOLEANS _exchcxx_bool_count) - if(_exchcxx_bool_count GREATER 1) - message(FATAL_ERROR - "Multiple SYCL arch options enabled (${_EXCHCXX_SYCL_FROM_BOOLEANS}). Enable exactly one.") - elseif(_exchcxx_bool_count EQUAL 1) - list(GET _EXCHCXX_SYCL_FROM_BOOLEANS 0 _EXCHCXX_SELECTED) - elseif(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "") - set(_EXCHCXX_SELECTED "${EXCHCXX_SYCL_TARGET}") # backward compat - endif() - - # Validate & cache the resolved target (or leave empty for JIT) - if(NOT _EXCHCXX_SELECTED STREQUAL "") - list(FIND _EXCHCXX_SYCL_ALLOWED "${_EXCHCXX_SELECTED}" _exchcxx_sycl_idx) - if(_exchcxx_sycl_idx EQUAL -1) - message(FATAL_ERROR - "Invalid SYCL AoT target '${_EXCHCXX_SELECTED}'. 
" - "Allowed values: ${_EXCHCXX_SYCL_ALLOWED}") - endif() - set(EXCHCXX_SYCL_TARGET "${_EXCHCXX_SELECTED}" CACHE STRING "Resolved SYCL target alias" FORCE) - else() - set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Resolved SYCL target alias (empty => JIT)" FORCE) - message(STATUS "ExchCXX SYCL AoT disabled (no target selected) — will JIT at runtime.") - endif() - -endif(EXCHCXX_ENABLE_SYCL) + # e.g. intel_gpu_pvc | nvidia_gpu_sm_80 | nvidia_gpu_sm_90 | amd_gpu_gfx90a | amd_gpu_gfx942 + set(EXCHCXX_SYCL_TARGET "" CACHE STRING "Alias for -fsycl-targets (see Users Manual)") +endif() # Append local cmake directory to find CMAKE Modules diff --git a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp index e00bc23..654e615 100644 --- a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp +++ b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp @@ -1,5 +1,5 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of diff --git a/src/sycl/exchcxx_sycl.cmake b/src/sycl/exchcxx_sycl.cmake index fbd7a9e..9ce1e4d 100644 --- a/src/sycl/exchcxx_sycl.cmake +++ b/src/sycl/exchcxx_sycl.cmake @@ -14,7 +14,20 @@ find_package( SYCL REQUIRED ) target_link_libraries( exchcxx PUBLIC SYCL::SYCL ) +# --- AoT-builds SYCL target alias pass-through --- +set(_EXCHCXX_SYCL_ALLOWED + intel_gpu_pvc + nvidia_gpu_sm_80 + nvidia_gpu_sm_90 + amd_gpu_gfx90a + amd_gpu_gfx942 +) if(DEFINED EXCHCXX_SYCL_TARGET AND NOT EXCHCXX_SYCL_TARGET STREQUAL "") + list(FIND _EXCHCXX_SYCL_ALLOWED "${EXCHCXX_SYCL_TARGET}" _exchcxx_sycl_idx) + if(_exchcxx_sycl_idx EQUAL -1) + message(FATAL_ERROR "Invalid EXCHCXX_SYCL_TARGET='${EXCHCXX_SYCL_TARGET}'. 
" "Allowed values: ${_EXCHCXX_SYCL_ALLOWED}") + endif() + target_compile_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) target_link_options( exchcxx PRIVATE -fsycl-targets=${EXCHCXX_SYCL_TARGET} ) message(STATUS "ExchCXX SYCL AoT enabled for target: ${EXCHCXX_SYCL_TARGET}") From 089c5873a4ab23c85e90da48e8bf53731e7a7887 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Tue, 30 Sep 2025 16:26:35 +0000 Subject: [PATCH 12/14] [SYCL] address white-space --- include/exchcxx/impl/builtin/kernels/deorbitalized.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp index 654e615..e00bc23 100644 --- a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp +++ b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp @@ -1,5 +1,5 @@ /** - * ExchCXX + * ExchCXX * * Copyright (c) 2020-2024, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of From b38d8f064be46be9b78515ec41c28d5b4f90b33d Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Tue, 30 Sep 2025 19:22:10 +0000 Subject: [PATCH 13/14] restore the entire headers --- .../impl/builtin/kernels/deorbitalized.hpp | 16 ++------ include/exchcxx/impl/builtin/util.hpp | 38 ++++++++++++------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp index e00bc23..474f04c 100644 --- a/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp +++ b/include/exchcxx/impl/builtin/kernels/deorbitalized.hpp @@ -141,10 +141,8 @@ struct kernel_traits> { double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { - #if defined(__CUDACC__) || defined(__HIPCC__) + #if defined(__CUDACC__) || 
defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n"); - #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) - sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels\n"); #else unused(rho, sigma, lapl, tau, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2); throw std::runtime_error("eval_vxc_fxc_unpolar not implemented for deorbitalized kernels"); @@ -172,10 +170,8 @@ struct kernel_traits> { double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) { - #if defined(__CUDACC__) || defined(__HIPCC__) + #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n"); - #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) - sycl::ext::oneapi::experimental::printf("eval_vxc_fxc_polar not implemented for deorbitalized kernels\n"); #else unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, vrho_a, vrho_b, vsigma_aa, vsigma_ab, vsigma_bb, vlapl_a, vlapl_b, vtau_a, vtau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, 
v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb); throw std::runtime_error("eval_vxc_fxc_polar not implemented for deorbitalized kernels"); @@ -188,10 +184,8 @@ struct kernel_traits> { double& v2rho2, double& v2rhosigma, double& v2rholapl, double& v2rhotau, double& v2sigma2, double& v2sigmalapl, double& v2sigmatau, double& v2lapl2, double& v2lapltau, double& v2tau2 ) { - #if defined(__CUDACC__) || defined(__HIPCC__) + #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); - #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) - sycl::ext::oneapi::experimental::printf("eval_fxc_unpolar not implemented for deorbitalized kernels\n"); #else unused(rho, sigma, lapl, tau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2); throw std::runtime_error("eval_fxc_unpolar not implemented for deorbitalized kernels"); @@ -216,10 +210,8 @@ struct kernel_traits> { double& v2lapl2_aa, double& v2lapl2_ab, double& v2lapl2_bb, double& v2lapltau_a_a, double& v2lapltau_a_b, double& v2lapltau_b_a, double& v2lapltau_b_b, double& v2tau2_aa, double& v2tau2_ab, double& v2tau2_bb ) { - #if defined(__CUDACC__) || defined(__HIPCC__) + #if defined(__CUDACC__) || defined(__HIPCC__) || defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) printf("eval_fxc_polar not implemented for deorbitalized kernels\n"); - #elif defined(__SYCL_DEVICE_ONLY__) || defined(EXCHCXX_ENABLE_SYCL) - sycl::ext::oneapi::experimental::printf("eval_fxc_polar not implemented for deorbitalized kernels\n"); #else unused(rho_a, rho_b, sigma_aa, sigma_ab, sigma_bb, lapl_a, lapl_b, tau_a, tau_b, v2rho2_aa, v2rho2_ab, v2rho2_bb, v2rhosigma_a_aa, v2rhosigma_a_ab, v2rhosigma_a_bb, v2rhosigma_b_aa, v2rhosigma_b_ab, v2rhosigma_b_bb, v2rholapl_a_a, 
v2rholapl_a_b, v2rholapl_b_a, v2rholapl_b_b, v2rhotau_a_a, v2rhotau_a_b, v2rhotau_b_a, v2rhotau_b_b, v2sigma2_aa_aa, v2sigma2_aa_ab, v2sigma2_aa_bb, v2sigma2_ab_ab, v2sigma2_ab_bb, v2sigma2_bb_bb, v2sigmalapl_aa_a, v2sigmalapl_aa_b, v2sigmalapl_ab_a, v2sigmalapl_ab_b, v2sigmalapl_bb_a, v2sigmalapl_bb_b, v2sigmatau_aa_a, v2sigmatau_aa_b, v2sigmatau_ab_a, v2sigmatau_ab_b, v2sigmatau_bb_a, v2sigmatau_bb_b, v2lapl2_aa, v2lapl2_ab, v2lapl2_bb, v2lapltau_a_a, v2lapltau_a_b, v2lapltau_b_a, v2lapltau_b_b, v2tau2_aa, v2tau2_ab, v2tau2_bb); throw std::runtime_error("eval_fxc_polar not implemented for deorbitalized kernels"); diff --git a/include/exchcxx/impl/builtin/util.hpp b/include/exchcxx/impl/builtin/util.hpp index 6d9e1a9..89d8bda 100644 --- a/include/exchcxx/impl/builtin/util.hpp +++ b/include/exchcxx/impl/builtin/util.hpp @@ -56,14 +56,6 @@ #include #include -#if defined(__CUDACC__) || defined(__HIPCC__) -#define EXCHCXX_READONLY_TABLE static __device__ -#elif defined(__SYCL_DEVICE_ONLY__) -#define EXCHCXX_READONLY_TABLE inline constexpr -#else -#define EXCHCXX_READONLY_TABLE static -#endif - namespace ExchCXX { @@ -148,7 +140,10 @@ SAFE_INLINE(auto) xc_cheb_eval(const double x, const double *cs, const int N) return 0.5*(b0 - b2); } // The following data is taken from libxc -EXCHCXX_READONLY_TABLE double AE11_data[39] = { +#if defined(__CUDACC__) || defined(__HIPCC__) +__device__ +#endif +static double AE11_data[39] = { 0.121503239716065790, -0.065088778513550150, 0.004897651357459670, -0.000649237843027216, 0.000093840434587471, 0.000000420236380882, -0.000008113374735904, 0.000002804247688663, 0.000000056487164441, -0.000000344809174450, 0.000000058209273578, 0.000000038711426349, -0.000000012453235014, -0.000000005118504888, 0.000000002148771527, @@ -159,7 +154,10 @@ EXCHCXX_READONLY_TABLE double AE11_data[39] = { -0.000000000000000024, -0.000000000000000201, -0.000000000000000082, 0.000000000000000017 }; -EXCHCXX_READONLY_TABLE double AE12_data[25] = { +#if 
defined(__CUDACC__) || defined(__HIPCC__) +__device__ +#endif +static double AE12_data[25] = { 0.582417495134726740, -0.158348850905782750, -0.006764275590323141, 0.005125843950185725, 0.000435232492169391, -0.000143613366305483, -0.000041801320556301, -0.000002713395758640, 0.000001151381913647, 0.000000420650022012, 0.000000066581901391, 0.000000000662143777, -0.000000002844104870, -0.000000000940724197, -0.000000000177476602, @@ -167,21 +165,30 @@ EXCHCXX_READONLY_TABLE double AE12_data[25] = { 0.000000000000010707, -0.000000000000000537, -0.000000000000000716, -0.000000000000000244, -0.000000000000000058 }; -EXCHCXX_READONLY_TABLE double E11_data[19] = { +#if defined(__CUDACC__) || defined(__HIPCC__) +__device__ +#endif +static double E11_data[19] = { -16.11346165557149402600, 7.79407277874268027690, -1.95540581886314195070, 0.37337293866277945612, -0.05692503191092901938, 0.00721107776966009185, -0.00078104901449841593, 0.00007388093356262168, -0.00000620286187580820, 0.00000046816002303176, -0.00000003209288853329, 0.00000000201519974874, -0.00000000011673686816, 0.00000000000627627066, -0.00000000000031481541, 0.00000000000001479904, -0.00000000000000065457, 0.00000000000000002733, -0.00000000000000000108 }; -EXCHCXX_READONLY_TABLE double E12_data[16] = { +#if defined(__CUDACC__) || defined(__HIPCC__) +__device__ +#endif +static double E12_data[16] = { -0.03739021479220279500, 0.04272398606220957700, -0.13031820798497005440, 0.01441912402469889073, -0.00134617078051068022, 0.00010731029253063780, -0.00000742999951611943, 0.00000045377325690753, -0.00000002476417211390, 0.00000000122076581374, -0.00000000005485141480, 0.00000000000226362142, -0.00000000000008635897, 0.00000000000000306291, -0.00000000000000010148, 0.00000000000000000315 }; -EXCHCXX_READONLY_TABLE double AE13_data[25] = { +#if defined(__CUDACC__) || defined(__HIPCC__) +__device__ +#endif +static double AE13_data[25] = { -0.605773246640603460, -0.112535243483660900, 0.013432266247902779, 
-0.001926845187381145, 0.000309118337720603, -0.000053564132129618, 0.000009827812880247, -0.000001885368984916, 0.000000374943193568, -0.000000076823455870, 0.000000016143270567, -0.000000003466802211, 0.000000000758754209, -0.000000000168864333, 0.000000000038145706, @@ -189,7 +196,10 @@ EXCHCXX_READONLY_TABLE double AE13_data[25] = { 0.000000000000006457, -0.000000000000001568, 0.000000000000000383, -0.000000000000000094, 0.000000000000000023 }; -EXCHCXX_READONLY_TABLE double AE14_data[26] = { +#if defined(__CUDACC__) || defined(__HIPCC__) +__device__ +#endif +static double AE14_data[26] = { -0.18929180007530170, -0.08648117855259871, 0.00722410154374659, -0.00080975594575573, 0.00010999134432661, -0.00001717332998937, 0.00000298562751447, -0.00000056596491457, 0.00000011526808397, -0.00000002495030440, 0.00000000569232420, -0.00000000135995766, 0.00000000033846628, -0.00000000008737853, 0.00000000002331588, From 85fd0756f80eef4a9e35a93966a0b5b0b653e4e3 Mon Sep 17 00:00:00 2001 From: Abhishek Bagusetty Date: Tue, 30 Sep 2025 19:28:10 +0000 Subject: [PATCH 14/14] redo util.hpp --- include/exchcxx/impl/builtin/util.hpp | 32 ++++++++++----------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/include/exchcxx/impl/builtin/util.hpp b/include/exchcxx/impl/builtin/util.hpp index 89d8bda..f7dbd8a 100644 --- a/include/exchcxx/impl/builtin/util.hpp +++ b/include/exchcxx/impl/builtin/util.hpp @@ -58,6 +58,13 @@ namespace ExchCXX { +#if defined(__CUDACC__) || defined(__HIPCC__) +#define EXCHCXX_READONLY_TABLE static __device__ +#elif defined(__SYCL_DEVICE_ONLY__) +#define EXCHCXX_READONLY_TABLE inline constexpr +#else +#define EXCHCXX_READONLY_TABLE static +#endif #if defined(__CUDACC__) || defined(__HIPCC__) @@ -154,10 +161,7 @@ static double AE11_data[39] = { -0.000000000000000024, -0.000000000000000201, -0.000000000000000082, 0.000000000000000017 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double AE12_data[25] = { 
+EXCHCXX_READONLY_TABLE double AE12_data[25] = { 0.582417495134726740, -0.158348850905782750, -0.006764275590323141, 0.005125843950185725, 0.000435232492169391, -0.000143613366305483, -0.000041801320556301, -0.000002713395758640, 0.000001151381913647, 0.000000420650022012, 0.000000066581901391, 0.000000000662143777, -0.000000002844104870, -0.000000000940724197, -0.000000000177476602, @@ -165,30 +169,21 @@ static double AE12_data[25] = { 0.000000000000010707, -0.000000000000000537, -0.000000000000000716, -0.000000000000000244, -0.000000000000000058 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double E11_data[19] = { +EXCHCXX_READONLY_TABLE double E11_data[19] = { -16.11346165557149402600, 7.79407277874268027690, -1.95540581886314195070, 0.37337293866277945612, -0.05692503191092901938, 0.00721107776966009185, -0.00078104901449841593, 0.00007388093356262168, -0.00000620286187580820, 0.00000046816002303176, -0.00000003209288853329, 0.00000000201519974874, -0.00000000011673686816, 0.00000000000627627066, -0.00000000000031481541, 0.00000000000001479904, -0.00000000000000065457, 0.00000000000000002733, -0.00000000000000000108 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double E12_data[16] = { +EXCHCXX_READONLY_TABLE double E12_data[16] = { -0.03739021479220279500, 0.04272398606220957700, -0.13031820798497005440, 0.01441912402469889073, -0.00134617078051068022, 0.00010731029253063780, -0.00000742999951611943, 0.00000045377325690753, -0.00000002476417211390, 0.00000000122076581374, -0.00000000005485141480, 0.00000000000226362142, -0.00000000000008635897, 0.00000000000000306291, -0.00000000000000010148, 0.00000000000000000315 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double AE13_data[25] = { +EXCHCXX_READONLY_TABLE double AE13_data[25] = { -0.605773246640603460, -0.112535243483660900, 0.013432266247902779, -0.001926845187381145, 0.000309118337720603, -0.000053564132129618, 
0.000009827812880247, -0.000001885368984916, 0.000000374943193568, -0.000000076823455870, 0.000000016143270567, -0.000000003466802211, 0.000000000758754209, -0.000000000168864333, 0.000000000038145706, @@ -196,10 +191,7 @@ static double AE13_data[25] = { 0.000000000000006457, -0.000000000000001568, 0.000000000000000383, -0.000000000000000094, 0.000000000000000023 }; -#if defined(__CUDACC__) || defined(__HIPCC__) -__device__ -#endif -static double AE14_data[26] = { +EXCHCXX_READONLY_TABLE double AE14_data[26] = { -0.18929180007530170, -0.08648117855259871, 0.00722410154374659, -0.00080975594575573, 0.00010999134432661, -0.00001717332998937, 0.00000298562751447, -0.00000056596491457, 0.00000011526808397, -0.00000002495030440, 0.00000000569232420, -0.00000000135995766, 0.00000000033846628, -0.00000000008737853, 0.00000000002331588,