diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab43b11..1ab0fa6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,12 +15,12 @@ set( SHIVA_VERSION_PATCHLEVEL 0 )
 # check if Shiva is build as a submodule or a separate project
 get_directory_property( parent_dir PARENT_DIRECTORY )
 if(parent_dir)
-  set( is_submodule ON )
+  set( SHIVA_IS_SUBMODULE ON )
 else()
-  set( is_submodule OFF )
+  set( SHIVA_IS_SUBMODULE OFF )
 endif()
 
-if( NOT is_submodule )
+if( NOT SHIVA_IS_SUBMODULE )
   message( "not a submodule")
   project( Shiva LANGUAGES CXX C )
@@ -66,12 +66,22 @@ include( cmake/Macros.cmake )
 include( cmake/Config.cmake )
 
+set(SHIVA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
+set(SHIVA_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )
+
+message( STATUS "SHIVA_BINARY_DIR: ${SHIVA_BINARY_DIR}" )
+message( STATUS "SHIVA_SOURCE_DIR: ${SHIVA_SOURCE_DIR}" )
+
+
 add_subdirectory( src )
 
-add_subdirectory( tpl/camp )
-target_compile_options( camp PRIVATE "-Wno-shadow")
-configure_file(tpl/camp/include/camp/config.in.hpp
-               ${PROJECT_BINARY_DIR}/include/camp/config.hpp)
+if( SHIVA_ENABLE_CAMP )
+  add_subdirectory( tpl/camp )
+  target_compile_options( camp PRIVATE "-Wno-shadow")
+
+  configure_file(tpl/camp/include/camp/config.in.hpp
+                 ${PROJECT_BINARY_DIR}/include/camp/config.hpp)
+endif()
 
 if( SHIVA_ENABLE_DOCS )
diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake
index cf3c275..82a64a7 100644
--- a/cmake/CMakeBasics.cmake
+++ b/cmake/CMakeBasics.cmake
@@ -27,5 +27,20 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG
                                  CLANG "-fstandalone-debug"
                                 )
-
-set( CAMP_ENABLE_TESTS OFF CACHE BOOL "")
+option( SHIVA_ENABLE_CAMP "Enable the CAMP TPL" OFF )
+option( CAMP_ENABLE_TESTS "Enable the CAMP unit tests" OFF )
+
+
+if( ENABLE_CUDA )
+  if( CUDA_VERSION AND CUDA_VERSION_MAJOR AND CUDA_VERSION_MINOR )
+    set( SHIVA_CUDA_VERSION ${CUDA_VERSION} )
+    set( SHIVA_CUDA_MAJOR ${CUDA_VERSION_MAJOR} )
+    set( SHIVA_CUDA_MINOR ${CUDA_VERSION_MINOR} )
+  else()
+    message(FATAL_ERROR "CUDA_VERSION_MAJOR and CUDA_VERSION_MINOR not defined")
+  endif()
+else()
+  set( SHIVA_CUDA_VERSION 0 )
+  set( SHIVA_CUDA_MAJOR 0 )
+  set( SHIVA_CUDA_MINOR 0 )
+endif()
diff --git a/cmake/Config.cmake b/cmake/Config.cmake
index 80af36e..849955d 100644
--- a/cmake/Config.cmake
+++ b/cmake/Config.cmake
@@ -1,6 +1,7 @@
 #
 set( PREPROCESSOR_DEFINES CUDA
                           HIP
+                          CAMP
                           BOUNDS_CHECK
    )
diff --git a/cmake/blt b/cmake/blt
index fb4246b..9ff7734 160000
--- a/cmake/blt
+++ b/cmake/blt
@@ -1 +1 @@
-Subproject commit fb4246b8bae74c3d7291bef9698fd38863844680
+Subproject commit 9ff77344f0b2a6ee345e452bddd6bfd46cbbfa35
diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index 390e74a..8e64b01 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -14,4 +14,9 @@
 /* #undef SHIVA_USE_CALIPER */
 
+#define SHIVA_USE_CAMP
+
 #define SHIVA_USE_BOUNDS_CHECK
+
+#define SHIVA_CUDA_MAJOR 0
+#define SHIVA_CUDA_MINOR 0
diff --git a/hostconfigs/TTE/maple_rocky9.cmake b/hostconfigs/TTE/maple_rocky9.cmake
new file mode 100644
index 0000000..e3f4a83
--- /dev/null
+++ b/hostconfigs/TTE/maple_rocky9.cmake
@@ -0,0 +1,24 @@
+set(CONFIG_NAME "maple_rocky9" CACHE PATH "")
+
+set(COMPILER_DIR /opt/rh/gcc-toolset-13/root/ )
+set(CMAKE_C_COMPILER ${COMPILER_DIR}/bin/gcc CACHE PATH "")
+set(CMAKE_CXX_COMPILER ${COMPILER_DIR}/bin/g++ CACHE PATH "")
+
+# C++ options
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -mtune=native -march=native" CACHE STRING "")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "")
"") + +# Cuda options +set(ENABLE_CUDA ON CACHE BOOL "") +set(CUDA_TOOLKIT_ROOT_DIR /hrtc/apps/cuda/12.6.20/aarch64/rocky9 CACHE STRING "") +set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "") +set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc CACHE STRING "") +set(CMAKE_CUDA_ARCHITECTURES 90 CACHE STRING "") +set(CMAKE_CUDA_STANDARD 17 CACHE STRING "") +set(CMAKE_CUDA_FLAGS "-restrict --expt-extended-lambda --expt-relaxed-constexpr -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "") +#set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3 -Xcompiler -mcpu=powerpc64le -Xcompiler -mtune=powerpc64le" CACHE STRING "") +#set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "") +#set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "") + +set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "Disable CAMP support" FORCE ) \ No newline at end of file diff --git a/scripts/aggregateOrSplit.py b/scripts/aggregateOrSplit.py index 89baa12..251174e 100644 --- a/scripts/aggregateOrSplit.py +++ b/scripts/aggregateOrSplit.py @@ -20,11 +20,13 @@ def create_dependency_graph(self, header, include_paths=None): if include_paths is None: include_paths = [] + header = os.path.abspath(header) # Normalize here + if header in self.dependencies: return # Already processed self.dependencies[header] = set() - base_path = os.path.dirname(os.path.abspath(header)) # Base directory of the current header + base_path = os.path.dirname(header) # Base directory of the current header try: with open(header, 'r') as file: @@ -34,10 +36,10 @@ def create_dependency_graph(self, header, include_paths=None): included_file = include_match.group(1) if included_file != self.config_file: - resolved_path = self.resolve_path( - included_file, base_path, include_paths) + resolved_path = self.resolve_path( included_file, base_path, include_paths) if resolved_path: + resolved_path = os.path.abspath(resolved_path) self.dependencies[header].add(resolved_path) if os.path.exists(resolved_path): @@ -82,16 +84,21 @@ def resolve_path(self, included_file, base_path, include_paths): return None # Return None if no resolution was possible + def generate_header_list(self): remaining_dependencies = self.dependencies.copy() size_of_remaining_dependencies = len(remaining_dependencies) + unique_files = set() # Track unique files by absolute path while size_of_remaining_dependencies > 0: local_included = [] for key in remaining_dependencies: if len(remaining_dependencies[key]) == 0: - self.included_list.append(key) + abs_key = os.path.abspath(key) + if abs_key not in unique_files: + self.included_list.append(abs_key) + unique_files.add(abs_key) local_included.append(key) for included_key in local_included: @@ -111,6 +118,7 @@ def process_header(header_path, output): """ Processes a single header file, commenting out includes and pragmas. 
""" + header_path = os.path.abspath(header_path) if header_path in self.included: return # Avoid duplicate processing self.included.add(header_path) @@ -133,6 +141,7 @@ def process_header(header_path, output): with open(output_file, 'w') as output: for header in headers: + header = os.path.abspath(header) self.create_dependency_graph(header, include_paths) for header in self.dependencies: diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ca02250..b680057 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,7 +23,8 @@ blt_add_library( NAME shiva target_include_directories( shiva INTERFACE - $ + $ + $ $ ) install( FILES ${shiva_headers} diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in index d7535f8..7103b82 100644 --- a/src/ShivaConfig.hpp.in +++ b/src/ShivaConfig.hpp.in @@ -14,4 +14,9 @@ #cmakedefine SHIVA_USE_CALIPER -#cmakedefine SHIVA_USE_BOUNDS_CHECK \ No newline at end of file +#cmakedefine SHIVA_USE_CAMP + +#cmakedefine SHIVA_USE_BOUNDS_CHECK + +#define SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@ +#define SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@ diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 683c232..bc450a1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -35,14 +35,14 @@ blt_add_library( NAME common target_include_directories( common INTERFACE - $ - $ + $ + $ $ ) target_include_directories( common SYSTEM INTERFACE - $ - $ ) + $ + $ ) install( FILES ${common_headers} DESTINATION include/common ) diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp index c3280e1..f8ddd3d 100644 --- a/src/common/ShivaMacros.hpp +++ b/src/common/ShivaMacros.hpp @@ -105,6 +105,39 @@ void i_g_n_o_r_e( ARGS const & ... ) {} +/** + * @brief This macro is used to detect the presence of builtin functions. + */ +#ifndef SHIVA_HAS_BUILTIN + #ifdef __has_builtin + #define SHIVA_HAS_BUILTIN( x ) __has_builtin( x ) + #else + #define SHIVA_HAS_BUILTIN( x ) 0 + #endif +#endif + +/** + * @brief Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain + */ +#if defined(__CUDA_ARCH__) +// Device code (nvcc, hipcc): no support in C++17 + #define SHIVA_IS_CONST_EVAL() (false) + +#elif SHIVA_HAS_BUILTIN( __builtin_is_constant_evaluated ) +// GCC / Clang host code + #define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated()) + +#elif defined(_MSC_VER) +// MSVC + #define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated()) + +#else +// Fallback: always runtime + #define SHIVA_IS_CONST_EVAL() (false) +#endif + + + /** * @brief This macro is used to implement an assertion. * @param cond The condition to assert is true. @@ -113,7 +146,7 @@ void i_g_n_o_r_e( ARGS const & ... ) {} #define SHIVA_ASSERT_MSG( cond, ... 
   do { \
     if ( !(cond)) { \
-      if ( !__builtin_is_constant_evaluated()) { \
+      if ( !SHIVA_IS_CONST_EVAL() ) { \
         shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
       } \
     } \
diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp
index 2e0baa1..eb8d3b8 100644
--- a/src/common/pmpl.hpp
+++ b/src/common/pmpl.hpp
@@ -28,25 +28,29 @@ namespace shiva
 {
 
 #if defined(SHIVA_USE_DEVICE)
-  #if defined(SHIVA_USE_CUDA)
-    #define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES );
-    #define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES );
-    #define deviceDeviceSynchronize() cudaDeviceSynchronize();
-    #define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
-    #define deviceFree( PTR ) cudaFree( PTR );
-    #define deviceError_t cudaError_t
-    #define deviceSuccess cudaSuccess
-    #define deviceGetErrorString cudaGetErrorString
-  #elif defined(SHIVA_USE_HIP)
-    #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
-    #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
-    #define deviceDeviceSynchronize() hipDeviceSynchronize();
-    #define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
-    #define deviceFree( PTR ) hipFree( PTR );
-    #define deviceError_t hipError_t
-    #define deviceSuccess = hipSuccess;
-    #define deviceGetErrorString hipGetErrorString
-  #endif
+#if defined(SHIVA_USE_CUDA)
+#define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES );
+#define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES );
+#define deviceDeviceSynchronize() cudaDeviceSynchronize();
+#define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
+#define deviceFree( PTR ) cudaFree( PTR );
+#define deviceError_t cudaError_t
+#define deviceGetErrorString cudaGetErrorString
+#define deviceMemcpyHostToDevice cudaMemcpyHostToDevice
+#define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
+constexpr cudaError_t deviceSuccess = cudaSuccess;
+#elif defined(SHIVA_USE_HIP)
+#define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
+#define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
+#define deviceDeviceSynchronize() hipDeviceSynchronize();
+#define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
+#define deviceFree( PTR ) hipFree( PTR );
+#define deviceError_t hipError_t
+#define deviceGetErrorString hipGetErrorString
+#define deviceMemcpyHostToDevice hipMemcpyHostToDevice
+#define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
+constexpr hipError_t deviceSuccess = hipSuccess;
+#endif
 #endif
 
 /**
@@ -100,9 +104,9 @@ void genericKernelWrapper( LAMBDA && func, bool const abortOnError = true )
 #if defined(SHIVA_USE_DEVICE)
 // UNCRUSTIFY-OFF
   genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ) );
-  //UNCRUSTIFY-ON
+  // UNCRUSTIFY-ON
   deviceError_t err = deviceDeviceSynchronize();
-  if ( err != cudaSuccess )
+  if ( err != deviceSuccess )
   {
     printf( "Kernel failed: %s\n", deviceGetErrorString( err ));
     if ( abortOnError )
@@ -157,13 +161,14 @@ void genericKernelWrapper( int const N, DATA_TYPE * const hostData, LAMBDA && fu
 #if defined(SHIVA_USE_DEVICE)
   DATA_TYPE * deviceData;
   deviceMalloc( &deviceData, N * sizeof(DATA_TYPE) );
+  deviceMemCpy( deviceData, hostData, N * sizeof(DATA_TYPE), deviceMemcpyHostToDevice );
 // UNCRUSTIFY-OFF
   genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ), deviceData );
 // UNCRUSTIFY-ON
   deviceError_t err = deviceDeviceSynchronize();
-  deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), cudaMemcpyDeviceToHost );
+  deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), deviceMemcpyDeviceToHost );
   deviceFree( deviceData );
-  if ( err != cudaSuccess )
+  if ( err != deviceSuccess )
   {
     printf( "Kernel failed: %s\n", deviceGetErrorString( err ));
     if ( abortOnError )
diff --git a/src/common/types.hpp b/src/common/types.hpp
index ff2afd0..9c1ce28 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -20,7 +20,6 @@
 #include "common/ShivaMacros.hpp"
 
 /// @brief Macro to define whether or not to use camp.
-#define SHIVA_USE_CAMP
 #if defined(SHIVA_USE_CAMP)
   #include <camp/camp.hpp>
 #else
@@ -52,7 +51,9 @@
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
-SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE auto make_tuple( T && ... t )
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
+auto
+make_tuple( T && ... t )
 {
   return camp::make_tuple( std::forward< T >( t ) ... );
 }
@@ -65,6 +66,7 @@
  */
 template< typename ... T >
 using tuple = cuda::std::tuple< T ... >;
+using cuda::std::get;
 
 /**
  * @brief Wrapper for cuda::std::make_tuple.
@@ -73,6 +75,7 @@ using tuple = cuda::std::tuple< T ... >;
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
 auto make_tuple( T && ... t )
 {
   return cuda::std::make_tuple( std::forward< T >( t ) ... );
@@ -92,7 +95,9 @@ using tuple = std::tuple< T ... >;
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
-auto make_tuple( T && ... t )
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
+auto
+make_tuple( T && ... t )
 {
   return std::make_tuple( std::forward< T >( t ) ... );
 }
diff --git a/src/common/unitTests/testSequenceUtilities.cpp b/src/common/unitTests/testSequenceUtilities.cpp
index fae3cc1..7764e96 100644
--- a/src/common/unitTests/testSequenceUtilities.cpp
+++ b/src/common/unitTests/testSequenceUtilities.cpp
@@ -72,7 +72,7 @@ void testNestedSequenceExpansionLambdaHelper()
     return ( executeSequence< 10 >
              ( [ h = Data::h, aa = std::integral_constant< int, a >{} ] ( auto const ... b ) constexpr
-    { return ( (h[aa] * h[b]) + ...); }
+              { return ( (h[aa] * h[b]) + ...); }
              ) + ...
           );
   } );
@@ -119,7 +119,7 @@ void testSequenceExpansionTemplateLambdaHelper()
   kernelLaunch( [] SHIVA_HOST_DEVICE ()
   {
     constexpr int staticSum0 =
-      executeSequence< 10 >( [&] < int ... a > () constexpr
+      executeSequence< 10 >( [&]< int ... a > () constexpr
     {
       return (Data::h[a] + ...);
     } );
@@ -139,13 +139,13 @@
 {
   kernelLaunch( [] SHIVA_HOST_DEVICE ()
   {
-    constexpr int staticSum0 = executeSequence< 10 >( [&] < int ... a > () constexpr
+    constexpr int staticSum0 = executeSequence< 10 >( [&]< int ... a > () constexpr
     {
       return ( executeSequence< 10 >
                (
-                 [ h = Data::h, aa = std::integral_constant< int, a >{} ] < int ... b > () constexpr
-    { return ( (h[aa] * h[b]) + ...); }
+                 [ h = Data::h, aa = std::integral_constant< int, a >{} ]< int ... b > () constexpr
+                 { return ( (h[aa] * h[b]) + ...); }
               ) + ...
           );
   } );
@@ -167,7 +167,7 @@ void testForSequenceTemplateLambdaHelper()
   {
     int staticSum0 = 0;
     forSequence< 10 >(
-      [&] < int a > () constexpr
+      [&]< int a > () constexpr
     {
       staticSum0 += h[a];
     } );
diff --git a/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp b/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp
index 02c74e6..9d4bdb1 100644
--- a/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp
+++ b/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp
@@ -110,9 +110,9 @@ class ParentElement
     REAL_TYPE rval = {0};
     forNestedSequence< BASIS_TYPE::numSupportPoints... >( [&] ( auto const ... ic_indices ) constexpr
-    {
-      rval = rval + ( value< decltype(ic_indices)::value ... >( parentCoord ) * var( decltype(ic_indices)::value ... ) );
-    } );
+      {
+        rval = rval + ( value< decltype(ic_indices)::value ... >( parentCoord ) * var( decltype(ic_indices)::value ... ) );
+      } );
     return rval;
   }
@@ -130,13 +130,13 @@
   {
     CArrayNd< RealType, numDims > rval = {0.0};
     forNestedSequence< BASIS_TYPE::numSupportPoints... >( [&] ( auto const ... ic_indices ) constexpr
-    {
-      CArrayNd< RealType, numDims > const grad = gradient< decltype(ic_indices)::value ... >( parentCoord );
-      forSequence< numDims >( [&] ( auto const a ) constexpr
-      {
-        rval( a ) = rval( a ) + grad( a ) * var( decltype(ic_indices)::value ... );
-      } );
-    } );
+      {
+        CArrayNd< RealType, numDims > const grad = gradient< decltype(ic_indices)::value ... >( parentCoord );
+        forSequence< numDims >( [&] ( auto const a ) constexpr
+        {
+          rval( a ) = rval( a ) + grad( a ) * var( decltype(ic_indices)::value ... );
+        } );
+      } );
     return rval;
   }
diff --git a/src/functions/bases/BasisProduct.hpp b/src/functions/bases/BasisProduct.hpp
index b0f00ab..28f1075 100644
--- a/src/functions/bases/BasisProduct.hpp
+++ b/src/functions/bases/BasisProduct.hpp
@@ -89,21 +89,21 @@ struct BasisProduct
   {
     static_assert( sizeof...(BASIS_FUNCTION_INDICES) == numDims, "Wrong number of basis function indicies specified" );
-    return
+
 #if __cplusplus >= 202002L
-    // expand pack over number of dimensions
-    executeSequence< numDims >( [&] < int ... PRODUCT_TERM_INDEX > () constexpr
-    {
-      return ( BASIS_TYPE::template value< BASIS_FUNCTION_INDICES >( parentCoord[PRODUCT_TERM_INDEX] ) * ... );
-    } );
+    // expand pack over number of dimensions
+    return executeSequence< numDims >( [&]< int ... PRODUCT_TERM_INDEX > () constexpr
+      {
+        return ( BASIS_TYPE::template value< BASIS_FUNCTION_INDICES >( parentCoord[PRODUCT_TERM_INDEX] ) * ... );
+      } );
 #else
-    executeSequence< numDims >( [&] ( auto ... PRODUCT_TERM_INDEX ) constexpr
-    {
-      // fold expression to multiply the value of each BASIS_TYPE in each
-      // dimension. In other words the fold expands on BASIS_TYPE...,
-      // BASIS_FUNCTION_INDICES..., and PRODUCT_TERM_INDEX... together.
-      return ( BASIS_TYPES::template value< BASIS_FUNCTION_INDICES >( parentCoord[decltype(PRODUCT_TERM_INDEX)::value] ) * ... );
-    } );
+    return executeSequence< numDims >( [&] ( auto ... PRODUCT_TERM_INDEX ) constexpr
+      {
+        // fold expression to multiply the value of each BASIS_TYPE in each
+        // dimension. In other words the fold expands on BASIS_TYPE...,
+        // BASIS_FUNCTION_INDICES..., and PRODUCT_TERM_INDEX... together.
+        return ( BASIS_TYPES::template value< BASIS_FUNCTION_INDICES >( parentCoord[decltype(PRODUCT_TERM_INDEX)::value] ) * ... );
+      } );
 #endif
   }
 
@@ -133,40 +133,40 @@ struct BasisProduct
     static_assert( sizeof...(BASIS_FUNCTION_INDICES) == numDims, "Wrong number of basis function indicies specified" );
 
 #if __cplusplus >= 202002L
-    return executeSequence< numDims >( [&] < int ... i > () constexpr->CArrayNd< RealType, numDims >
-    {
-      auto gradientComponent = [&] ( auto const iGrad,
-                                     auto const ... PRODUCT_TERM_INDICES ) constexpr
-      {
-        // Ca
-        return ( gradientComponentHelper< BASIS_TYPES,
-                                          decltype(iGrad)::value,
-                                          BASIS_FUNCTION_INDICES,
-                                          PRODUCT_TERM_INDICES >( parentCoord ) * ... );
-      };
-
-      return { (executeSequence< numDims >( gradientComponent, std::integral_constant< int, i >{} ) )... };
-    } );
+    return executeSequence< numDims >( [&]< int ... i > () constexpr->CArrayNd< RealType, numDims >
+      {
+        auto gradientComponent = [&] ( auto const iGrad,
+                                       auto const ... PRODUCT_TERM_INDICES ) constexpr
+        {
+          // Ca
+          return ( gradientComponentHelper< BASIS_TYPES,
+                                            decltype(iGrad)::value,
+                                            BASIS_FUNCTION_INDICES,
+                                            PRODUCT_TERM_INDICES >( parentCoord ) * ... );
+        };
+
+        return { (executeSequence< numDims >( gradientComponent, std::integral_constant< int, i >{} ) )... };
+      } );
 #else
     // Expand over the dimensions.
     return executeSequence< numDims >( [&] ( auto ... a ) constexpr->CArrayNd< RealType, numDims >
-    {
-      // define a lambda that calculates the gradient of the basis function in
-      // a single dimension/direction.
-      auto gradientComponent = [&] ( auto GRADIENT_COMPONENT, auto ... PRODUCT_TERM_INDICES ) constexpr
-      {
-        // fold expression calling gradientComponentHelper using expanding on
-        // BASIS_TYPE, BASIS_FUNCTION_INDICES, and PRODUCT_TERM_INDICES.
-        return ( gradientComponentHelper< BASIS_TYPES,
-                                          decltype(GRADIENT_COMPONENT)::value,
-                                          BASIS_FUNCTION_INDICES,
-                                          decltype(PRODUCT_TERM_INDICES)::value >( parentCoord ) * ... );
-      };
-
-      // execute the gradientComponent lambda on each direction, expand the
-      // pack on "i" corresponding to each direction of the gradient.
-      return { (executeSequence< numDims >( gradientComponent, a ) )... };
-    } );
+      {
+        // define a lambda that calculates the gradient of the basis function in
+        // a single dimension/direction.
+        auto gradientComponent = [&] ( auto GRADIENT_COMPONENT, auto ... PRODUCT_TERM_INDICES ) constexpr
+        {
+          // fold expression calling gradientComponentHelper using expanding on
+          // BASIS_TYPE, BASIS_FUNCTION_INDICES, and PRODUCT_TERM_INDICES.
+          return ( gradientComponentHelper< BASIS_TYPES,
+                                            decltype(GRADIENT_COMPONENT)::value,
+                                            BASIS_FUNCTION_INDICES,
+                                            decltype(PRODUCT_TERM_INDICES)::value >( parentCoord ) * ... );
+        };
+
+        // execute the gradientComponent lambda on each direction, expand the
+        // pack on "i" corresponding to each direction of the gradient.
+        return { (executeSequence< numDims >( gradientComponent, a ) )... };
+      } );
 #endif
   }
diff --git a/src/functions/bases/LagrangeBasis.hpp b/src/functions/bases/LagrangeBasis.hpp
index ab2b330..e18bb4a 100644
--- a/src/functions/bases/LagrangeBasis.hpp
+++ b/src/functions/bases/LagrangeBasis.hpp
@@ -82,17 +82,17 @@ class LagrangeBasis : public SPACING_TYPE< REAL_TYPE, ORDER + 1 >
   value( REAL_TYPE const & coord )
   {
 #if __cplusplus >= 202002L
-    return executeSequence< numSupportPoints >( [&] < int ... a > () constexpr
-    {
-      // return fold expression that is the product of all the polynomial
-      // factor terms.
-      return ( valueProductTerm< BF_INDEX, a >( coord ) * ... );
-    } );
+    return executeSequence< numSupportPoints >( [&]< int ... a > () constexpr
+      {
+        // return fold expression that is the product of all the polynomial
+        // factor terms.
+        return ( valueProductTerm< BF_INDEX, a >( coord ) * ... );
+      } );
 #else
     return executeSequence< numSupportPoints >( [&] ( auto const ... a ) constexpr
-    {
-      return ( valueProductTerm< BF_INDEX, decltype(a)::value >( coord ) * ... );
-    } );
+      {
+        return ( valueProductTerm< BF_INDEX, decltype(a)::value >( coord ) * ... );
+      } );
 #endif
   }
 
@@ -117,28 +117,28 @@ class LagrangeBasis : public SPACING_TYPE< REAL_TYPE, ORDER + 1 >
   {
 #if __cplusplus >= 202002L
-    return executeSequence< numSupportPoints >( [&coord] < int ... a > () constexpr
-    {
-      auto func = [&coord] < int ... b > ( auto aa ) constexpr
-      {
-        constexpr int aVal = decltype(aa)::value;
-        return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< BF_INDEX, b, aVal >( coord ) * ... );
-      };
-
-      return ( executeSequence< numSupportPoints >( func, std::integral_constant< int, a >{} ) + ... );
-    } );
+    return executeSequence< numSupportPoints >( [&coord]< int ... a > () constexpr
+      {
+        auto func = [&coord]< int ... b > ( auto aa ) constexpr
+        {
+          constexpr int aVal = decltype(aa)::value;
+          return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< BF_INDEX, b, aVal >( coord ) * ... );
+        };
+
+        return ( executeSequence< numSupportPoints >( func, std::integral_constant< int, a >{} ) + ... );
+      } );
 #else
     return executeSequence< numSupportPoints >( [&coord] ( auto const ... a ) constexpr
-    {
-      REAL_TYPE const values[ numSupportPoints ] = { valueProductTerm< BF_INDEX, decltype(a)::value >( coord )... };
-      auto func = [&values] ( auto aa, auto ... b ) constexpr
-      {
-        constexpr int aVal = decltype(aa)::value;
-        return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< decltype(b)::value, aVal >( values ) * ... );
-      };
-
-      return ( executeSequence< numSupportPoints >( func, a ) + ... );
-    } );
+      {
+        REAL_TYPE const values[ numSupportPoints ] = { valueProductTerm< BF_INDEX, decltype(a)::value >( coord )... };
+        auto func = [&values] ( auto aa, auto ... b ) constexpr
+        {
+          constexpr int aVal = decltype(aa)::value;
+          return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< decltype(b)::value, aVal >( values ) * ... );
+        };
+
+        return ( executeSequence< numSupportPoints >( func, a ) + ... );
+      } );
 #endif
   }
diff --git a/src/functions/quadrature/Quadrature.hpp b/src/functions/quadrature/Quadrature.hpp
index d0c466e..1c43301 100644
--- a/src/functions/quadrature/Quadrature.hpp
+++ b/src/functions/quadrature/Quadrature.hpp
@@ -53,13 +53,13 @@ struct QuadratureGaussLegendre : public GaussLegendreSpacing< REAL_TYPE, N >
     }
     else if constexpr ( N == 3 )
     {
-      assert( index >= 0 && index < 3 );
+      //assert( index >= 0 && index < 3 );
       return 0.5555555555555555555555555555555556 + 0.3333333333333333333333333333333333 * ( index & 1 );
     }
     else if constexpr ( N == 4 )
     {
-      assert( index >= 0 && index < 4 );
+      //assert( index >= 0 && index < 4 );
       return 0.5 + ( -1 + ( ( ( index + 1 ) & 2 ) ) ) * 0.15214515486254614262693605077800059277;
     }
     return std::numeric_limits< REAL_TYPE >::max();
@@ -145,17 +145,17 @@ struct QuadratureGaussLobatto : public GaussLobattoSpacing< REAL_TYPE, N >
     }
     else if constexpr ( N == 3 )
     {
-      assert( index >= 0 && index < 3 );
+      //assert( index >= 0 && index < 3 );
       return 0.3333333333333333333333333333333333 + ( index & 1 );
     }
     else if constexpr ( N == 4 )
     {
-      assert( index >= 0 && index < 4 );
+      //assert( index >= 0 && index < 4 );
       return 0.1666666666666666666666666666666667 + ( ((index + 1) & 2) >> 1 ) * 0.6666666666666666666666666666666667;
     }
     else if constexpr ( N == 5 )
     {
-      assert( index >= 0 && index < 5 );
+      //assert( index >= 0 && index < 5 );
       return 0.1 + (index & 1) * 0.4444444444444444444444444444444444 + !( index - 2 ) * 0.6111111111111111111111111111111111;
     }
     return std::numeric_limits< REAL_TYPE >::max();
diff --git a/src/geometry/mapping/LinearTransform.hpp b/src/geometry/mapping/LinearTransform.hpp
index 86b9536..7fde65f 100644
--- a/src/geometry/mapping/LinearTransform.hpp
+++ b/src/geometry/mapping/LinearTransform.hpp
@@ -169,17 +169,17 @@ jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform,
   auto const & nodeCoords = transform.getData();
 
   InterpolatedShape::supportLoop( [&] ( auto const ... ic_spIndices ) constexpr
-  {
-    CArrayNd< REAL_TYPE, DIMS > const dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( pointCoordsParent );
-    // dimensional loop from domain to codomain
-    forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
-    {
-      constexpr int i = decltype(ici)::value;
-      constexpr int j = decltype(icj)::value;
-      J( i, j ) = J( i, j ) + dNadXi( j ) * nodeCoords( decltype(ic_spIndices)::value ..., i );
-    } );
-
-  } );
+    {
+      CArrayNd< REAL_TYPE, DIMS > const dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( pointCoordsParent );
+      // dimensional loop from domain to codomain
+      forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
+      {
+        constexpr int i = decltype(ici)::value;
+        constexpr int j = decltype(icj)::value;
+        J( i, j ) = J( i, j ) + dNadXi( j ) * nodeCoords( decltype(ic_spIndices)::value ..., i );
+      } );
+
+    } );
 }
 
@@ -208,35 +208,35 @@ SHIVA_STATIC_CONSTEXPR_HOSTDEVICE_FORCEINLINE void
 jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform,
           typename LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE >::JacobianType & J )
 {
-  using Transform = std::remove_reference_t< decltype(transform) >;
+  using Transform = LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE >;
   using InterpolatedShape = typename Transform::InterpolatedShape;
   constexpr int DIMS = Transform::numDims;
   auto const & nodeCoords = transform.getData();
 
-  constexpr double qcoords[3] = { ( QUADRATURE::template coordinate< QA >() )... };
+  constexpr REAL_TYPE qcoords[3] = { ( QUADRATURE::template coordinate< QA >() )... };
 
   InterpolatedShape::supportLoop( [&] ( auto const ... ic_spIndices ) constexpr
-  {
-    constexpr CArrayNd< REAL_TYPE, DIMS > dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( qcoords );
-
-    // dimensional loop from domain to codomain
-  #if 0
-    forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
-    {
-      constexpr int i = decltype(ici)::value;
-      constexpr int j = decltype(icj)::value;
-      J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j );
-    } );
+    {
+      constexpr CArrayNd< REAL_TYPE, DIMS > dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( qcoords );
+
+      // dimensional loop from domain to codomain
+  #if 1
+      forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
+      {
+        constexpr int i = decltype(ici)::value;
+        constexpr int j = decltype(icj)::value;
+        J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j );
+      } );
 #else
-    for ( int j = 0; j < DIMS; ++j )
-    {
-      for ( int i = 0; i < DIMS; ++i )
-      {
-        J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j );
-      }
-    }
+      for ( int j = 0; j < DIMS; ++j )
+      {
+        for ( int i = 0; i < DIMS; ++i )
+        {
+          J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j );
+        }
+      }
 #endif
-  } );
+    } );
 }
diff --git a/src/geometry/mapping/unitTests/testLinearTransform.cpp b/src/geometry/mapping/unitTests/testLinearTransform.cpp
index 9087bb6..5a575bf 100644
--- a/src/geometry/mapping/unitTests/testLinearTransform.cpp
+++ b/src/geometry/mapping/unitTests/testLinearTransform.cpp
@@ -305,7 +305,13 @@ void testInvJacobianFunctionReturnByValueHelper()
   auto cell = makeLinearTransform( Xref );
   for ( int q = 0; q < 8; ++q )
   {
+#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12
+    auto tmp = inverseJacobian( cell, qCoords[q] );
+    auto detJ = shiva::get< 0 >( tmp );
+    auto invJ = shiva::get< 1 >( tmp );
+#else
     auto [ detJ, invJ ] = inverseJacobian( cell, qCoords[q] );
+#endif
     kernelData[ 10 * q ] = detJ;
 
     for ( int i = 0; i < 3; ++i )
diff --git a/src/geometry/mapping/unitTests/testScaling.cpp b/src/geometry/mapping/unitTests/testScaling.cpp
index 91f8129..b83bbb2 100644
--- a/src/geometry/mapping/unitTests/testScaling.cpp
+++ b/src/geometry/mapping/unitTests/testScaling.cpp
@@ -143,7 +143,13 @@ void testInvJacobianFunctionReturnByValueHelper()
 {
   auto cell = makeScaling( h );
 
-  auto [ detJ, invJ ] = inverseJacobian( cell );
+#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12
+  auto tmp = inverseJacobian( cell );
+  auto detJ = shiva::get< 0 >( tmp );
+  auto invJ = shiva::get< 1 >( tmp );
+#else
+  auto [detJ, invJ] = inverseJacobian( cell );
+#endif
   kdata[0] = detJ;
   kdata[1] = invJ( 0 );
   kdata[2] = invJ( 1 );
diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp
index bc3c93a..543ae70 100644
--- a/src/geometry/mapping/unitTests/testUniformScaling.cpp
+++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp
@@ -99,7 +99,14 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue )
   double const h = 3.14;
   auto cell = makeUniformScaling( h );
 
-  auto [ detJ, invJ ] = inverseJacobian( cell );
+  // libcudacxx in CUDA 11 lacks structured-binding support for cuda::std::tuple
+#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12
+  auto tmp = inverseJacobian( cell );
+  auto detJ = shiva::get< 0 >( tmp );
+  auto invJ = shiva::get< 1 >( tmp );
+#else
+  auto [detJ, invJ] = inverseJacobian( cell );
+#endif
   EXPECT_EQ( detJ, 0.125 * h * h * h );
   EXPECT_EQ( invJ( 0 ), ( 2 / h ) );
 }
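
A note on scripts/aggregateOrSplit.py: generate_header_list() is a Kahn-style topological sort. Each pass emits every header whose dependency set is empty (now deduplicated by absolute path), erases those headers from the sets of the remaining entries, and repeats until nothing is left. A minimal C++ sketch of that ordering algorithm, with illustrative names and a cycle bail-out that the Python script itself does not have:

#include <map>
#include <set>
#include <string>
#include <vector>

// Kahn-style ordering: repeatedly emit nodes with no remaining
// dependencies, then erase them from the sets of the remaining nodes.
// Returns headers in an order where every dependency precedes its user.
std::vector< std::string >
orderHeaders( std::map< std::string, std::set< std::string > > remaining )
{
  std::vector< std::string > ordered;
  std::set< std::string > emitted; // dedup, mirroring unique_files in the script
  while( !remaining.empty() )
  {
    std::vector< std::string > ready;
    for( auto const & [ header, deps ] : remaining )
    {
      if( deps.empty() )
      {
        ready.push_back( header );
      }
    }
    if( ready.empty() )
    {
      break; // dependency cycle: bail out rather than loop forever
    }
    for( auto const & header : ready )
    {
      if( emitted.insert( header ).second )
      {
        ordered.push_back( header );
      }
      remaining.erase( header );
      for( auto & [ key, deps ] : remaining )
      {
        deps.erase( header ); // this header is now satisfied everywhere
      }
    }
  }
  return ordered;
}

Normalizing keys to absolute paths before this loop, as the script now does, is what guarantees a header reached via two different relative paths is emitted only once.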
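The SHIVA_IS_CONST_EVAL() macro exists so that SHIVA_ASSERT_MSG can skip its runtime failure handler during constant evaluation, where calling a non-constexpr function such as shivaAssertionFailed would make the enclosing constant expression ill-formed. A standalone C++17 sketch of the same detection-plus-guard pattern (all names here are illustrative, not Shiva's):

#include <cstdio>

// Prefer the compiler builtin when available, fall back to
// "always runtime" otherwise.
#ifdef __has_builtin
  #define HAS_BUILTIN( x ) __has_builtin( x )
#else
  #define HAS_BUILTIN( x ) 0
#endif

#if HAS_BUILTIN( __builtin_is_constant_evaluated )
  #define IS_CONST_EVAL() ( __builtin_is_constant_evaluated() )
#else
  #define IS_CONST_EVAL() ( false )
#endif

// A constexpr function that reports failures at runtime but remains
// usable in constant expressions: the printf branch is skipped when
// evaluation happens at compile time.
constexpr int checkedIncrement( int i )
{
  if( i < 0 )
  {
    if( !IS_CONST_EVAL() )
    {
      std::printf( "assertion failed: i >= 0\n" );
    }
  }
  return i + 1;
}

int main()
{
  constexpr int a = checkedIncrement( 1 ); // evaluated at compile time, no handler call
  int b = checkedIncrement( -1 );          // prints the failure at runtime
  return a + b;
}

On device passes (__CUDA_ARCH__ defined) the Shiva macro hard-codes false, since the builtin is not usable in C++17 device code; the cost is that device-side constant evaluation cannot suppress the handler, which is exactly why the diff routes the check through one macro instead of calling the builtin directly.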
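With the host-to-device copy added above, the array overload of genericKernelWrapper now round-trips data: hostData is copied into a device allocation, the kernel lambda mutates the device copy, and the result is copied back after synchronization (in non-device builds the lambda presumably runs directly on hostData). A hedged usage sketch, assuming the wrapper is reachable through the shiva namespace shown in the pmpl.hpp excerpt and that the lambda receives a pointer of the array's element type:

#include "common/pmpl.hpp"

// Fill a small scratch array on the device (or directly on the host in
// non-device builds) and read the results back through the host array.
void writeSquares()
{
  double data[ 8 ] = { 0 };
  shiva::genericKernelWrapper( 8, data, [] SHIVA_HOST_DEVICE ( double * const kernelData )
  {
    for( int i = 0; i < 8; ++i )
    {
      kernelData[ i ] = static_cast< double >( i ) * i; // device-side copy of data
    }
  } );
  // data[i] == i*i on all backends once the wrapper returns.
}

Before this change the wrapper only copied device-to-host, so a kernel that read its input (rather than purely writing) saw uninitialized device memory; the new deviceMemCpy before the launch closes that gap.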
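The guarded blocks in the three mapping tests all work around the same limitation: the libcudacxx shipped with CUDA 11 cannot destructure cuda::std::tuple with structured bindings, which is what the new SHIVA_CUDA_MAJOR plumbing in CMakeBasics.cmake and ShivaConfig.hpp.in makes detectable at compile time. The fallback as a standalone sketch (unpackPair is a hypothetical helper, not part of Shiva; shiva::get is the using-declaration added to types.hpp in this diff):

#include "common/types.hpp"

// inverseJacobian-style functions return a (detJ, invJ) pair as a
// shiva::tuple. On CUDA 11 builds, unpack with get<>() instead of
// structured bindings.
template< typename TUPLE >
void unpackPair( TUPLE const & t )
{
#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12
  auto detJ = shiva::get< 0 >( t );
  auto invJ = shiva::get< 1 >( t );
#else
  auto [ detJ, invJ ] = t;
#endif
  // ... use detJ and invJ ...
  (void)detJ;
  (void)invJ;
}

Keying the guard on the generated SHIVA_CUDA_MAJOR macro, rather than on SHIVA_USE_CUDA alone, lets CUDA 12+ builds keep the cleaner structured-binding form.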