From 86f6622339ee7896a5433e780518de43ac1c087f Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Sun, 16 Mar 2025 15:48:25 -0700 Subject: [PATCH 01/26] comment out assert calls which are not constexpr --- src/functions/quadrature/Quadrature.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/functions/quadrature/Quadrature.hpp b/src/functions/quadrature/Quadrature.hpp index 0e988cf..ea0dc39 100644 --- a/src/functions/quadrature/Quadrature.hpp +++ b/src/functions/quadrature/Quadrature.hpp @@ -53,13 +53,13 @@ struct QuadratureGaussLegendre : public GaussLegendreSpacing< REAL_TYPE, N > } else if constexpr ( N == 3 ) { - assert( index >= 0 && index < 3 ); + //assert( index >= 0 && index < 3 ); return 0.5555555555555555555555555555555556 + 0.3333333333333333333333333333333333 * ( index & 1 ); } else if constexpr ( N == 4 ) { - assert( index >= 0 && index < 4 ); + //assert( index >= 0 && index < 4 ); return 0.5 + ( -1 + ( ( ( index + 1 ) & 2 ) ) ) * 0.15214515486254614262693605077800059277; } return std::numeric_limits::max(); @@ -145,20 +145,20 @@ struct QuadratureGaussLobatto : public GaussLobattoSpacing< REAL_TYPE, N > } else if constexpr ( N == 3 ) { - assert( index >= 0 && index < 3 ); + //assert( index >= 0 && index < 3 ); return 0.3333333333333333333333333333333333 + ( index & 1 ); } else if constexpr ( N == 4 ) { - assert( index >= 0 && index < 4 ); + //assert( index >= 0 && index < 4 ); return 0.1666666666666666666666666666666667 + ( ((index + 1) & 2) >> 1 ) * 0.6666666666666666666666666666666667; } else if constexpr ( N == 5 ) { - assert( index >= 0 && index < 5 ); + //assert( index >= 0 && index < 5 ); return 0.1 + (index & 1) * 0.4444444444444444444444444444444444 + !( index - 2 ) * 0.6111111111111111111111111111111111; } - return std::numeric_limits::max(); + return 0;//std::numeric_limits::max(); } /** From 5d873d1406ed63cecefb7211b1086b16ad6a63bd Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Sat, 22 Mar 2025 16:34:02 -0700 Subject: [PATCH 02/26] make camp more optional --- CMakeLists.txt | 12 +++++++----- cmake/CMakeBasics.cmake | 1 + cmake/Config.cmake | 3 ++- docs/doxygen/ShivaConfig.hpp | 2 ++ src/ShivaConfig.hpp.in | 4 +++- src/common/types.hpp | 1 - 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab43b11..dc68e5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,13 +65,15 @@ endif() include( cmake/Macros.cmake ) include( cmake/Config.cmake ) - add_subdirectory( src ) -add_subdirectory( tpl/camp ) -target_compile_options( camp PRIVATE "-Wno-shadow") -configure_file(tpl/camp/include/camp/config.in.hpp - ${PROJECT_BINARY_DIR}/include/camp/config.hpp) +if( SHIVA_ENABLE_CAMP ) + add_subdirectory( tpl/camp ) + target_compile_options( camp PRIVATE "-Wno-shadow") + + configure_file(tpl/camp/include/camp/config.in.hpp + ${PROJECT_BINARY_DIR}/include/camp/config.hpp) +endif() if( SHIVA_ENABLE_DOCS ) diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake index 93a1c1b..0fe1b85 100644 --- a/cmake/CMakeBasics.cmake +++ b/cmake/CMakeBasics.cmake @@ -25,4 +25,5 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG CLANG "-fstandalone-debug" ) +set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "") set( CAMP_ENABLE_TESTS OFF CACHE BOOL "") diff --git a/cmake/Config.cmake b/cmake/Config.cmake index 24e78c7..039c2c1 100644 --- a/cmake/Config.cmake +++ b/cmake/Config.cmake @@ -1,11 +1,12 @@ # set( PREPROCESSOR_DEFINES CUDA HIP + CAMP ) set( USE_CONFIGFILE ON CACHE BOOL "" ) foreach( DEP in ${PREPROCESSOR_DEFINES}) - if( ${DEP}_FOUND OR ENABLE_${DEP} ) + if( ${DEP}_FOUND OR ENABLE_${DEP} OR SHIVA_ENABLE_${DEP} ) set( SHIVA_USE_${DEP} TRUE ) endif() endforeach() diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index d89803f..119e591 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -13,3 +13,5 @@ /* #undef SHIVA_USE_HIP */ /* #undef SHIVA_USE_CALIPER */ + +/* #undef SHIVA_USE_CAMP */ diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in index 6c01b1d..f1380d3 100644 --- a/src/ShivaConfig.hpp.in +++ b/src/ShivaConfig.hpp.in @@ -12,4 +12,6 @@ #cmakedefine SHIVA_USE_HIP -#cmakedefine SHIVA_USE_CALIPER \ No newline at end of file +#cmakedefine SHIVA_USE_CALIPER + +#cmakedefine SHIVA_USE_CAMP \ No newline at end of file diff --git a/src/common/types.hpp b/src/common/types.hpp index ff2afd0..26e22f7 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -20,7 +20,6 @@ #include "common/ShivaMacros.hpp" /// @brief Macro to define whether or not to use camp. -#define SHIVA_USE_CAMP #if defined(SHIVA_USE_CAMP) #include #else From abed5585b6c037703378aadd235b997a4310d954 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Sat, 29 Mar 2025 10:49:38 +0900 Subject: [PATCH 03/26] some cmake fixes --- CMakeLists.txt | 14 +++++++++++--- src/CMakeLists.txt | 3 ++- src/common/CMakeLists.txt | 8 ++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dc68e5a..1ab0fa6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,12 +15,12 @@ set( SHIVA_VERSION_PATCHLEVEL 0 ) # check if Shiva is build as a submodule or a separate project get_directory_property( parent_dir PARENT_DIRECTORY ) if(parent_dir) - set( is_submodule ON ) + set( SHIVA_IS_SUBMODULE ON ) else() - set( is_submodule OFF ) + set( SHIVA_IS_SUBMODULE OFF ) endif() -if( NOT is_submodule ) +if( NOT SHIVA_IS_SUBMODULE ) message( "not a submodule") project( Shiva LANGUAGES CXX C ) @@ -65,6 +65,14 @@ endif() include( cmake/Macros.cmake ) include( cmake/Config.cmake ) + +set(SHIVA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) +set(SHIVA_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) + +message( STATUS "SHIVA_BINARY_DIR: ${SHIVA_BINARY_DIR}" ) +message( STATUS "SHIVA_SOURCE_DIR: ${SHIVA_SOURCE_DIR}" ) + + add_subdirectory( src ) if( SHIVA_ENABLE_CAMP ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ca02250..b680057 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,7 +23,8 @@ blt_add_library( NAME shiva target_include_directories( shiva INTERFACE - $ + $ + $ $ ) install( FILES ${shiva_headers} diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9c684d5..f754295 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -34,14 +34,14 @@ blt_add_library( NAME common target_include_directories( common INTERFACE - $ - $ + $ + $ $ ) target_include_directories( common SYSTEM INTERFACE - $ - $ ) + $ + $ ) install( FILES ${common_headers} DESTINATION include/common ) From 9c3f0d022fbba3a47d50ac2fe7441d17c71c4f46 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Sat, 29 Mar 2025 11:36:34 +0900 Subject: [PATCH 04/26] fixes for absolute paths in aggregateOrSplit.py --- scripts/aggregateOrSplit.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/aggregateOrSplit.py b/scripts/aggregateOrSplit.py index 89baa12..251174e 100644 --- a/scripts/aggregateOrSplit.py +++ b/scripts/aggregateOrSplit.py @@ -20,11 +20,13 @@ def create_dependency_graph(self, header, include_paths=None): if include_paths is None: include_paths = [] + header = os.path.abspath(header) # Normalize here + if header in self.dependencies: return # Already processed self.dependencies[header] = set() - base_path = os.path.dirname(os.path.abspath(header)) # Base directory of the current header + base_path = os.path.dirname(header) # Base directory of the current header try: with open(header, 'r') as file: @@ -34,10 +36,10 @@ def create_dependency_graph(self, header, include_paths=None): included_file = include_match.group(1) if included_file != self.config_file: - resolved_path = self.resolve_path( - included_file, base_path, include_paths) + resolved_path = self.resolve_path( included_file, base_path, include_paths) if resolved_path: + resolved_path = os.path.abspath(resolved_path) self.dependencies[header].add(resolved_path) if os.path.exists(resolved_path): @@ -82,16 +84,21 @@ def resolve_path(self, included_file, base_path, include_paths): return None # Return None if no resolution was possible + def generate_header_list(self): remaining_dependencies = self.dependencies.copy() size_of_remaining_dependencies = len(remaining_dependencies) + unique_files = set() # Track unique files by absolute path while size_of_remaining_dependencies > 0: local_included = [] for key in remaining_dependencies: if len(remaining_dependencies[key]) == 0: - self.included_list.append(key) + abs_key = os.path.abspath(key) + if abs_key not in unique_files: + self.included_list.append(abs_key) + unique_files.add(abs_key) local_included.append(key) for included_key in local_included: @@ -111,6 +118,7 @@ def process_header(header_path, output): """ Processes a single header file, commenting out includes and pragmas. """ + header_path = os.path.abspath(header_path) if header_path in self.included: return # Avoid duplicate processing self.included.add(header_path) @@ -133,6 +141,7 @@ def process_header(header_path, output): with open(output_file, 'w') as output: for header in headers: + header = os.path.abspath(header) self.create_dependency_graph(header, include_paths) for header in self.dependencies: From 6ff1f747b22b2d558bf26a9b0b3076507d7c0e35 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Mon, 7 Apr 2025 07:21:32 -0700 Subject: [PATCH 05/26] updated pmpl for data transfer --- src/common/pmpl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp index 1368418..812fc1b 100644 --- a/src/common/pmpl.hpp +++ b/src/common/pmpl.hpp @@ -132,6 +132,7 @@ void genericKernelWrapper( int const N, DATA_TYPE * const hostData, LAMBDA && fu #if defined(SHIVA_USE_DEVICE) DATA_TYPE * deviceData; deviceMalloc( &deviceData, N * sizeof(DATA_TYPE) ); + deviceMemCpy( deviceData, hostData, N * sizeof(DATA_TYPE), cudaMemcpyHostToDevice ); genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ), deviceData ); deviceDeviceSynchronize(); deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), cudaMemcpyDeviceToHost ); From 0582eedc60675205b53539fb070173e34f259121 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Sat, 10 May 2025 13:57:32 -0700 Subject: [PATCH 06/26] use html url for submodules to avoid issues TotalEnergies staff were having with relative paths --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 000410f..a546177 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "cmake/blt"] path = cmake/blt - url = ../../LLNL/blt.git + url = https://github.com/LLNL/blt.git [submodule "tpl/camp"] path = tpl/camp - url = ../../LLNL/camp.git + url = https://github.com/LLNL/camp.git From 64a16a79d733f7e0da02e06aef403dfaf69eb654 Mon Sep 17 00:00:00 2001 From: Randolph R Settgast Date: Tue, 20 May 2025 09:38:04 -0500 Subject: [PATCH 07/26] change looping strategy for jacobian to static --- docs/doxygen/ShivaConfig.hpp | 2 +- src/geometry/mapping/LinearTransform.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index fdee9a6..5187f6d 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -8,7 +8,7 @@ #define SHIVA_VERSION_PATCHLEVEL 0 -/* #undef SHIVA_USE_CUDA */ +#define SHIVA_USE_CUDA /* #undef SHIVA_USE_HIP */ diff --git a/src/geometry/mapping/LinearTransform.hpp b/src/geometry/mapping/LinearTransform.hpp index 86b9536..0b8c2a4 100644 --- a/src/geometry/mapping/LinearTransform.hpp +++ b/src/geometry/mapping/LinearTransform.hpp @@ -220,7 +220,7 @@ jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform, constexpr CArrayNd< REAL_TYPE, DIMS > dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( qcoords ); // dimensional loop from domain to codomain - #if 0 + #if 1 forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr { constexpr int i = decltype(ici)::value; From 8abcdf3846bdb5c88fd921d8240736a2f1dca99a Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Tue, 20 May 2025 20:31:36 -0700 Subject: [PATCH 08/26] templatize hard coded real types --- docs/doxygen/ShivaConfig.hpp | 2 +- src/geometry/mapping/LinearTransform.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index 5187f6d..fdee9a6 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -8,7 +8,7 @@ #define SHIVA_VERSION_PATCHLEVEL 0 -#define SHIVA_USE_CUDA +/* #undef SHIVA_USE_CUDA */ /* #undef SHIVA_USE_HIP */ diff --git a/src/geometry/mapping/LinearTransform.hpp b/src/geometry/mapping/LinearTransform.hpp index 0b8c2a4..a44d7d6 100644 --- a/src/geometry/mapping/LinearTransform.hpp +++ b/src/geometry/mapping/LinearTransform.hpp @@ -208,12 +208,12 @@ SHIVA_STATIC_CONSTEXPR_HOSTDEVICE_FORCEINLINE void jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform, typename LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE >::JacobianType & J ) { - using Transform = std::remove_reference_t< decltype(transform) >; + using Transform = LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE >; using InterpolatedShape = typename Transform::InterpolatedShape; constexpr int DIMS = Transform::numDims; auto const & nodeCoords = transform.getData(); - constexpr double qcoords[3] = { ( QUADRATURE::template coordinate< QA >() )... }; + constexpr REAL_TYPE qcoords[3] = { ( QUADRATURE::template coordinate< QA >() )... }; InterpolatedShape::supportLoop( [&] ( auto const ... ic_spIndices ) constexpr { From ece7de77636e12d6662a64947bec0758e44fa0cd Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Wed, 10 Sep 2025 23:11:44 +0900 Subject: [PATCH 09/26] some code review suggestions --- src/common/pmpl.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp index fd0af6a..7a4346e 100644 --- a/src/common/pmpl.hpp +++ b/src/common/pmpl.hpp @@ -37,6 +37,8 @@ namespace shiva #define deviceError_t cudaError_t #define deviceSuccess cudaSuccess #define deviceGetErrorString cudaGetErrorString + #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost + #define deviceSuccess = cudaSuccess; #elif defined(SHIVA_USE_HIP) #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES ); #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES ); @@ -46,6 +48,8 @@ namespace shiva #define deviceError_t hipError_t #define deviceSuccess = hipSuccess; #define deviceGetErrorString hipGetErrorString + #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost + #define deviceSuccess = hipSuccess; #endif #endif @@ -102,7 +106,7 @@ void genericKernelWrapper( LAMBDA && func, bool const abortOnError = true ) genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ) ); // UNCRUSTIFY-ON deviceError_t err = deviceDeviceSynchronize(); - if ( err != cudaSuccess ) + if ( err != deviceSuccess ) { printf( "Kernel failed: %s\n", deviceGetErrorString( err )); if ( abortOnError ) @@ -162,9 +166,9 @@ void genericKernelWrapper( int const N, DATA_TYPE * const hostData, LAMBDA && fu genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ), deviceData ); // UNCRUSTIFY-ON deviceError_t err = deviceDeviceSynchronize(); - deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), cudaMemcpyDeviceToHost ); + deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), deviceMemcpyDeviceToHost ); deviceFree( deviceData ); - if ( err != cudaSuccess ) + if ( err != deviceSuccess ) { printf( "Kernel failed: %s\n", deviceGetErrorString( err )); if ( abortOnError ) From 1c19a835dfb59736dbf17ecd40300a1956e67382 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Wed, 10 Sep 2025 23:19:46 +0900 Subject: [PATCH 10/26] fix bug --- src/common/pmpl.hpp | 4 ++-- src/functions/quadrature/Quadrature.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp index 7a4346e..17fdb51 100644 --- a/src/common/pmpl.hpp +++ b/src/common/pmpl.hpp @@ -38,7 +38,7 @@ namespace shiva #define deviceSuccess cudaSuccess #define deviceGetErrorString cudaGetErrorString #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost - #define deviceSuccess = cudaSuccess; + #define deviceSuccess cudaSuccess; #elif defined(SHIVA_USE_HIP) #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES ); #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES ); @@ -49,7 +49,7 @@ namespace shiva #define deviceSuccess = hipSuccess; #define deviceGetErrorString hipGetErrorString #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost - #define deviceSuccess = hipSuccess; + #define deviceSuccess hipSuccess; #endif #endif diff --git a/src/functions/quadrature/Quadrature.hpp b/src/functions/quadrature/Quadrature.hpp index c9e7d5b..1c43301 100644 --- a/src/functions/quadrature/Quadrature.hpp +++ b/src/functions/quadrature/Quadrature.hpp @@ -158,7 +158,7 @@ struct QuadratureGaussLobatto : public GaussLobattoSpacing< REAL_TYPE, N > //assert( index >= 0 && index < 5 ); return 0.1 + (index & 1) * 0.4444444444444444444444444444444444 + !( index - 2 ) * 0.6111111111111111111111111111111111; } - return 0;//std::numeric_limits< REAL_TYPE >::max(); + return std::numeric_limits< REAL_TYPE >::max(); } /** From f9232c3f6f4be4221496fde45f83682843bd16a2 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Wed, 10 Sep 2025 23:26:54 +0900 Subject: [PATCH 11/26] try to fix bug again --- src/common/pmpl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp index 17fdb51..ce754ab 100644 --- a/src/common/pmpl.hpp +++ b/src/common/pmpl.hpp @@ -38,7 +38,7 @@ namespace shiva #define deviceSuccess cudaSuccess #define deviceGetErrorString cudaGetErrorString #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost - #define deviceSuccess cudaSuccess; + using deviceSuccess cudaSuccess; #elif defined(SHIVA_USE_HIP) #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES ); #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES ); @@ -49,7 +49,7 @@ namespace shiva #define deviceSuccess = hipSuccess; #define deviceGetErrorString hipGetErrorString #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost - #define deviceSuccess hipSuccess; + using deviceSuccess hipSuccess; #endif #endif From a70ffbea109cb65472eaaa74f5b719801d309349 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Wed, 10 Sep 2025 23:31:43 +0900 Subject: [PATCH 12/26] try to fix bug again --- src/common/pmpl.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp index ce754ab..85ec642 100644 --- a/src/common/pmpl.hpp +++ b/src/common/pmpl.hpp @@ -35,10 +35,9 @@ namespace shiva #define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND ); #define deviceFree( PTR ) cudaFree( PTR ); #define deviceError_t cudaError_t - #define deviceSuccess cudaSuccess #define deviceGetErrorString cudaGetErrorString #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost - using deviceSuccess cudaSuccess; + using deviceSuccess = cudaSuccess; #elif defined(SHIVA_USE_HIP) #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES ); #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES ); @@ -46,10 +45,9 @@ namespace shiva #define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND ); #define deviceFree( PTR ) hipFree( PTR ); #define deviceError_t hipError_t - #define deviceSuccess = hipSuccess; #define deviceGetErrorString hipGetErrorString #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost - using deviceSuccess hipSuccess; + using deviceSuccess = hipSuccess; #endif #endif From 8b0f231a6712b11dd02a4e83c0b82e8c685f3779 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Wed, 10 Sep 2025 23:40:51 +0900 Subject: [PATCH 13/26] try to fix bug again --- src/common/pmpl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp index 85ec642..7a6aa80 100644 --- a/src/common/pmpl.hpp +++ b/src/common/pmpl.hpp @@ -37,7 +37,7 @@ namespace shiva #define deviceError_t cudaError_t #define deviceGetErrorString cudaGetErrorString #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost - using deviceSuccess = cudaSuccess; + constexpr cudaError_t deviceSuccess = cudaSuccess; #elif defined(SHIVA_USE_HIP) #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES ); #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES ); @@ -47,7 +47,7 @@ namespace shiva #define deviceError_t hipError_t #define deviceGetErrorString hipGetErrorString #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost - using deviceSuccess = hipSuccess; + constexpr hipError_t deviceSuccess = hipSuccess; #endif #endif From b9cb71c056a123e5bf8f549d381336deb20b1471 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Wed, 10 Sep 2025 23:46:57 +0900 Subject: [PATCH 14/26] try to fix bug again --- src/common/types.hpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/common/types.hpp b/src/common/types.hpp index 26e22f7..78f9704 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -51,7 +51,9 @@ using tuple = camp::tuple< T ... >; * @return A tuple with the elements passed as arguments. */ template< typename ... T > -SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE auto make_tuple( T && ... t ) +SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE +auto +make_tuple( T && ... t ) { return camp::make_tuple( std::forward< T >( t ) ... ); } @@ -72,6 +74,7 @@ using tuple = cuda::std::tuple< T ... >; * @return A tuple with the elements passed as arguments. */ template< typename ... T > +SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE auto make_tuple( T && ... t ) { return cuda::std::make_tuple( std::forward< T >( t ) ... ); @@ -91,7 +94,9 @@ using tuple = std::tuple< T ... >; * @return A tuple with the elements passed as arguments. */ template< typename ... T > -auto make_tuple( T && ... t ) +SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE +auto +make_tuple( T && ... t ) { return std::make_tuple( std::forward< T >( t ) ... ); } From 7de0c15a130304350f8fa0dc0544b1c0710484c9 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Thu, 11 Sep 2025 14:48:48 +0900 Subject: [PATCH 15/26] cuda/std/tuple workaround and uncrustify --- docs/doxygen/ShivaConfig.hpp | 2 +- src/common/pmpl.hpp | 42 ++++----- src/common/types.hpp | 4 +- .../unitTests/testSequenceUtilities.cpp | 12 +-- .../parentElements/ParentElement.hpp | 20 ++--- src/functions/bases/BasisProduct.hpp | 88 +++++++++---------- src/functions/bases/LagrangeBasis.hpp | 58 ++++++------ src/geometry/mapping/LinearTransform.hpp | 56 ++++++------ .../mapping/unitTests/testUniformScaling.cpp | 9 +- 9 files changed, 149 insertions(+), 142 deletions(-) diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index fdee9a6..4ac8dd9 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -14,6 +14,6 @@ /* #undef SHIVA_USE_CALIPER */ -/* #undef SHIVA_USE_CAMP */ +#define SHIVA_USE_CAMP #define SHIVA_USE_BOUNDS_CHECK diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp index 7a6aa80..eb8d3b8 100644 --- a/src/common/pmpl.hpp +++ b/src/common/pmpl.hpp @@ -28,27 +28,27 @@ namespace shiva { #if defined(SHIVA_USE_DEVICE) - #if defined(SHIVA_USE_CUDA) - #define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES ); - #define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES ); - #define deviceDeviceSynchronize() cudaDeviceSynchronize(); - #define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND ); - #define deviceFree( PTR ) cudaFree( PTR ); - #define deviceError_t cudaError_t - #define deviceGetErrorString cudaGetErrorString - #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost - constexpr cudaError_t deviceSuccess = cudaSuccess; - #elif defined(SHIVA_USE_HIP) - #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES ); - #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES ); - #define deviceDeviceSynchronize() hipDeviceSynchronize(); - #define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND ); - #define deviceFree( PTR ) hipFree( PTR ); - #define deviceError_t hipError_t - #define deviceGetErrorString hipGetErrorString - #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost - constexpr hipError_t deviceSuccess = hipSuccess; - #endif +#if defined(SHIVA_USE_CUDA) +#define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES ); +#define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES ); +#define deviceDeviceSynchronize() cudaDeviceSynchronize(); +#define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND ); +#define deviceFree( PTR ) cudaFree( PTR ); +#define deviceError_t cudaError_t +#define deviceGetErrorString cudaGetErrorString +#define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost +constexpr cudaError_t deviceSuccess = cudaSuccess; +#elif defined(SHIVA_USE_HIP) +#define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES ); +#define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES ); +#define deviceDeviceSynchronize() hipDeviceSynchronize(); +#define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND ); +#define deviceFree( PTR ) hipFree( PTR ); +#define deviceError_t hipError_t +#define deviceGetErrorString hipGetErrorString +#define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost +constexpr hipError_t deviceSuccess = hipSuccess; +#endif #endif /** diff --git a/src/common/types.hpp b/src/common/types.hpp index 78f9704..5ef42c8 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -51,8 +51,8 @@ using tuple = camp::tuple< T ... >; * @return A tuple with the elements passed as arguments. */ template< typename ... T > -SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE -auto +SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE +auto make_tuple( T && ... t ) { return camp::make_tuple( std::forward< T >( t ) ... ); diff --git a/src/common/unitTests/testSequenceUtilities.cpp b/src/common/unitTests/testSequenceUtilities.cpp index fae3cc1..7764e96 100644 --- a/src/common/unitTests/testSequenceUtilities.cpp +++ b/src/common/unitTests/testSequenceUtilities.cpp @@ -72,7 +72,7 @@ void testNestedSequenceExpansionLambdaHelper() return ( executeSequence< 10 > ( [ h = Data::h, aa = std::integral_constant< int, a >{} ] ( auto const ... b ) constexpr - { return ( (h[aa] * h[b]) + ...); } + { return ( (h[aa] * h[b]) + ...); } ) + ... ); } ); @@ -119,7 +119,7 @@ void testSequenceExpansionTemplateLambdaHelper() kernelLaunch( [] SHIVA_HOST_DEVICE () { constexpr int staticSum0 = - executeSequence< 10 >( [&] < int ... a > () constexpr + executeSequence< 10 >( [&]< int ... a > () constexpr { return (Data::h[a] + ...); } ); @@ -139,13 +139,13 @@ void testSequenceExpansionTemplateLambdaHelper() { kernelLaunch( [] SHIVA_HOST_DEVICE () { - constexpr int staticSum0 = executeSequence< 10 >( [&] < int ... a > () constexpr + constexpr int staticSum0 = executeSequence< 10 >( [&]< int ... a > () constexpr { return ( executeSequence< 10 > ( - [ h = Data::h, aa = std::integral_constant< int, a >{} ] < int ... b > () constexpr - { return ( (h[aa] * h[b]) + ...); } + [ h = Data::h, aa = std::integral_constant< int, a >{} ]< int ... b > () constexpr + { return ( (h[aa] * h[b]) + ...); } ) + ... ); } ); @@ -167,7 +167,7 @@ void testForSequenceTemplateLambdaHelper() { int staticSum0 = 0; forSequence< 10 >( - [&] < int a > () constexpr + [&]< int a > () constexpr { staticSum0 += h[a]; } ); diff --git a/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp b/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp index 02c74e6..9d4bdb1 100644 --- a/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp +++ b/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp @@ -110,9 +110,9 @@ class ParentElement REAL_TYPE rval = {0}; forNestedSequence< BASIS_TYPE::numSupportPoints... >( [&] ( auto const ... ic_indices ) constexpr - { - rval = rval + ( value< decltype(ic_indices)::value ... >( parentCoord ) * var( decltype(ic_indices)::value ... ) ); - } ); + { + rval = rval + ( value< decltype(ic_indices)::value ... >( parentCoord ) * var( decltype(ic_indices)::value ... ) ); + } ); return rval; } @@ -130,13 +130,13 @@ class ParentElement { CArrayNd< RealType, numDims > rval = {0.0}; forNestedSequence< BASIS_TYPE::numSupportPoints... >( [&] ( auto const ... ic_indices ) constexpr - { - CArrayNd< RealType, numDims > const grad = gradient< decltype(ic_indices)::value ... >( parentCoord ); - forSequence< numDims >( [&] ( auto const a ) constexpr - { - rval( a ) = rval( a ) + grad( a ) * var( decltype(ic_indices)::value ... ); - } ); - } ); + { + CArrayNd< RealType, numDims > const grad = gradient< decltype(ic_indices)::value ... >( parentCoord ); + forSequence< numDims >( [&] ( auto const a ) constexpr + { + rval( a ) = rval( a ) + grad( a ) * var( decltype(ic_indices)::value ... ); + } ); + } ); return rval; } diff --git a/src/functions/bases/BasisProduct.hpp b/src/functions/bases/BasisProduct.hpp index b0f00ab..28f1075 100644 --- a/src/functions/bases/BasisProduct.hpp +++ b/src/functions/bases/BasisProduct.hpp @@ -89,21 +89,21 @@ struct BasisProduct { static_assert( sizeof...(BASIS_FUNCTION_INDICES) == numDims, "Wrong number of basis function indicies specified" ); - return + #if __cplusplus >= 202002L - // expand pack over number of dimensions - executeSequence< numDims >( [&] < int ... PRODUCT_TERM_INDEX > () constexpr - { - return ( BASIS_TYPE::template value< BASIS_FUNCTION_INDICES >( parentCoord[PRODUCT_TERM_INDEX] ) * ... ); - } ); + // expand pack over number of dimensions + return executeSequence< numDims >( [&]< int ... PRODUCT_TERM_INDEX > () constexpr + { + return ( BASIS_TYPE::template value< BASIS_FUNCTION_INDICES >( parentCoord[PRODUCT_TERM_INDEX] ) * ... ); + } ); #else - executeSequence< numDims >( [&] ( auto ... PRODUCT_TERM_INDEX ) constexpr - { - // fold expression to multiply the value of each BASIS_TYPE in each - // dimension. In other words the fold expands on BASIS_TYPE..., - // BASIS_FUNCTION_INDICES..., and PRODUCT_TERM_INDEX... together. - return ( BASIS_TYPES::template value< BASIS_FUNCTION_INDICES >( parentCoord[decltype(PRODUCT_TERM_INDEX)::value] ) * ... ); - } ); + return executeSequence< numDims >( [&] ( auto ... PRODUCT_TERM_INDEX ) constexpr + { + // fold expression to multiply the value of each BASIS_TYPE in each + // dimension. In other words the fold expands on BASIS_TYPE..., + // BASIS_FUNCTION_INDICES..., and PRODUCT_TERM_INDEX... together. + return ( BASIS_TYPES::template value< BASIS_FUNCTION_INDICES >( parentCoord[decltype(PRODUCT_TERM_INDEX)::value] ) * ... ); + } ); #endif } @@ -133,40 +133,40 @@ struct BasisProduct static_assert( sizeof...(BASIS_FUNCTION_INDICES) == numDims, "Wrong number of basis function indicies specified" ); #if __cplusplus >= 202002L - return executeSequence< numDims >( [&] < int ... i > () constexpr->CArrayNd< RealType, numDims > - { - auto gradientComponent = [&] ( auto const iGrad, - auto const ... PRODUCT_TERM_INDICES ) constexpr - { - // Ca - return ( gradientComponentHelper< BASIS_TYPES, - decltype(iGrad)::value, - BASIS_FUNCTION_INDICES, - PRODUCT_TERM_INDICES >( parentCoord ) * ... ); - }; - - return { (executeSequence< numDims >( gradientComponent, std::integral_constant< int, i >{} ) )... }; - } ); + return executeSequence< numDims >( [&]< int ... i > () constexpr->CArrayNd< RealType, numDims > + { + auto gradientComponent = [&] ( auto const iGrad, + auto const ... PRODUCT_TERM_INDICES ) constexpr + { + // Ca + return ( gradientComponentHelper< BASIS_TYPES, + decltype(iGrad)::value, + BASIS_FUNCTION_INDICES, + PRODUCT_TERM_INDICES >( parentCoord ) * ... ); + }; + + return { (executeSequence< numDims >( gradientComponent, std::integral_constant< int, i >{} ) )... }; + } ); #else // Expand over the dimensions. return executeSequence< numDims >( [&] ( auto ... a ) constexpr->CArrayNd< RealType, numDims > - { - // define a lambda that calculates the gradient of the basis function in - // a single dimension/direction. - auto gradientComponent = [&] ( auto GRADIENT_COMPONENT, auto ... PRODUCT_TERM_INDICES ) constexpr - { - // fold expression calling gradientComponentHelper using expanding on - // BASIS_TYPE, BASIS_FUNCTION_INDICES, and PRODUCT_TERM_INDICES. - return ( gradientComponentHelper< BASIS_TYPES, - decltype(GRADIENT_COMPONENT)::value, - BASIS_FUNCTION_INDICES, - decltype(PRODUCT_TERM_INDICES)::value >( parentCoord ) * ... ); - }; - - // execute the gradientComponent lambda on each direction, expand the - // pack on "i" corresponding to each direction of the gradient. - return { (executeSequence< numDims >( gradientComponent, a ) )... }; - } ); + { + // define a lambda that calculates the gradient of the basis function in + // a single dimension/direction. + auto gradientComponent = [&] ( auto GRADIENT_COMPONENT, auto ... PRODUCT_TERM_INDICES ) constexpr + { + // fold expression calling gradientComponentHelper using expanding on + // BASIS_TYPE, BASIS_FUNCTION_INDICES, and PRODUCT_TERM_INDICES. + return ( gradientComponentHelper< BASIS_TYPES, + decltype(GRADIENT_COMPONENT)::value, + BASIS_FUNCTION_INDICES, + decltype(PRODUCT_TERM_INDICES)::value >( parentCoord ) * ... ); + }; + + // execute the gradientComponent lambda on each direction, expand the + // pack on "i" corresponding to each direction of the gradient. + return { (executeSequence< numDims >( gradientComponent, a ) )... }; + } ); #endif } diff --git a/src/functions/bases/LagrangeBasis.hpp b/src/functions/bases/LagrangeBasis.hpp index ab2b330..e18bb4a 100644 --- a/src/functions/bases/LagrangeBasis.hpp +++ b/src/functions/bases/LagrangeBasis.hpp @@ -82,17 +82,17 @@ class LagrangeBasis : public SPACING_TYPE< REAL_TYPE, ORDER + 1 > value( REAL_TYPE const & coord ) { #if __cplusplus >= 202002L - return executeSequence< numSupportPoints >( [&] < int ... a > () constexpr - { - // return fold expression that is the product of all the polynomial - // factor terms. - return ( valueProductTerm< BF_INDEX, a >( coord ) * ... ); - } ); + return executeSequence< numSupportPoints >( [&]< int ... a > () constexpr + { + // return fold expression that is the product of all the polynomial + // factor terms. + return ( valueProductTerm< BF_INDEX, a >( coord ) * ... ); + } ); #else return executeSequence< numSupportPoints >( [&] ( auto const ... a ) constexpr - { - return ( valueProductTerm< BF_INDEX, decltype(a)::value >( coord ) * ... ); - } ); + { + return ( valueProductTerm< BF_INDEX, decltype(a)::value >( coord ) * ... ); + } ); #endif } @@ -117,28 +117,28 @@ class LagrangeBasis : public SPACING_TYPE< REAL_TYPE, ORDER + 1 > { #if __cplusplus >= 202002L - return executeSequence< numSupportPoints >( [&coord] < int ... a > () constexpr - { - auto func = [&coord] < int ... b > ( auto aa ) constexpr - { - constexpr int aVal = decltype(aa)::value; - return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< BF_INDEX, b, aVal >( coord ) * ... ); - }; - - return ( executeSequence< numSupportPoints >( func, std::integral_constant< int, a >{} ) + ... ); - } ); + return executeSequence< numSupportPoints >( [&coord]< int ... a > () constexpr + { + auto func = [&coord]< int ... b > ( auto aa ) constexpr + { + constexpr int aVal = decltype(aa)::value; + return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< BF_INDEX, b, aVal >( coord ) * ... ); + }; + + return ( executeSequence< numSupportPoints >( func, std::integral_constant< int, a >{} ) + ... ); + } ); #else return executeSequence< numSupportPoints >( [&coord] ( auto const ... a ) constexpr - { - REAL_TYPE const values[ numSupportPoints ] = { valueProductTerm< BF_INDEX, decltype(a)::value >( coord )... }; - auto func = [&values] ( auto aa, auto ... b ) constexpr - { - constexpr int aVal = decltype(aa)::value; - return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< decltype(b)::value, aVal >( values ) * ... ); - }; - - return ( executeSequence< numSupportPoints >( func, a ) + ... ); - } ); + { + REAL_TYPE const values[ numSupportPoints ] = { valueProductTerm< BF_INDEX, decltype(a)::value >( coord )... }; + auto func = [&values] ( auto aa, auto ... b ) constexpr + { + constexpr int aVal = decltype(aa)::value; + return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< decltype(b)::value, aVal >( values ) * ... ); + }; + + return ( executeSequence< numSupportPoints >( func, a ) + ... ); + } ); #endif } diff --git a/src/geometry/mapping/LinearTransform.hpp b/src/geometry/mapping/LinearTransform.hpp index a44d7d6..7fde65f 100644 --- a/src/geometry/mapping/LinearTransform.hpp +++ b/src/geometry/mapping/LinearTransform.hpp @@ -169,17 +169,17 @@ jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform, auto const & nodeCoords = transform.getData(); InterpolatedShape::supportLoop( [&] ( auto const ... ic_spIndices ) constexpr - { - CArrayNd< REAL_TYPE, DIMS > const dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( pointCoordsParent ); - // dimensional loop from domain to codomain - forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr - { - constexpr int i = decltype(ici)::value; - constexpr int j = decltype(icj)::value; - J( i, j ) = J( i, j ) + dNadXi( j ) * nodeCoords( decltype(ic_spIndices)::value ..., i ); - } ); - - } ); + { + CArrayNd< REAL_TYPE, DIMS > const dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( pointCoordsParent ); + // dimensional loop from domain to codomain + forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr + { + constexpr int i = decltype(ici)::value; + constexpr int j = decltype(icj)::value; + J( i, j ) = J( i, j ) + dNadXi( j ) * nodeCoords( decltype(ic_spIndices)::value ..., i ); + } ); + + } ); } @@ -216,27 +216,27 @@ jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform, constexpr REAL_TYPE qcoords[3] = { ( QUADRATURE::template coordinate< QA >() )... }; InterpolatedShape::supportLoop( [&] ( auto const ... ic_spIndices ) constexpr - { - constexpr CArrayNd< REAL_TYPE, DIMS > dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( qcoords ); + { + constexpr CArrayNd< REAL_TYPE, DIMS > dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( qcoords ); - // dimensional loop from domain to codomain + // dimensional loop from domain to codomain #if 1 - forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr - { - constexpr int i = decltype(ici)::value; - constexpr int j = decltype(icj)::value; - J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j ); - } ); + forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr + { + constexpr int i = decltype(ici)::value; + constexpr int j = decltype(icj)::value; + J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j ); + } ); #else - for ( int j = 0; j < DIMS; ++j ) - { - for ( int i = 0; i < DIMS; ++i ) - { - J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j ); - } - } + for ( int j = 0; j < DIMS; ++j ) + { + for ( int i = 0; i < DIMS; ++i ) + { + J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j ); + } + } #endif - } ); + } ); } diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp index bc3c93a..5cafab9 100644 --- a/src/geometry/mapping/unitTests/testUniformScaling.cpp +++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp @@ -99,7 +99,14 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue ) double const h = 3.14; auto cell = makeUniformScaling( h ); - auto [ detJ, invJ ] = inverseJacobian( cell ); + // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple +#if defined(SHIVA_USE_CUDA) && defined(CUDA_VERSION) && (CUDA_VERSION < 12000) + auto tmp = inverseJacobian( cell ); + auto detJ = shiva::get< 0 >( tmp ); + auto invJ = shiva::get< 1 >( tmp ); +#else + auto [detJ, invJ] = inverseJacobian( cell ); +#endif EXPECT_EQ( detJ, 0.125 * h * h * h ); EXPECT_EQ( invJ( 0 ), ( 2 / h ) ); } From 3b4b89bb448c7970fd5d29d72b3d6e2e8705f2bd Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Thu, 11 Sep 2025 15:29:36 +0900 Subject: [PATCH 16/26] try again --- cmake/CMakeBasics.cmake | 5 ++- src/common/types.hpp | 33 ++++++++++++++++++- .../mapping/unitTests/testUniformScaling.cpp | 2 +- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake index 0915345..9ac48ea 100644 --- a/cmake/CMakeBasics.cmake +++ b/cmake/CMakeBasics.cmake @@ -27,6 +27,5 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG CLANG "-fstandalone-debug" ) -set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "") - -set( CAMP_ENABLE_TESTS OFF CACHE BOOL "") +option( SHIVA_ENABLE_CAMP OFF ) +option( CAMP_ENABLE_TESTS OFF ) diff --git a/src/common/types.hpp b/src/common/types.hpp index 5ef42c8..0f0198a 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -19,11 +19,11 @@ #include "common/ShivaMacros.hpp" + /// @brief Macro to define whether or not to use camp. #if defined(SHIVA_USE_CAMP) #include #else - #if defined(SHIVA_USE_CUDA) #include #else @@ -58,8 +58,29 @@ make_tuple( T && ... t ) return camp::make_tuple( std::forward< T >( t ) ... ); } +#define SHIVA_HAVE_TUPLE_SB 1 + #else #if defined(SHIVA_USE_CUDA) + +// libcudacxx (CCCL) API version: e.g. 120400 for CUDA 12.4 +#ifndef _LIBCUDACXX_CUDA_API_VERSION + #define _LIBCUDACXX_CUDA_API_VERSION 0 +#endif + +#ifndef CUDART_VERSION + #define CUDART_VERSION 0 +#endif + +// Prefer the libcudacxx API version if present; fall back to runtime version. +#if (_LIBCUDACXX_CUDA_API_VERSION >= 120400) || (CUDART_VERSION >= 12040) + #define SHIVA_HAVE_TUPLE_SB 1 +#else + #define SHIVA_HAVE_TUPLE_SB 0 +#endif + + + /** * @brief Wrapper for cuda::std::tuple. * @tparam T Types of the elements of the tuple. @@ -100,6 +121,9 @@ make_tuple( T && ... t ) { return std::make_tuple( std::forward< T >( t ) ... ); } + +#define SHIVA_HAVE_TUPLE_SB 1 + #endif #endif @@ -117,4 +141,11 @@ using int_sequence = std::integer_sequence< int, T ... >; template< int N > using make_int_sequence = std::make_integer_sequence< int, N >; + + } + +#if defined(__CUDA_ARCH__) + #undef SHIVA_HAVE_TUPLE_SB + #define SHIVA_HAVE_TUPLE_SB 0 +#endif diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp index 5cafab9..810db22 100644 --- a/src/geometry/mapping/unitTests/testUniformScaling.cpp +++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp @@ -100,7 +100,7 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue ) auto cell = makeUniformScaling( h ); // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple -#if defined(SHIVA_USE_CUDA) && defined(CUDA_VERSION) && (CUDA_VERSION < 12000) +#if defined(SHIVA_USE_CUDA) && SHIVA_HAVE_TUPLE_SB == 0 auto tmp = inverseJacobian( cell ); auto detJ = shiva::get< 0 >( tmp ); auto invJ = shiva::get< 1 >( tmp ); From 8598f077ad4a7c29448a34cd7c893bc12acdf877 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Thu, 11 Sep 2025 15:37:57 +0900 Subject: [PATCH 17/26] try again --- src/common/types.hpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/common/types.hpp b/src/common/types.hpp index 0f0198a..fce8dfa 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -79,8 +79,6 @@ make_tuple( T && ... t ) #define SHIVA_HAVE_TUPLE_SB 0 #endif - - /** * @brief Wrapper for cuda::std::tuple. * @tparam T Types of the elements of the tuple. @@ -100,6 +98,9 @@ auto make_tuple( T && ... t ) { return cuda::std::make_tuple( std::forward< T >( t ) ... ); } + +using cuda::std::get; // expose cuda::std::get as shiva::get + #else /** * @brief Wrapper for std::tuple. @@ -123,6 +124,7 @@ make_tuple( T && ... t ) } #define SHIVA_HAVE_TUPLE_SB 1 +using std::get; #endif #endif @@ -142,10 +144,4 @@ template< int N > using make_int_sequence = std::make_integer_sequence< int, N >; - } - -#if defined(__CUDA_ARCH__) - #undef SHIVA_HAVE_TUPLE_SB - #define SHIVA_HAVE_TUPLE_SB 0 -#endif From 2d82182dae2f2e4dc1c9bf99039c0788edaae3e5 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Thu, 11 Sep 2025 16:08:33 +0900 Subject: [PATCH 18/26] remove a bunch of crap...just don't use the bindings for cuda wihtout camp --- src/common/types.hpp | 33 ++----------------- .../mapping/unitTests/testUniformScaling.cpp | 6 ++-- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/src/common/types.hpp b/src/common/types.hpp index fce8dfa..78f9704 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -19,11 +19,11 @@ #include "common/ShivaMacros.hpp" - /// @brief Macro to define whether or not to use camp. #if defined(SHIVA_USE_CAMP) #include #else + #if defined(SHIVA_USE_CUDA) #include #else @@ -51,34 +51,15 @@ using tuple = camp::tuple< T ... >; * @return A tuple with the elements passed as arguments. */ template< typename ... T > -SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE -auto +SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE +auto make_tuple( T && ... t ) { return camp::make_tuple( std::forward< T >( t ) ... ); } -#define SHIVA_HAVE_TUPLE_SB 1 - #else #if defined(SHIVA_USE_CUDA) - -// libcudacxx (CCCL) API version: e.g. 120400 for CUDA 12.4 -#ifndef _LIBCUDACXX_CUDA_API_VERSION - #define _LIBCUDACXX_CUDA_API_VERSION 0 -#endif - -#ifndef CUDART_VERSION - #define CUDART_VERSION 0 -#endif - -// Prefer the libcudacxx API version if present; fall back to runtime version. -#if (_LIBCUDACXX_CUDA_API_VERSION >= 120400) || (CUDART_VERSION >= 12040) - #define SHIVA_HAVE_TUPLE_SB 1 -#else - #define SHIVA_HAVE_TUPLE_SB 0 -#endif - /** * @brief Wrapper for cuda::std::tuple. * @tparam T Types of the elements of the tuple. @@ -98,9 +79,6 @@ auto make_tuple( T && ... t ) { return cuda::std::make_tuple( std::forward< T >( t ) ... ); } - -using cuda::std::get; // expose cuda::std::get as shiva::get - #else /** * @brief Wrapper for std::tuple. @@ -122,10 +100,6 @@ make_tuple( T && ... t ) { return std::make_tuple( std::forward< T >( t ) ... ); } - -#define SHIVA_HAVE_TUPLE_SB 1 -using std::get; - #endif #endif @@ -143,5 +117,4 @@ using int_sequence = std::integer_sequence< int, T ... >; template< int N > using make_int_sequence = std::make_integer_sequence< int, N >; - } diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp index 810db22..0665e5a 100644 --- a/src/geometry/mapping/unitTests/testUniformScaling.cpp +++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp @@ -100,12 +100,12 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue ) auto cell = makeUniformScaling( h ); // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple -#if defined(SHIVA_USE_CUDA) && SHIVA_HAVE_TUPLE_SB == 0 +#if defined(SHIVA_USE_CAMP) || !defined(SHIVA_USE_CUDA) + auto [detJ, invJ] = inverseJacobian( cell ); +#else auto tmp = inverseJacobian( cell ); auto detJ = shiva::get< 0 >( tmp ); auto invJ = shiva::get< 1 >( tmp ); -#else - auto [detJ, invJ] = inverseJacobian( cell ); #endif EXPECT_EQ( detJ, 0.125 * h * h * h ); EXPECT_EQ( invJ( 0 ), ( 2 / h ) ); From 3cba8eb67ab38c1036c09fb2c5382724fe284f33 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Thu, 11 Sep 2025 16:34:36 +0900 Subject: [PATCH 19/26] add cuda version through cmake --- cmake/CMakeBasics.cmake | 21 +++++++++++++++++++ docs/doxygen/ShivaConfig.hpp | 4 ++++ src/ShivaConfig.hpp.in | 6 +++++- src/common/types.hpp | 4 ++-- .../mapping/unitTests/testUniformScaling.cpp | 2 +- 5 files changed, 33 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake index 9ac48ea..f38bccc 100644 --- a/cmake/CMakeBasics.cmake +++ b/cmake/CMakeBasics.cmake @@ -29,3 +29,24 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG option( SHIVA_ENABLE_CAMP OFF ) option( CAMP_ENABLE_TESTS OFF ) + + +if( ENABLE_CUDA ) + # Extract CUDA version from CMake’s variables + set(SHIVA_CUDA_VERSION ${CUDAToolkit_VERSION}) + + # Also normalize to an integer for easy comparison (e.g. 12040 for 12.4.0) + string(REPLACE "." ";" CUDA_VERSION_LIST ${CUDAToolkit_VERSION}) + list(GET CUDA_VERSION_LIST 0 CUDA_MAJOR) + list(GET CUDA_VERSION_LIST 1 CUDA_MINOR) + list(GET CUDA_VERSION_LIST 2 CUDA_PATCH) + + math(EXPR CUDA_VERSION_INT "${CUDA_MAJOR}*1000 + ${CUDA_MINOR}*10 + ${CUDA_PATCH}") + + target_compile_definitions( shiva PUBLIC + SHIVA_CUDA_VERSION_STR="${CUDAToolkit_VERSION}" + SHIVA_CUDA_VERSION_INT=${CUDA_VERSION_INT} + SHIVA_CUDA_MAJOR=${CUDA_MAJOR} + SHIVA_CUDA_MINOR=${CUDA_MINOR} + ) +endif() diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index 4ac8dd9..c2e3f65 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -17,3 +17,7 @@ #define SHIVA_USE_CAMP #define SHIVA_USE_BOUNDS_CHECK + +/* #undef SHIVA_CUDA_MAJOR */ + +/* #undef SHIVA_CUDA_MINOR */ diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in index f0f4806..56dbc07 100644 --- a/src/ShivaConfig.hpp.in +++ b/src/ShivaConfig.hpp.in @@ -16,4 +16,8 @@ #cmakedefine SHIVA_USE_CAMP -#cmakedefine SHIVA_USE_BOUNDS_CHECK \ No newline at end of file +#cmakedefine SHIVA_USE_BOUNDS_CHECK + +#cmakedefine SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@ + +#cmakedefine SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@ \ No newline at end of file diff --git a/src/common/types.hpp b/src/common/types.hpp index 78f9704..5ef42c8 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -51,8 +51,8 @@ using tuple = camp::tuple< T ... >; * @return A tuple with the elements passed as arguments. */ template< typename ... T > -SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE -auto +SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE +auto make_tuple( T && ... t ) { return camp::make_tuple( std::forward< T >( t ) ... ); diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp index 0665e5a..f36a478 100644 --- a/src/geometry/mapping/unitTests/testUniformScaling.cpp +++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp @@ -100,7 +100,7 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue ) auto cell = makeUniformScaling( h ); // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple -#if defined(SHIVA_USE_CAMP) || !defined(SHIVA_USE_CUDA) +#if defined(SHIVA_USE_CAMP) || SHIVA_CUDA_MAJOR >= 12 || !defined(SHIVA_USE_CUDA) auto [detJ, invJ] = inverseJacobian( cell ); #else auto tmp = inverseJacobian( cell ); From a34ddcf6f77c5e05a9702df32168a410fef8ae80 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Thu, 11 Sep 2025 17:01:07 +0900 Subject: [PATCH 20/26] try again buddy --- cmake/CMakeBasics.cmake | 33 +++++++++---------- docs/doxygen/ShivaConfig.hpp | 7 ++-- src/ShivaConfig.hpp.in | 7 ++-- .../mapping/unitTests/testUniformScaling.cpp | 6 ++-- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake index f38bccc..61844a8 100644 --- a/cmake/CMakeBasics.cmake +++ b/cmake/CMakeBasics.cmake @@ -32,21 +32,20 @@ option( CAMP_ENABLE_TESTS OFF ) if( ENABLE_CUDA ) - # Extract CUDA version from CMake’s variables - set(SHIVA_CUDA_VERSION ${CUDAToolkit_VERSION}) - - # Also normalize to an integer for easy comparison (e.g. 12040 for 12.4.0) - string(REPLACE "." ";" CUDA_VERSION_LIST ${CUDAToolkit_VERSION}) - list(GET CUDA_VERSION_LIST 0 CUDA_MAJOR) - list(GET CUDA_VERSION_LIST 1 CUDA_MINOR) - list(GET CUDA_VERSION_LIST 2 CUDA_PATCH) - - math(EXPR CUDA_VERSION_INT "${CUDA_MAJOR}*1000 + ${CUDA_MINOR}*10 + ${CUDA_PATCH}") - - target_compile_definitions( shiva PUBLIC - SHIVA_CUDA_VERSION_STR="${CUDAToolkit_VERSION}" - SHIVA_CUDA_VERSION_INT=${CUDA_VERSION_INT} - SHIVA_CUDA_MAJOR=${CUDA_MAJOR} - SHIVA_CUDA_MINOR=${CUDA_MINOR} - ) + if(CUDAToolkit_FOUND AND CUDAToolkit_VERSION) + set(SHIVA_CUDA_VERSION ${CUDAToolkit_VERSION}) + string(REPLACE "." ";" _ver_list ${CUDAToolkit_VERSION}) + list(GET _ver_list 0 SHIVA_CUDA_MAJOR) + list(GET _ver_list 1 SHIVA_CUDA_MINOR) + list(GET _ver_list 2 SHIVA_CUDA_PATCHLEVEL) + math(EXPR SHIVA_CUDA_VERSION_INT "${SHIVA_CUDA_MAJOR}*1000 + ${SHIVA_CUDA_MINOR}*10 + ${SHIVA_CUDA_PATCHLEVEL}") + else() + message(FATAL_ERROR "Could not determine CUDA version. Please set CUDAToolkit_ROOT to the location of your CUDA installation.") + endif() +else() + set(SHIVA_CUDA_VERSION "0.0.0") + set(SHIVA_CUDA_MAJOR 0) + set(SHIVA_CUDA_MINOR 0) + set(SHIVA_CUDA_PATCHLEVEL 0) + set(SHIVA_CUDA_VERSION_INT 0) endif() diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index c2e3f65..62fe5b0 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -18,6 +18,7 @@ #define SHIVA_USE_BOUNDS_CHECK -/* #undef SHIVA_CUDA_MAJOR */ - -/* #undef SHIVA_CUDA_MINOR */ +#define SHIVA_CUDA_MAJOR 0 +#define SHIVA_CUDA_MINOR 0 +#define SHIVA_CUDA_PATCHLEVEL 0 +#define SHIVA_CUDA_VERSION_INT 0 diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in index 56dbc07..e8ea9ce 100644 --- a/src/ShivaConfig.hpp.in +++ b/src/ShivaConfig.hpp.in @@ -18,6 +18,7 @@ #cmakedefine SHIVA_USE_BOUNDS_CHECK -#cmakedefine SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@ - -#cmakedefine SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@ \ No newline at end of file +#define SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@ +#define SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@ +#define SHIVA_CUDA_PATCHLEVEL @SHIVA_CUDA_PATCHLEVEL@ +#define SHIVA_CUDA_VERSION_INT @SHIVA_CUDA_VERSION_INT@ \ No newline at end of file diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp index f36a478..543ae70 100644 --- a/src/geometry/mapping/unitTests/testUniformScaling.cpp +++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp @@ -100,12 +100,12 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue ) auto cell = makeUniformScaling( h ); // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple -#if defined(SHIVA_USE_CAMP) || SHIVA_CUDA_MAJOR >= 12 || !defined(SHIVA_USE_CUDA) - auto [detJ, invJ] = inverseJacobian( cell ); -#else +#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12 auto tmp = inverseJacobian( cell ); auto detJ = shiva::get< 0 >( tmp ); auto invJ = shiva::get< 1 >( tmp ); +#else + auto [detJ, invJ] = inverseJacobian( cell ); #endif EXPECT_EQ( detJ, 0.125 * h * h * h ); EXPECT_EQ( invJ( 0 ), ( 2 / h ) ); From 93cd3aa567bee63387f60f8c7eb47200c69c6664 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Thu, 11 Sep 2025 18:24:32 +0900 Subject: [PATCH 21/26] stop listening to chat --- cmake/CMakeBasics.cmake | 21 ++++++++------------- docs/doxygen/ShivaConfig.hpp | 4 ++-- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake index 61844a8..82a64a7 100644 --- a/cmake/CMakeBasics.cmake +++ b/cmake/CMakeBasics.cmake @@ -32,20 +32,15 @@ option( CAMP_ENABLE_TESTS OFF ) if( ENABLE_CUDA ) - if(CUDAToolkit_FOUND AND CUDAToolkit_VERSION) - set(SHIVA_CUDA_VERSION ${CUDAToolkit_VERSION}) - string(REPLACE "." ";" _ver_list ${CUDAToolkit_VERSION}) - list(GET _ver_list 0 SHIVA_CUDA_MAJOR) - list(GET _ver_list 1 SHIVA_CUDA_MINOR) - list(GET _ver_list 2 SHIVA_CUDA_PATCHLEVEL) - math(EXPR SHIVA_CUDA_VERSION_INT "${SHIVA_CUDA_MAJOR}*1000 + ${SHIVA_CUDA_MINOR}*10 + ${SHIVA_CUDA_PATCHLEVEL}") + if( CUDA_VERSION AND CUDA_VERSION_MAJOR AND CUDA_VERSION_MINOR ) + set( SHIVA_CUDA_VERSION ${CUDA_VERSION} ) + set( SHIVA_CUDA_MAJOR ${CUDA_VERSION_MAJOR} ) + set( SHIVA_CUDA_MINOR ${CUDA_VERSION_MINOR} ) else() - message(FATAL_ERROR "Could not determine CUDA version. Please set CUDAToolkit_ROOT to the location of your CUDA installation.") + message(FATAL_ERROR "CUDA_VERSION_MAJOR and CUDA_VERSION_MINOR not defined") endif() else() - set(SHIVA_CUDA_VERSION "0.0.0") - set(SHIVA_CUDA_MAJOR 0) - set(SHIVA_CUDA_MINOR 0) - set(SHIVA_CUDA_PATCHLEVEL 0) - set(SHIVA_CUDA_VERSION_INT 0) + set( SHIVA_CUDA_VERSION 0 ) + set( SHIVA_CUDA_MAJOR 0 ) + set( SHIVA_CUDA_MINOR 0 ) endif() diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index 62fe5b0..493d655 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -20,5 +20,5 @@ #define SHIVA_CUDA_MAJOR 0 #define SHIVA_CUDA_MINOR 0 -#define SHIVA_CUDA_PATCHLEVEL 0 -#define SHIVA_CUDA_VERSION_INT 0 +#define SHIVA_CUDA_PATCHLEVEL +#define SHIVA_CUDA_VERSION_INT From 723a02c6b9c0db19ea6d16daac6c6cb965ee58c9 Mon Sep 17 00:00:00 2001 From: Randolph R Settgast Date: Thu, 11 Sep 2025 08:18:59 -0500 Subject: [PATCH 22/26] fix some issues --- docs/doxygen/ShivaConfig.hpp | 8 +++---- src/ShivaConfig.hpp.in | 2 -- src/common/ShivaMacros.hpp | 43 +++++++++++++++++++++++++++++------- src/common/types.hpp | 1 + 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index 493d655..b0a1aa6 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -8,17 +8,17 @@ #define SHIVA_VERSION_PATCHLEVEL 0 -/* #undef SHIVA_USE_CUDA */ +#define SHIVA_USE_CUDA /* #undef SHIVA_USE_HIP */ /* #undef SHIVA_USE_CALIPER */ -#define SHIVA_USE_CAMP +/* #undef SHIVA_USE_CAMP */ #define SHIVA_USE_BOUNDS_CHECK -#define SHIVA_CUDA_MAJOR 0 -#define SHIVA_CUDA_MINOR 0 +#define SHIVA_CUDA_MAJOR 12 +#define SHIVA_CUDA_MINOR 6 #define SHIVA_CUDA_PATCHLEVEL #define SHIVA_CUDA_VERSION_INT diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in index e8ea9ce..7103b82 100644 --- a/src/ShivaConfig.hpp.in +++ b/src/ShivaConfig.hpp.in @@ -20,5 +20,3 @@ #define SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@ #define SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@ -#define SHIVA_CUDA_PATCHLEVEL @SHIVA_CUDA_PATCHLEVEL@ -#define SHIVA_CUDA_VERSION_INT @SHIVA_CUDA_VERSION_INT@ \ No newline at end of file diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp index c3280e1..0ce8264 100644 --- a/src/common/ShivaMacros.hpp +++ b/src/common/ShivaMacros.hpp @@ -110,11 +110,38 @@ void i_g_n_o_r_e( ARGS const & ... ) {} * @param cond The condition to assert is true. * @param ... The message to print if the assertion fails. */ -#define SHIVA_ASSERT_MSG( cond, ... ) \ - do { \ - if ( !(cond)) { \ - if ( !__builtin_is_constant_evaluated()) { \ - shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \ - } \ - } \ - } while ( 0 ) +#if defined(__CUDACC__) +// NVCC: avoid is_constant_evaluated/if consteval to silence #3060. +// (Optional: you can also add --diag-suppress=3060 instead.) + #define SHIVA_ASSERT_MSG(cond, ...) \ + do { \ + if (!(cond)) { \ + shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__); \ + } \ + } while (0) +#elif defined(__cpp_if_consteval) && __cpp_if_consteval >= 202106L +// Modern C++: prefer 'if consteval' + #define SHIVA_ASSERT_MSG(cond, ...) \ + do { \ + if (!(cond)) { \ + if consteval { \ + static_assert((cond), "SHIVA_ASSERT_MSG failed in constant eval"); \ + } else { \ + shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__); \ + } \ + } \ + } while (0) +#else +// Portable fallback using std::is_constant_evaluated (no NVCC) + #include + #define SHIVA_ASSERT_MSG(cond, ...) \ + do { \ + if (!(cond)) { \ + if (!std::is_constant_evaluated()) { \ + shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__); \ + } else { \ + static_assert((cond), "SHIVA_ASSERT_MSG failed in constant eval"); \ + } \ + } \ + } while (0) +#endif diff --git a/src/common/types.hpp b/src/common/types.hpp index 5ef42c8..9c1ce28 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -66,6 +66,7 @@ make_tuple( T && ... t ) */ template< typename ... T > using tuple = cuda::std::tuple< T ... >; +using cuda::std::get; /** * @brief Wrapper for cuda::std::make_tuple. From cbca43cc684cc3162580d0ce31036aa11d9432f4 Mon Sep 17 00:00:00 2001 From: Randolph R Settgast Date: Thu, 11 Sep 2025 08:22:06 -0500 Subject: [PATCH 23/26] add maple hostconfig --- hostconfigs/TTE/maple_rocky9.cmake | 24 +++++++++++++++++ src/common/ShivaMacros.hpp | 43 ++++++------------------------ 2 files changed, 32 insertions(+), 35 deletions(-) create mode 100644 hostconfigs/TTE/maple_rocky9.cmake diff --git a/hostconfigs/TTE/maple_rocky9.cmake b/hostconfigs/TTE/maple_rocky9.cmake new file mode 100644 index 0000000..e3f4a83 --- /dev/null +++ b/hostconfigs/TTE/maple_rocky9.cmake @@ -0,0 +1,24 @@ +set(CONFIG_NAME "maple_rocky9" CACHE PATH "") + +set(COMPILER_DIR /opt/rh/gcc-toolset-13/root/ ) +set(CMAKE_C_COMPILER ${COMPILER_DIR}/bin/gcc CACHE PATH "") +set(CMAKE_CXX_COMPILER ${COMPILER_DIR}/bin/g++ CACHE PATH "") + +# C++ options +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -mtune=native -march=native" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +# Cuda options +set(ENABLE_CUDA ON CACHE BOOL "") +set(CUDA_TOOLKIT_ROOT_DIR /hrtc/apps/cuda/12.6.20/aarch64/rocky9 CACHE STRING "") +set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "") +set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc CACHE STRING "") +set(CMAKE_CUDA_ARCHITECTURES 90 CACHE STRING "") +set(CMAKE_CUDA_STANDARD 17 CACHE STRING "") +set(CMAKE_CUDA_FLAGS "-restrict --expt-extended-lambda --expt-relaxed-constexpr -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "") +#set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3 -Xcompiler -mcpu=powerpc64le -Xcompiler -mtune=powerpc64le" CACHE STRING "") +#set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "") +#set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "") + +set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "Disable CAMP support" FORCE ) \ No newline at end of file diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp index 0ce8264..c3280e1 100644 --- a/src/common/ShivaMacros.hpp +++ b/src/common/ShivaMacros.hpp @@ -110,38 +110,11 @@ void i_g_n_o_r_e( ARGS const & ... ) {} * @param cond The condition to assert is true. * @param ... The message to print if the assertion fails. */ -#if defined(__CUDACC__) -// NVCC: avoid is_constant_evaluated/if consteval to silence #3060. -// (Optional: you can also add --diag-suppress=3060 instead.) - #define SHIVA_ASSERT_MSG(cond, ...) \ - do { \ - if (!(cond)) { \ - shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__); \ - } \ - } while (0) -#elif defined(__cpp_if_consteval) && __cpp_if_consteval >= 202106L -// Modern C++: prefer 'if consteval' - #define SHIVA_ASSERT_MSG(cond, ...) \ - do { \ - if (!(cond)) { \ - if consteval { \ - static_assert((cond), "SHIVA_ASSERT_MSG failed in constant eval"); \ - } else { \ - shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__); \ - } \ - } \ - } while (0) -#else -// Portable fallback using std::is_constant_evaluated (no NVCC) - #include - #define SHIVA_ASSERT_MSG(cond, ...) \ - do { \ - if (!(cond)) { \ - if (!std::is_constant_evaluated()) { \ - shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__); \ - } else { \ - static_assert((cond), "SHIVA_ASSERT_MSG failed in constant eval"); \ - } \ - } \ - } while (0) -#endif +#define SHIVA_ASSERT_MSG( cond, ... ) \ + do { \ + if ( !(cond)) { \ + if ( !__builtin_is_constant_evaluated()) { \ + shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \ + } \ + } \ + } while ( 0 ) From 85e40e00cb1b08f9066180542e3bc6c504bdc2fa Mon Sep 17 00:00:00 2001 From: "Randolph R. Settgast" Date: Thu, 11 Sep 2025 18:25:41 -0700 Subject: [PATCH 24/26] some fixes --- cmake/blt | 2 +- docs/doxygen/ShivaConfig.hpp | 6 +-- src/common/ShivaMacros.hpp | 46 ++++++++++++++++--- .../mapping/unitTests/testLinearTransform.cpp | 6 +++ .../mapping/unitTests/testScaling.cpp | 8 +++- 5 files changed, 55 insertions(+), 13 deletions(-) diff --git a/cmake/blt b/cmake/blt index fb4246b..9ff7734 160000 --- a/cmake/blt +++ b/cmake/blt @@ -1 +1 @@ -Subproject commit fb4246b8bae74c3d7291bef9698fd38863844680 +Subproject commit 9ff77344f0b2a6ee345e452bddd6bfd46cbbfa35 diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index b0a1aa6..b186086 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -18,7 +18,5 @@ #define SHIVA_USE_BOUNDS_CHECK -#define SHIVA_CUDA_MAJOR 12 -#define SHIVA_CUDA_MINOR 6 -#define SHIVA_CUDA_PATCHLEVEL -#define SHIVA_CUDA_VERSION_INT +#define SHIVA_CUDA_MAJOR 11 +#define SHIVA_CUDA_MINOR 8 diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp index c3280e1..cad432a 100644 --- a/src/common/ShivaMacros.hpp +++ b/src/common/ShivaMacros.hpp @@ -105,16 +105,48 @@ void i_g_n_o_r_e( ARGS const & ... ) {} + +// Portable builtin detector +#ifndef SHIVA_HAS_BUILTIN + #ifdef __has_builtin + #define SHIVA_HAS_BUILTIN(x) __has_builtin(x) + #else + #define SHIVA_HAS_BUILTIN(x) 0 + #endif +#endif + +// Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain +#if defined(__CUDA_ARCH__) + // Device code (nvcc, hipcc): no support in C++17 + #define SHIVA_IS_CONST_EVAL() (false) + +#elif SHIVA_HAS_BUILTIN(__builtin_is_constant_evaluated) + // GCC / Clang host code + #define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated()) + +#elif defined(_MSC_VER) + // MSVC + #define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated()) + +#else + // Fallback: always runtime + #define SHIVA_IS_CONST_EVAL() (false) +#endif + + + /** * @brief This macro is used to implement an assertion. * @param cond The condition to assert is true. * @param ... The message to print if the assertion fails. */ #define SHIVA_ASSERT_MSG( cond, ... ) \ - do { \ - if ( !(cond)) { \ - if ( !__builtin_is_constant_evaluated()) { \ - shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \ - } \ - } \ - } while ( 0 ) +do { \ + if ( !(cond)) { \ + if ( !SHIVA_IS_CONST_EVAL() ) { \ + shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \ + } \ + } \ +} while ( 0 ) + + diff --git a/src/geometry/mapping/unitTests/testLinearTransform.cpp b/src/geometry/mapping/unitTests/testLinearTransform.cpp index 9087bb6..5a575bf 100644 --- a/src/geometry/mapping/unitTests/testLinearTransform.cpp +++ b/src/geometry/mapping/unitTests/testLinearTransform.cpp @@ -305,7 +305,13 @@ void testInvJacobianFunctionReturnByValueHelper() auto cell = makeLinearTransform( Xref ); for ( int q = 0; q < 8; ++q ) { +#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12 + auto tmp = inverseJacobian( cell, qCoords[q] ); + auto detJ = shiva::get< 0 >( tmp ); + auto invJ = shiva::get< 1 >( tmp ); +#else auto [ detJ, invJ ] = inverseJacobian( cell, qCoords[q] ); +#endif kernelData[ 10 * q ] = detJ; for ( int i = 0; i < 3; ++i ) diff --git a/src/geometry/mapping/unitTests/testScaling.cpp b/src/geometry/mapping/unitTests/testScaling.cpp index 91f8129..b83bbb2 100644 --- a/src/geometry/mapping/unitTests/testScaling.cpp +++ b/src/geometry/mapping/unitTests/testScaling.cpp @@ -143,7 +143,13 @@ void testInvJacobianFunctionReturnByValueHelper() { auto cell = makeScaling( h ); - auto [ detJ, invJ ] = inverseJacobian( cell ); +#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12 + auto tmp = inverseJacobian( cell ); + auto detJ = shiva::get< 0 >( tmp ); + auto invJ = shiva::get< 1 >( tmp ); +#else + auto [detJ, invJ] = inverseJacobian( cell ); +#endif kdata[0] = detJ; kdata[1] = invJ( 0 ); kdata[2] = invJ( 1 ); From 28880b2b44824a25110e0987e7da62f88ba02904 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Fri, 12 Sep 2025 19:59:17 +0900 Subject: [PATCH 25/26] doxygen --- docs/doxygen/ShivaConfig.hpp | 8 ++++---- src/common/ShivaMacros.hpp | 39 ++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp index b186086..8e64b01 100644 --- a/docs/doxygen/ShivaConfig.hpp +++ b/docs/doxygen/ShivaConfig.hpp @@ -8,15 +8,15 @@ #define SHIVA_VERSION_PATCHLEVEL 0 -#define SHIVA_USE_CUDA +/* #undef SHIVA_USE_CUDA */ /* #undef SHIVA_USE_HIP */ /* #undef SHIVA_USE_CALIPER */ -/* #undef SHIVA_USE_CAMP */ +#define SHIVA_USE_CAMP #define SHIVA_USE_BOUNDS_CHECK -#define SHIVA_CUDA_MAJOR 11 -#define SHIVA_CUDA_MINOR 8 +#define SHIVA_CUDA_MAJOR 0 +#define SHIVA_CUDA_MINOR 0 diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp index cad432a..f8ddd3d 100644 --- a/src/common/ShivaMacros.hpp +++ b/src/common/ShivaMacros.hpp @@ -105,31 +105,34 @@ void i_g_n_o_r_e( ARGS const & ... ) {} - -// Portable builtin detector +/** + * @brief This macro is used to detect the presence of builtin functions. + */ #ifndef SHIVA_HAS_BUILTIN #ifdef __has_builtin - #define SHIVA_HAS_BUILTIN(x) __has_builtin(x) + #define SHIVA_HAS_BUILTIN( x ) __has_builtin( x ) #else - #define SHIVA_HAS_BUILTIN(x) 0 + #define SHIVA_HAS_BUILTIN( x ) 0 #endif #endif -// Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain +/** + * @brief Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain + */ #if defined(__CUDA_ARCH__) - // Device code (nvcc, hipcc): no support in C++17 +// Device code (nvcc, hipcc): no support in C++17 #define SHIVA_IS_CONST_EVAL() (false) -#elif SHIVA_HAS_BUILTIN(__builtin_is_constant_evaluated) - // GCC / Clang host code +#elif SHIVA_HAS_BUILTIN( __builtin_is_constant_evaluated ) +// GCC / Clang host code #define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated()) #elif defined(_MSC_VER) - // MSVC +// MSVC #define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated()) #else - // Fallback: always runtime +// Fallback: always runtime #define SHIVA_IS_CONST_EVAL() (false) #endif @@ -141,12 +144,10 @@ void i_g_n_o_r_e( ARGS const & ... ) {} * @param ... The message to print if the assertion fails. */ #define SHIVA_ASSERT_MSG( cond, ... ) \ -do { \ - if ( !(cond)) { \ - if ( !SHIVA_IS_CONST_EVAL() ) { \ - shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \ - } \ - } \ -} while ( 0 ) - - + do { \ + if ( !(cond)) { \ + if ( !SHIVA_IS_CONST_EVAL() ) { \ + shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \ + } \ + } \ + } while ( 0 ) From 63d667de1caecc2949409e1cb2412848b2c79916 Mon Sep 17 00:00:00 2001 From: Randolph Settgast Date: Fri, 12 Sep 2025 20:10:21 +0900 Subject: [PATCH 26/26] Apply suggestions from code review --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index a546177..000410f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "cmake/blt"] path = cmake/blt - url = https://github.com/LLNL/blt.git + url = ../../LLNL/blt.git [submodule "tpl/camp"] path = tpl/camp - url = https://github.com/LLNL/camp.git + url = ../../LLNL/camp.git