From 86f6622339ee7896a5433e780518de43ac1c087f Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Sun, 16 Mar 2025 15:48:25 -0700
Subject: [PATCH 01/26] comment out assert calls which are not constexpr

---
 src/functions/quadrature/Quadrature.hpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/functions/quadrature/Quadrature.hpp b/src/functions/quadrature/Quadrature.hpp
index 0e988cf..ea0dc39 100644
--- a/src/functions/quadrature/Quadrature.hpp
+++ b/src/functions/quadrature/Quadrature.hpp
@@ -53,13 +53,13 @@ struct QuadratureGaussLegendre : public GaussLegendreSpacing< REAL_TYPE, N >
     }
     else if constexpr ( N == 3 )
     {
-      assert( index >= 0 && index < 3 );
+      //assert( index >= 0 && index < 3 );
       return 0.5555555555555555555555555555555556 +
              0.3333333333333333333333333333333333 * ( index & 1 );
     }
     else if constexpr ( N == 4 )
     {
-      assert( index >= 0 && index < 4 );
+      //assert( index >= 0 && index < 4 );
       return 0.5 + ( -1 + ( ( ( index + 1 ) & 2 ) ) ) * 0.15214515486254614262693605077800059277;
     }
     return std::numeric_limits<REAL_TYPE>::max();
@@ -145,20 +145,20 @@ struct QuadratureGaussLobatto : public GaussLobattoSpacing< REAL_TYPE, N >
     }
     else if constexpr ( N == 3 )
     {
-      assert( index >= 0 && index < 3 );
+      //assert( index >= 0 && index < 3 );
       return 0.3333333333333333333333333333333333 + ( index & 1 );
     }
     else if constexpr ( N == 4 )
     {
-      assert( index >= 0 && index < 4 );
+      //assert( index >= 0 && index < 4 );
       return 0.1666666666666666666666666666666667 + ( ((index + 1) & 2) >> 1 ) * 0.6666666666666666666666666666666667;
     }
     else if constexpr ( N == 5 )
     {
-      assert( index >= 0 && index < 5 );
+      //assert( index >= 0 && index < 5 );
       return 0.1 + (index & 1) * 0.4444444444444444444444444444444444 + !( index - 2 ) * 0.6111111111111111111111111111111111;
     }
-    return std::numeric_limits<REAL_TYPE>::max();
+    return 0;//std::numeric_limits<REAL_TYPE>::max();
   }
 
   /**

From 5d873d1406ed63cecefb7211b1086b16ad6a63bd Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Sat, 22 Mar 2025 16:34:02 -0700
Subject: [PATCH 02/26] make camp more optional

---
 CMakeLists.txt               | 12 +++++++-----
 cmake/CMakeBasics.cmake      |  1 +
 cmake/Config.cmake           |  3 ++-
 docs/doxygen/ShivaConfig.hpp |  2 ++
 src/ShivaConfig.hpp.in       |  4 +++-
 src/common/types.hpp         |  1 -
 6 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab43b11..dc68e5a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,13 +65,15 @@ endif()
 include( cmake/Macros.cmake )
 include( cmake/Config.cmake )
 
-
 add_subdirectory( src )
-add_subdirectory( tpl/camp )
-target_compile_options( camp PRIVATE "-Wno-shadow") 
 
-configure_file(tpl/camp/include/camp/config.in.hpp
-  ${PROJECT_BINARY_DIR}/include/camp/config.hpp)
+if( SHIVA_ENABLE_CAMP )
+    add_subdirectory( tpl/camp )
+    target_compile_options( camp PRIVATE "-Wno-shadow") 
+
+    configure_file(tpl/camp/include/camp/config.in.hpp
+      ${PROJECT_BINARY_DIR}/include/camp/config.hpp)
+endif()
 
 
 if( SHIVA_ENABLE_DOCS )
diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake
index 93a1c1b..0fe1b85 100644
--- a/cmake/CMakeBasics.cmake
+++ b/cmake/CMakeBasics.cmake
@@ -25,4 +25,5 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG
                                  CLANG "-fstandalone-debug"
                                 )
 
+set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "")
 set( CAMP_ENABLE_TESTS OFF CACHE BOOL "")
diff --git a/cmake/Config.cmake b/cmake/Config.cmake
index 24e78c7..039c2c1 100644
--- a/cmake/Config.cmake
+++ b/cmake/Config.cmake
@@ -1,11 +1,12 @@
 #
 set( PREPROCESSOR_DEFINES CUDA
                           HIP
+                          CAMP
                         )
 
 set( USE_CONFIGFILE ON CACHE BOOL "" )
 foreach( DEP in ${PREPROCESSOR_DEFINES})
-    if( ${DEP}_FOUND OR ENABLE_${DEP} )
+    if( ${DEP}_FOUND OR ENABLE_${DEP} OR SHIVA_ENABLE_${DEP} )
         set( SHIVA_USE_${DEP} TRUE )
     endif()
 endforeach()
diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index d89803f..119e591 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -13,3 +13,5 @@
 /* #undef SHIVA_USE_HIP */
 
 /* #undef SHIVA_USE_CALIPER */
+
+/* #undef SHIVA_USE_CAMP */
diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in
index 6c01b1d..f1380d3 100644
--- a/src/ShivaConfig.hpp.in
+++ b/src/ShivaConfig.hpp.in
@@ -12,4 +12,6 @@
 
 #cmakedefine SHIVA_USE_HIP
 
-#cmakedefine SHIVA_USE_CALIPER
\ No newline at end of file
+#cmakedefine SHIVA_USE_CALIPER
+
+#cmakedefine SHIVA_USE_CAMP
\ No newline at end of file
diff --git a/src/common/types.hpp b/src/common/types.hpp
index ff2afd0..26e22f7 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -20,7 +20,6 @@
 #include "common/ShivaMacros.hpp"
 
 /// @brief Macro to define whether or not to use camp.
-#define SHIVA_USE_CAMP
 #if defined(SHIVA_USE_CAMP)
 #include <camp/camp.hpp>
 #else

From abed5585b6c037703378aadd235b997a4310d954 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Sat, 29 Mar 2025 10:49:38 +0900
Subject: [PATCH 03/26] some cmake fixes

---
 CMakeLists.txt            | 14 +++++++++++---
 src/CMakeLists.txt        |  3 ++-
 src/common/CMakeLists.txt |  8 ++++----
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dc68e5a..1ab0fa6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,12 +15,12 @@ set( SHIVA_VERSION_PATCHLEVEL 0 )
 # check if Shiva is build as a submodule or a separate project
 get_directory_property( parent_dir PARENT_DIRECTORY )
 if(parent_dir)
-    set( is_submodule ON )
+    set( SHIVA_IS_SUBMODULE ON )
 else()
-    set( is_submodule OFF )
+    set( SHIVA_IS_SUBMODULE OFF )
 endif()
 
-if( NOT is_submodule )
+if( NOT SHIVA_IS_SUBMODULE )
     message( "not a submodule")
     project( Shiva LANGUAGES CXX C )
 
@@ -65,6 +65,14 @@ endif()
 include( cmake/Macros.cmake )
 include( cmake/Config.cmake )
 
+
+set(SHIVA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
+set(SHIVA_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )
+
+message( STATUS "SHIVA_BINARY_DIR: ${SHIVA_BINARY_DIR}" )
+message( STATUS "SHIVA_SOURCE_DIR: ${SHIVA_SOURCE_DIR}" )
+
+
 add_subdirectory( src )
 
 if( SHIVA_ENABLE_CAMP )
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ca02250..b680057 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -23,7 +23,8 @@ blt_add_library( NAME             shiva
 
 target_include_directories( shiva
                             INTERFACE
-                            $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>
+                            $<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/include>
+                            $<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/src>
                             $<INSTALL_INTERFACE:include> )
 
 install( FILES ${shiva_headers} 
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9c684d5..f754295 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -34,14 +34,14 @@ blt_add_library( NAME             common
 
 target_include_directories( common 
                             INTERFACE 
-                            $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>
-                            $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/src>
+                            $<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/include>
+                            $<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/src>
                             $<INSTALL_INTERFACE:include> )
 
 target_include_directories( common 
                             SYSTEM INTERFACE
-                            $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/tpl/camp/include>
-                            $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/tpl/camp/include> )
+                            $<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/tpl/camp/include>
+                            $<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/tpl/camp/include> )
 
 install( FILES ${common_headers} 
          DESTINATION include/common )

From 9c3f0d022fbba3a47d50ac2fe7441d17c71c4f46 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Sat, 29 Mar 2025 11:36:34 +0900
Subject: [PATCH 04/26] fixes for absolute paths in aggregateOrSplit.py

---
 scripts/aggregateOrSplit.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/scripts/aggregateOrSplit.py b/scripts/aggregateOrSplit.py
index 89baa12..251174e 100644
--- a/scripts/aggregateOrSplit.py
+++ b/scripts/aggregateOrSplit.py
@@ -20,11 +20,13 @@ def create_dependency_graph(self, header, include_paths=None):
         if include_paths is None:
             include_paths = []
 
+        header = os.path.abspath(header)  # Normalize here
+
         if header in self.dependencies:
             return  # Already processed
 
         self.dependencies[header] = set()
-        base_path = os.path.dirname(os.path.abspath(header))  # Base directory of the current header
+        base_path = os.path.dirname(header)  # Base directory of the current header
 
         try:
             with open(header, 'r') as file:
@@ -34,10 +36,10 @@ def create_dependency_graph(self, header, include_paths=None):
                         included_file = include_match.group(1)
 
                         if included_file != self.config_file:
-                            resolved_path = self.resolve_path(
-                                included_file, base_path, include_paths)
+                            resolved_path = self.resolve_path( included_file, base_path, include_paths)
 
                             if resolved_path:
+                                resolved_path = os.path.abspath(resolved_path)
                                 self.dependencies[header].add(resolved_path)
 
                                 if os.path.exists(resolved_path):
@@ -82,16 +84,21 @@ def resolve_path(self, included_file, base_path, include_paths):
 
         return None  # Return None if no resolution was possible
 
+
     def generate_header_list(self):
         remaining_dependencies = self.dependencies.copy()
         size_of_remaining_dependencies = len(remaining_dependencies)
+        unique_files = set()  # Track unique files by absolute path
 
         while size_of_remaining_dependencies > 0:
             local_included = []
 
             for key in remaining_dependencies:
                 if len(remaining_dependencies[key]) == 0:
-                    self.included_list.append(key)
+                    abs_key = os.path.abspath(key)
+                    if abs_key not in unique_files:
+                        self.included_list.append(abs_key)
+                        unique_files.add(abs_key)
                     local_included.append(key)
 
             for included_key in local_included:
@@ -111,6 +118,7 @@ def process_header(header_path, output):
             """
             Processes a single header file, commenting out includes and pragmas.
             """
+            header_path = os.path.abspath(header_path)
             if header_path in self.included:
                 return  # Avoid duplicate processing
             self.included.add(header_path)
@@ -133,6 +141,7 @@ def process_header(header_path, output):
 
         with open(output_file, 'w') as output:
             for header in headers:
+                header = os.path.abspath(header)
                 self.create_dependency_graph(header, include_paths)
             
             for header in self.dependencies:

From 6ff1f747b22b2d558bf26a9b0b3076507d7c0e35 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Mon, 7 Apr 2025 07:21:32 -0700
Subject: [PATCH 05/26] updated pmpl for data transfer

---
 src/common/pmpl.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp
index 1368418..812fc1b 100644
--- a/src/common/pmpl.hpp
+++ b/src/common/pmpl.hpp
@@ -132,6 +132,7 @@ void genericKernelWrapper( int const N, DATA_TYPE * const hostData, LAMBDA && fu
 #if defined(SHIVA_USE_DEVICE)
   DATA_TYPE * deviceData;
   deviceMalloc( &deviceData, N * sizeof(DATA_TYPE) );
+  deviceMemCpy( deviceData, hostData, N * sizeof(DATA_TYPE), cudaMemcpyHostToDevice );
   genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ), deviceData );
   deviceDeviceSynchronize();
   deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), cudaMemcpyDeviceToHost );

From 0582eedc60675205b53539fb070173e34f259121 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Sat, 10 May 2025 13:57:32 -0700
Subject: [PATCH 06/26] use https url for submodules to avoid issues
 TotalEnergies staff were having with relative paths

---
 .gitmodules | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 000410f..a546177 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "cmake/blt"]
 	path = cmake/blt
-	url = ../../LLNL/blt.git
+	url = https://github.com/LLNL/blt.git
 [submodule "tpl/camp"]
 	path = tpl/camp
-	url = ../../LLNL/camp.git
+	url = https://github.com/LLNL/camp.git

From 64a16a79d733f7e0da02e06aef403dfaf69eb654 Mon Sep 17 00:00:00 2001
From: Randolph R Settgast <randolph.settgast@scientificcomputingsolutions.com>
Date: Tue, 20 May 2025 09:38:04 -0500
Subject: [PATCH 07/26] change looping strategy for jacobian to static

---
 docs/doxygen/ShivaConfig.hpp             | 2 +-
 src/geometry/mapping/LinearTransform.hpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index fdee9a6..5187f6d 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -8,7 +8,7 @@
 
 #define SHIVA_VERSION_PATCHLEVEL 0
 
-/* #undef SHIVA_USE_CUDA */
+#define SHIVA_USE_CUDA
 
 /* #undef SHIVA_USE_HIP */
 
diff --git a/src/geometry/mapping/LinearTransform.hpp b/src/geometry/mapping/LinearTransform.hpp
index 86b9536..0b8c2a4 100644
--- a/src/geometry/mapping/LinearTransform.hpp
+++ b/src/geometry/mapping/LinearTransform.hpp
@@ -220,7 +220,7 @@ jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform,
           constexpr CArrayNd< REAL_TYPE, DIMS > dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( qcoords );
 
           // dimensional loop from domain to codomain
-    #if 0
+    #if 1
           forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
           {
             constexpr int i = decltype(ici)::value;

From 8abcdf3846bdb5c88fd921d8240736a2f1dca99a Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Tue, 20 May 2025 20:31:36 -0700
Subject: [PATCH 08/26] templatize hard coded real types

---
 docs/doxygen/ShivaConfig.hpp             | 2 +-
 src/geometry/mapping/LinearTransform.hpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index 5187f6d..fdee9a6 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -8,7 +8,7 @@
 
 #define SHIVA_VERSION_PATCHLEVEL 0
 
-#define SHIVA_USE_CUDA
+/* #undef SHIVA_USE_CUDA */
 
 /* #undef SHIVA_USE_HIP */
 
diff --git a/src/geometry/mapping/LinearTransform.hpp b/src/geometry/mapping/LinearTransform.hpp
index 0b8c2a4..a44d7d6 100644
--- a/src/geometry/mapping/LinearTransform.hpp
+++ b/src/geometry/mapping/LinearTransform.hpp
@@ -208,12 +208,12 @@ SHIVA_STATIC_CONSTEXPR_HOSTDEVICE_FORCEINLINE void
 jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform,
           typename LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE >::JacobianType & J )
 {
-  using Transform = std::remove_reference_t< decltype(transform) >;
+  using Transform = LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE >;
   using InterpolatedShape = typename Transform::InterpolatedShape;
   constexpr int DIMS = Transform::numDims;
 
   auto const & nodeCoords = transform.getData();
-  constexpr double qcoords[3] = { ( QUADRATURE::template coordinate< QA >() )... };
+  constexpr REAL_TYPE qcoords[3] = { ( QUADRATURE::template coordinate< QA >() )... };
 
   InterpolatedShape::supportLoop( [&] ( auto const ... ic_spIndices ) constexpr
         {

From ece7de77636e12d6662a64947bec0758e44fa0cd Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Wed, 10 Sep 2025 23:11:44 +0900
Subject: [PATCH 09/26] some code review suggestions

---
 src/common/pmpl.hpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp
index fd0af6a..7a4346e 100644
--- a/src/common/pmpl.hpp
+++ b/src/common/pmpl.hpp
@@ -37,6 +37,8 @@ namespace shiva
     #define deviceError_t cudaError_t
     #define deviceSuccess cudaSuccess
     #define deviceGetErrorString    cudaGetErrorString
+    #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
+    #define deviceSuccess = cudaSuccess;
     #elif defined(SHIVA_USE_HIP)
     #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
     #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
@@ -46,6 +48,8 @@ namespace shiva
     #define deviceError_t hipError_t
     #define deviceSuccess = hipSuccess;
     #define deviceGetErrorString    hipGetErrorString
+    #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
+    #define deviceSuccess = hipSuccess;
     #endif
 #endif
 
@@ -102,7 +106,7 @@ void genericKernelWrapper( LAMBDA && func, bool const abortOnError = true )
   genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ) );
   // UNCRUSTIFY-ON
   deviceError_t err = deviceDeviceSynchronize();
-  if ( err != cudaSuccess )
+  if ( err != deviceSuccess )
   {
     printf( "Kernel failed: %s\n", deviceGetErrorString( err ));
     if ( abortOnError )
@@ -162,9 +166,9 @@ void genericKernelWrapper( int const N, DATA_TYPE * const hostData, LAMBDA && fu
   genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ), deviceData );
   // UNCRUSTIFY-ON
   deviceError_t err = deviceDeviceSynchronize();
-  deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), cudaMemcpyDeviceToHost );
+  deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), deviceMemcpyDeviceToHost );
   deviceFree( deviceData );
-  if ( err != cudaSuccess )
+  if ( err != deviceSuccess )
   {
     printf( "Kernel failed: %s\n", deviceGetErrorString( err ));
     if ( abortOnError )

From 1c19a835dfb59736dbf17ecd40300a1956e67382 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Wed, 10 Sep 2025 23:19:46 +0900
Subject: [PATCH 10/26] fix bug (stray '=' in deviceSuccess macros; restore max() fallback return)

---
 src/common/pmpl.hpp                     | 4 ++--
 src/functions/quadrature/Quadrature.hpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp
index 7a4346e..17fdb51 100644
--- a/src/common/pmpl.hpp
+++ b/src/common/pmpl.hpp
@@ -38,7 +38,7 @@ namespace shiva
     #define deviceSuccess cudaSuccess
     #define deviceGetErrorString    cudaGetErrorString
     #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
-    #define deviceSuccess = cudaSuccess;
+    #define deviceSuccess cudaSuccess;
     #elif defined(SHIVA_USE_HIP)
     #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
     #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
@@ -49,7 +49,7 @@ namespace shiva
     #define deviceSuccess = hipSuccess;
     #define deviceGetErrorString    hipGetErrorString
     #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
-    #define deviceSuccess = hipSuccess;
+    #define deviceSuccess hipSuccess;
     #endif
 #endif
 
diff --git a/src/functions/quadrature/Quadrature.hpp b/src/functions/quadrature/Quadrature.hpp
index c9e7d5b..1c43301 100644
--- a/src/functions/quadrature/Quadrature.hpp
+++ b/src/functions/quadrature/Quadrature.hpp
@@ -158,7 +158,7 @@ struct QuadratureGaussLobatto : public GaussLobattoSpacing< REAL_TYPE, N >
       //assert( index >= 0 && index < 5 );
       return 0.1 + (index & 1) * 0.4444444444444444444444444444444444 + !( index - 2 ) * 0.6111111111111111111111111111111111;
     }
-    return 0;//std::numeric_limits< REAL_TYPE >::max();
+    return std::numeric_limits< REAL_TYPE >::max();
   }
 
   /**

From f9232c3f6f4be4221496fde45f83682843bd16a2 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Wed, 10 Sep 2025 23:26:54 +0900
Subject: [PATCH 11/26] try to fix bug again (swap duplicate deviceSuccess macro for a using declaration)

---
 src/common/pmpl.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp
index 17fdb51..ce754ab 100644
--- a/src/common/pmpl.hpp
+++ b/src/common/pmpl.hpp
@@ -38,7 +38,7 @@ namespace shiva
     #define deviceSuccess cudaSuccess
     #define deviceGetErrorString    cudaGetErrorString
     #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
-    #define deviceSuccess cudaSuccess;
+    using deviceSuccess cudaSuccess;
     #elif defined(SHIVA_USE_HIP)
     #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
     #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
@@ -49,7 +49,7 @@ namespace shiva
     #define deviceSuccess = hipSuccess;
     #define deviceGetErrorString    hipGetErrorString
     #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
-    #define deviceSuccess hipSuccess;
+    using deviceSuccess hipSuccess;
     #endif
 #endif
 

From a70ffbea109cb65472eaaa74f5b719801d309349 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Wed, 10 Sep 2025 23:31:43 +0900
Subject: [PATCH 12/26] try to fix bug again (drop deviceSuccess macros, correct using syntax)

---
 src/common/pmpl.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp
index ce754ab..85ec642 100644
--- a/src/common/pmpl.hpp
+++ b/src/common/pmpl.hpp
@@ -35,10 +35,9 @@ namespace shiva
     #define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
     #define deviceFree( PTR ) cudaFree( PTR );
     #define deviceError_t cudaError_t
-    #define deviceSuccess cudaSuccess
     #define deviceGetErrorString    cudaGetErrorString
     #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
-    using deviceSuccess cudaSuccess;
+    using deviceSuccess = cudaSuccess;
     #elif defined(SHIVA_USE_HIP)
     #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
     #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
@@ -46,10 +45,9 @@ namespace shiva
     #define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
     #define deviceFree( PTR ) hipFree( PTR );
     #define deviceError_t hipError_t
-    #define deviceSuccess = hipSuccess;
     #define deviceGetErrorString    hipGetErrorString
     #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
-    using deviceSuccess hipSuccess;
+    using deviceSuccess = hipSuccess;
     #endif
 #endif
 

From 8b0f231a6712b11dd02a4e83c0b82e8c685f3779 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Wed, 10 Sep 2025 23:40:51 +0900
Subject: [PATCH 13/26] try to fix bug again (make deviceSuccess a constexpr constant)

---
 src/common/pmpl.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp
index 85ec642..7a6aa80 100644
--- a/src/common/pmpl.hpp
+++ b/src/common/pmpl.hpp
@@ -37,7 +37,7 @@ namespace shiva
     #define deviceError_t cudaError_t
     #define deviceGetErrorString    cudaGetErrorString
     #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
-    using deviceSuccess = cudaSuccess;
+    constexpr cudaError_t deviceSuccess = cudaSuccess;
     #elif defined(SHIVA_USE_HIP)
     #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
     #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
@@ -47,7 +47,7 @@ namespace shiva
     #define deviceError_t hipError_t
     #define deviceGetErrorString    hipGetErrorString
     #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
-    using deviceSuccess = hipSuccess;
+    constexpr hipError_t deviceSuccess = hipSuccess;
     #endif
 #endif
 

From b9cb71c056a123e5bf8f549d381336deb20b1471 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Wed, 10 Sep 2025 23:46:57 +0900
Subject: [PATCH 14/26] try to fix bug again (add SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE to all make_tuple wrappers)

---
 src/common/types.hpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/common/types.hpp b/src/common/types.hpp
index 26e22f7..78f9704 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -51,7 +51,9 @@ using tuple = camp::tuple< T ... >;
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
-SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE auto make_tuple( T && ... t )
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE 
+auto 
+make_tuple( T && ... t )
 {
   return camp::make_tuple( std::forward< T >( t ) ... );
 }
@@ -72,6 +74,7 @@ using tuple = cuda::std::tuple< T ... >;
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
 auto make_tuple( T && ... t )
 {
   return cuda::std::make_tuple( std::forward< T >( t ) ... );
@@ -91,7 +94,9 @@ using tuple = std::tuple< T ... >;
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
-auto make_tuple( T && ... t )
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
+auto
+make_tuple( T && ... t )
 {
   return std::make_tuple( std::forward< T >( t ) ... );
 }

From 7de0c15a130304350f8fa0dc0544b1c0710484c9 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Thu, 11 Sep 2025 14:48:48 +0900
Subject: [PATCH 15/26] cuda/std/tuple workaround and uncrustify

---
 docs/doxygen/ShivaConfig.hpp                  |  2 +-
 src/common/pmpl.hpp                           | 42 ++++-----
 src/common/types.hpp                          |  4 +-
 .../unitTests/testSequenceUtilities.cpp       | 12 +--
 .../parentElements/ParentElement.hpp          | 20 ++---
 src/functions/bases/BasisProduct.hpp          | 88 +++++++++----------
 src/functions/bases/LagrangeBasis.hpp         | 58 ++++++------
 src/geometry/mapping/LinearTransform.hpp      | 56 ++++++------
 .../mapping/unitTests/testUniformScaling.cpp  |  9 +-
 9 files changed, 149 insertions(+), 142 deletions(-)

diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index fdee9a6..4ac8dd9 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -14,6 +14,6 @@
 
 /* #undef SHIVA_USE_CALIPER */
 
-/* #undef SHIVA_USE_CAMP */
+#define SHIVA_USE_CAMP
 
 #define SHIVA_USE_BOUNDS_CHECK
diff --git a/src/common/pmpl.hpp b/src/common/pmpl.hpp
index 7a6aa80..eb8d3b8 100644
--- a/src/common/pmpl.hpp
+++ b/src/common/pmpl.hpp
@@ -28,27 +28,27 @@
 namespace shiva
 {
 #if defined(SHIVA_USE_DEVICE)
-  #if defined(SHIVA_USE_CUDA)
-    #define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES );
-    #define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES );
-    #define deviceDeviceSynchronize() cudaDeviceSynchronize();
-    #define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
-    #define deviceFree( PTR ) cudaFree( PTR );
-    #define deviceError_t cudaError_t
-    #define deviceGetErrorString    cudaGetErrorString
-    #define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
-    constexpr cudaError_t deviceSuccess = cudaSuccess;
-    #elif defined(SHIVA_USE_HIP)
-    #define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
-    #define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
-    #define deviceDeviceSynchronize() hipDeviceSynchronize();
-    #define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
-    #define deviceFree( PTR ) hipFree( PTR );
-    #define deviceError_t hipError_t
-    #define deviceGetErrorString    hipGetErrorString
-    #define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
-    constexpr hipError_t deviceSuccess = hipSuccess;
-    #endif
+#if defined(SHIVA_USE_CUDA)
+#define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES );
+#define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES );
+#define deviceDeviceSynchronize() cudaDeviceSynchronize();
+#define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
+#define deviceFree( PTR ) cudaFree( PTR );
+#define deviceError_t cudaError_t
+#define deviceGetErrorString    cudaGetErrorString
+#define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
+constexpr cudaError_t deviceSuccess = cudaSuccess;
+#elif defined(SHIVA_USE_HIP)
+#define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
+#define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
+#define deviceDeviceSynchronize() hipDeviceSynchronize();
+#define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
+#define deviceFree( PTR ) hipFree( PTR );
+#define deviceError_t hipError_t
+#define deviceGetErrorString    hipGetErrorString
+#define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
+constexpr hipError_t deviceSuccess = hipSuccess;
+#endif
 #endif
 
 /**
diff --git a/src/common/types.hpp b/src/common/types.hpp
index 78f9704..5ef42c8 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -51,8 +51,8 @@ using tuple = camp::tuple< T ... >;
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
-SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE 
-auto 
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
+auto
 make_tuple( T && ... t )
 {
   return camp::make_tuple( std::forward< T >( t ) ... );
diff --git a/src/common/unitTests/testSequenceUtilities.cpp b/src/common/unitTests/testSequenceUtilities.cpp
index fae3cc1..7764e96 100644
--- a/src/common/unitTests/testSequenceUtilities.cpp
+++ b/src/common/unitTests/testSequenceUtilities.cpp
@@ -72,7 +72,7 @@ void testNestedSequenceExpansionLambdaHelper()
       return
         ( executeSequence< 10 >
             ( [ h = Data::h, aa = std::integral_constant< int, a >{} ] ( auto const ... b ) constexpr
-            { return ( (h[aa] * h[b]) + ...); }
+      { return ( (h[aa] * h[b]) + ...); }
             ) + ...
         );
     } );
@@ -119,7 +119,7 @@ void testSequenceExpansionTemplateLambdaHelper()
   kernelLaunch( [] SHIVA_HOST_DEVICE ()
   {
     constexpr int staticSum0 =
-      executeSequence< 10 >( [&] < int ... a > () constexpr
+      executeSequence< 10 >( [&]< int ... a > () constexpr
     {
       return (Data::h[a] + ...);
     } );
@@ -139,13 +139,13 @@ void testSequenceExpansionTemplateLambdaHelper()
 {
   kernelLaunch( [] SHIVA_HOST_DEVICE ()
   {
-    constexpr int staticSum0 = executeSequence< 10 >( [&] < int ... a > () constexpr
+    constexpr int staticSum0 = executeSequence< 10 >( [&]< int ... a > () constexpr
     {
       return
         ( executeSequence< 10 >
           (
-            [ h = Data::h, aa = std::integral_constant< int, a >{} ] < int ... b > () constexpr
-            { return ( (h[aa] * h[b]) + ...); }
+            [ h = Data::h, aa = std::integral_constant< int, a >{} ]< int ... b > () constexpr
+      { return ( (h[aa] * h[b]) + ...); }
           ) + ...
         );
     } );
@@ -167,7 +167,7 @@ void testForSequenceTemplateLambdaHelper()
     {
       int staticSum0 = 0;
       forSequence< 10 >(
-        [&] < int a > () constexpr
+        [&]< int a > () constexpr
       {
         staticSum0 += h[a];
       } );
diff --git a/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp b/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp
index 02c74e6..9d4bdb1 100644
--- a/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp
+++ b/src/discretizations/finiteElementMethod/parentElements/ParentElement.hpp
@@ -110,9 +110,9 @@ class ParentElement
     REAL_TYPE rval = {0};
 
     forNestedSequence< BASIS_TYPE::numSupportPoints... >( [&] ( auto const ... ic_indices ) constexpr
-          {
-            rval = rval + ( value< decltype(ic_indices)::value ... >( parentCoord ) * var( decltype(ic_indices)::value ... ) );
-          } );
+    {
+      rval = rval + ( value< decltype(ic_indices)::value ... >( parentCoord ) * var( decltype(ic_indices)::value ... ) );
+    } );
     return rval;
   }
 
@@ -130,13 +130,13 @@ class ParentElement
   {
     CArrayNd< RealType, numDims > rval = {0.0};
     forNestedSequence< BASIS_TYPE::numSupportPoints... >( [&] ( auto const ... ic_indices ) constexpr
-          {
-            CArrayNd< RealType, numDims > const grad = gradient< decltype(ic_indices)::value ... >( parentCoord );
-            forSequence< numDims >( [&] ( auto const a ) constexpr
-            {
-              rval( a ) = rval( a ) + grad( a ) * var( decltype(ic_indices)::value ... );
-            } );
-          } );
+    {
+      CArrayNd< RealType, numDims > const grad = gradient< decltype(ic_indices)::value ... >( parentCoord );
+      forSequence< numDims >( [&] ( auto const a ) constexpr
+      {
+        rval( a ) = rval( a ) + grad( a ) * var( decltype(ic_indices)::value ... );
+      } );
+    } );
     return rval;
   }
 
diff --git a/src/functions/bases/BasisProduct.hpp b/src/functions/bases/BasisProduct.hpp
index b0f00ab..28f1075 100644
--- a/src/functions/bases/BasisProduct.hpp
+++ b/src/functions/bases/BasisProduct.hpp
@@ -89,21 +89,21 @@ struct BasisProduct
   {
     static_assert( sizeof...(BASIS_FUNCTION_INDICES) == numDims, "Wrong number of basis function indicies specified" );
 
-    return
+
 #if __cplusplus >= 202002L
-      // expand pack over number of dimensions
-      executeSequence< numDims >( [&] < int ... PRODUCT_TERM_INDEX > () constexpr
-        {
-          return ( BASIS_TYPE::template value< BASIS_FUNCTION_INDICES >( parentCoord[PRODUCT_TERM_INDEX] ) * ... );
-        } );
+    // expand pack over number of dimensions
+    return executeSequence< numDims >( [&]< int ... PRODUCT_TERM_INDEX > () constexpr
+    {
+      return ( BASIS_TYPE::template value< BASIS_FUNCTION_INDICES >( parentCoord[PRODUCT_TERM_INDEX] ) * ... );
+    } );
 #else
-      executeSequence< numDims >( [&] ( auto ... PRODUCT_TERM_INDEX ) constexpr
-        {
-          // fold expression to multiply the value of each BASIS_TYPE in each
-          // dimension. In other words the fold expands on BASIS_TYPE...,
-          // BASIS_FUNCTION_INDICES..., and PRODUCT_TERM_INDEX... together.
-          return ( BASIS_TYPES::template value< BASIS_FUNCTION_INDICES >( parentCoord[decltype(PRODUCT_TERM_INDEX)::value] ) * ... );
-        } );
+    return executeSequence< numDims >( [&] ( auto ... PRODUCT_TERM_INDEX ) constexpr
+    {
+      // fold expression to multiply the value of each BASIS_TYPE in each
+      // dimension. In other words the fold expands on BASIS_TYPE...,
+      // BASIS_FUNCTION_INDICES..., and PRODUCT_TERM_INDEX... together.
+      return ( BASIS_TYPES::template value< BASIS_FUNCTION_INDICES >( parentCoord[decltype(PRODUCT_TERM_INDEX)::value] ) * ... );
+    } );
 
 #endif
   }
@@ -133,40 +133,40 @@ struct BasisProduct
     static_assert( sizeof...(BASIS_FUNCTION_INDICES) == numDims, "Wrong number of basis function indicies specified" );
 
 #if __cplusplus >= 202002L
-    return executeSequence< numDims >( [&] < int ... i > () constexpr->CArrayNd< RealType, numDims >
-        {
-          auto gradientComponent = [&] ( auto const iGrad,
-                                         auto const  ... PRODUCT_TERM_INDICES ) constexpr
-          {
-            // Ca
-            return ( gradientComponentHelper< BASIS_TYPES,
-                                              decltype(iGrad)::value,
-                                              BASIS_FUNCTION_INDICES,
-                                              PRODUCT_TERM_INDICES >( parentCoord ) * ... );
-          };
-
-          return { (executeSequence< numDims >( gradientComponent, std::integral_constant< int, i >{} ) )...  };
-        } );
+    return executeSequence< numDims >( [&]< int ... i > () constexpr->CArrayNd< RealType, numDims >
+    {
+      auto gradientComponent = [&] ( auto const iGrad,
+                                     auto const  ... PRODUCT_TERM_INDICES ) constexpr
+      {
+        // Ca
+        return ( gradientComponentHelper< BASIS_TYPES,
+                                          decltype(iGrad)::value,
+                                          BASIS_FUNCTION_INDICES,
+                                          PRODUCT_TERM_INDICES >( parentCoord ) * ... );
+      };
+
+      return { (executeSequence< numDims >( gradientComponent, std::integral_constant< int, i >{} ) )...  };
+    } );
 #else
     // Expand over the dimensions.
     return executeSequence< numDims >( [&] ( auto ... a ) constexpr->CArrayNd< RealType, numDims >
-        {
-          // define a lambda that calculates the gradient of the basis function in
-          // a single dimension/direction.
-          auto gradientComponent = [&] ( auto GRADIENT_COMPONENT, auto ... PRODUCT_TERM_INDICES ) constexpr
-          {
-            // fold expression calling gradientComponentHelper using expanding on
-            // BASIS_TYPE, BASIS_FUNCTION_INDICES, and PRODUCT_TERM_INDICES.
-            return ( gradientComponentHelper< BASIS_TYPES,
-                                              decltype(GRADIENT_COMPONENT)::value,
-                                              BASIS_FUNCTION_INDICES,
-                                              decltype(PRODUCT_TERM_INDICES)::value >( parentCoord ) * ... );
-          };
-
-          // execute the gradientComponent lambda on each direction, expand the
-          // pack on "i" corresponding to each direction of the gradient.
-          return { (executeSequence< numDims >( gradientComponent, a ) )...  };
-        } );
+    {
+      // define a lambda that calculates the gradient of the basis function in
+      // a single dimension/direction.
+      auto gradientComponent = [&] ( auto GRADIENT_COMPONENT, auto ... PRODUCT_TERM_INDICES ) constexpr
+      {
+        // fold expression calling gradientComponentHelper using expanding on
+        // BASIS_TYPE, BASIS_FUNCTION_INDICES, and PRODUCT_TERM_INDICES.
+        return ( gradientComponentHelper< BASIS_TYPES,
+                                          decltype(GRADIENT_COMPONENT)::value,
+                                          BASIS_FUNCTION_INDICES,
+                                          decltype(PRODUCT_TERM_INDICES)::value >( parentCoord ) * ... );
+      };
+
+      // execute the gradientComponent lambda on each direction, expand the
+      // pack on "i" corresponding to each direction of the gradient.
+      return { (executeSequence< numDims >( gradientComponent, a ) )...  };
+    } );
 #endif
   }
 
diff --git a/src/functions/bases/LagrangeBasis.hpp b/src/functions/bases/LagrangeBasis.hpp
index ab2b330..e18bb4a 100644
--- a/src/functions/bases/LagrangeBasis.hpp
+++ b/src/functions/bases/LagrangeBasis.hpp
@@ -82,17 +82,17 @@ class LagrangeBasis : public SPACING_TYPE< REAL_TYPE, ORDER + 1 >
   value( REAL_TYPE const & coord )
   {
 #if __cplusplus >= 202002L
-    return executeSequence< numSupportPoints >( [&] < int ... a > () constexpr
-        {
-          // return fold expression that is the product of all the polynomial
-          // factor terms.
-          return ( valueProductTerm< BF_INDEX, a >( coord ) * ... );
-        } );
+    return executeSequence< numSupportPoints >( [&]< int ... a > () constexpr
+    {
+      // return fold expression that is the product of all the polynomial
+      // factor terms.
+      return ( valueProductTerm< BF_INDEX, a >( coord ) * ... );
+    } );
 #else
     return executeSequence< numSupportPoints >( [&] ( auto const ... a ) constexpr
-        {
-          return ( valueProductTerm< BF_INDEX, decltype(a)::value >( coord ) * ... );
-        } );
+    {
+      return ( valueProductTerm< BF_INDEX, decltype(a)::value >( coord ) * ... );
+    } );
 #endif
   }
 
@@ -117,28 +117,28 @@ class LagrangeBasis : public SPACING_TYPE< REAL_TYPE, ORDER + 1 >
   {
 
 #if __cplusplus >= 202002L
-    return executeSequence< numSupportPoints >( [&coord] < int ... a > () constexpr
-        {
-          auto func = [&coord] < int ... b > ( auto aa ) constexpr
-          {
-            constexpr int aVal = decltype(aa)::value;
-            return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< BF_INDEX, b, aVal >( coord ) * ... );
-          };
-
-          return ( executeSequence< numSupportPoints >( func, std::integral_constant< int, a >{} ) + ... );
-        } );
+    return executeSequence< numSupportPoints >( [&coord]< int ... a > () constexpr
+    {
+      auto func = [&coord]< int ... b > ( auto aa ) constexpr
+      {
+        constexpr int aVal = decltype(aa)::value;
+        return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< BF_INDEX, b, aVal >( coord ) * ... );
+      };
+
+      return ( executeSequence< numSupportPoints >( func, std::integral_constant< int, a >{} ) + ... );
+    } );
 #else
     return executeSequence< numSupportPoints >( [&coord] ( auto const ... a ) constexpr
-        {
-          REAL_TYPE const values[ numSupportPoints ] = { valueProductTerm< BF_INDEX, decltype(a)::value >( coord )... };
-          auto func = [&values] ( auto aa, auto ... b ) constexpr
-          {
-            constexpr int aVal = decltype(aa)::value;
-            return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< decltype(b)::value, aVal >( values ) * ... );
-          };
-
-          return ( executeSequence< numSupportPoints >( func, a ) + ... );
-        } );
+    {
+      REAL_TYPE const values[ numSupportPoints ] = { valueProductTerm< BF_INDEX, decltype(a)::value >( coord )... };
+      auto func = [&values] ( auto aa, auto ... b ) constexpr
+      {
+        constexpr int aVal = decltype(aa)::value;
+        return gradientOfValueTerm< BF_INDEX, aVal >() * ( valueProductFactor< decltype(b)::value, aVal >( values ) * ... );
+      };
+
+      return ( executeSequence< numSupportPoints >( func, a ) + ... );
+    } );
 #endif
   }
 
diff --git a/src/geometry/mapping/LinearTransform.hpp b/src/geometry/mapping/LinearTransform.hpp
index a44d7d6..7fde65f 100644
--- a/src/geometry/mapping/LinearTransform.hpp
+++ b/src/geometry/mapping/LinearTransform.hpp
@@ -169,17 +169,17 @@ jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform,
 
   auto const & nodeCoords = transform.getData();
   InterpolatedShape::supportLoop( [&] ( auto const ... ic_spIndices ) constexpr
-        {
-          CArrayNd< REAL_TYPE, DIMS > const dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( pointCoordsParent );
-          // dimensional loop from domain to codomain
-          forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
-          {
-            constexpr int i = decltype(ici)::value;
-            constexpr int j = decltype(icj)::value;
-            J( i, j ) = J( i, j ) + dNadXi( j ) * nodeCoords( decltype(ic_spIndices)::value ..., i );
-          } );
-
-        } );
+  {
+    CArrayNd< REAL_TYPE, DIMS > const dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( pointCoordsParent );
+    // dimensional loop from domain to codomain
+    forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
+    {
+      constexpr int i = decltype(ici)::value;
+      constexpr int j = decltype(icj)::value;
+      J( i, j ) = J( i, j ) + dNadXi( j ) * nodeCoords( decltype(ic_spIndices)::value ..., i );
+    } );
+
+  } );
 }
 
 
@@ -216,27 +216,27 @@ jacobian( LinearTransform< REAL_TYPE, INTERPOLATED_SHAPE > const & transform,
   constexpr REAL_TYPE qcoords[3] = { ( QUADRATURE::template coordinate< QA >() )... };
 
   InterpolatedShape::supportLoop( [&] ( auto const ... ic_spIndices ) constexpr
-        {
-          constexpr CArrayNd< REAL_TYPE, DIMS > dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( qcoords );
+  {
+    constexpr CArrayNd< REAL_TYPE, DIMS > dNadXi = InterpolatedShape::template gradient< decltype(ic_spIndices)::value ... >( qcoords );
 
-          // dimensional loop from domain to codomain
+    // dimensional loop from domain to codomain
     #if 1
-          forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
-          {
-            constexpr int i = decltype(ici)::value;
-            constexpr int j = decltype(icj)::value;
-            J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j );
-          } );
+    forNestedSequence< DIMS, DIMS >( [&] ( auto const ici, auto const icj ) constexpr
+    {
+      constexpr int i = decltype(ici)::value;
+      constexpr int j = decltype(icj)::value;
+      J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j );
+    } );
     #else
-          for ( int j = 0; j < DIMS; ++j )
-          {
-            for ( int i = 0; i < DIMS; ++i )
-            {
-              J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j );
-            }
-          }
+    for ( int j = 0; j < DIMS; ++j )
+    {
+      for ( int i = 0; i < DIMS; ++i )
+      {
+        J( j, i ) = J( j, i ) + dNadXi( i ) * nodeCoords( decltype(ic_spIndices)::value ..., j );
+      }
+    }
     #endif
-        } );
+  } );
 }
 
 
diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp
index bc3c93a..5cafab9 100644
--- a/src/geometry/mapping/unitTests/testUniformScaling.cpp
+++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp
@@ -99,7 +99,14 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue )
   double const h = 3.14;
   auto cell = makeUniformScaling( h );
 
-  auto [ detJ, invJ ] = inverseJacobian( cell );
+  // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple
+#if defined(SHIVA_USE_CUDA) && defined(CUDA_VERSION) && (CUDA_VERSION < 12000)
+  auto tmp  = inverseJacobian( cell );
+  auto detJ = shiva::get< 0 >( tmp );
+  auto invJ = shiva::get< 1 >( tmp );
+#else
+  auto [detJ, invJ] = inverseJacobian( cell );
+#endif
   EXPECT_EQ( detJ, 0.125 * h * h * h );
   EXPECT_EQ( invJ( 0 ), ( 2 / h ) );
 }

From 3b4b89bb448c7970fd5d29d72b3d6e2e8705f2bd Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Thu, 11 Sep 2025 15:29:36 +0900
Subject: [PATCH 16/26] try again

---
 cmake/CMakeBasics.cmake                       |  5 ++-
 src/common/types.hpp                          | 33 ++++++++++++++++++-
 .../mapping/unitTests/testUniformScaling.cpp  |  2 +-
 3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake
index 0915345..9ac48ea 100644
--- a/cmake/CMakeBasics.cmake
+++ b/cmake/CMakeBasics.cmake
@@ -27,6 +27,5 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG
                                  CLANG "-fstandalone-debug"
                                 )
 
-set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "")
-
-set( CAMP_ENABLE_TESTS OFF CACHE BOOL "")
+option( SHIVA_ENABLE_CAMP OFF )
+option( CAMP_ENABLE_TESTS OFF )
diff --git a/src/common/types.hpp b/src/common/types.hpp
index 5ef42c8..0f0198a 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -19,11 +19,11 @@
 
 #include "common/ShivaMacros.hpp"
 
+
 /// @brief Macro to define whether or not to use camp.
 #if defined(SHIVA_USE_CAMP)
 #include <camp/camp.hpp>
 #else
-
 #if defined(SHIVA_USE_CUDA)
 #include <cuda/std/tuple>
 #else
@@ -58,8 +58,29 @@ make_tuple( T && ... t )
   return camp::make_tuple( std::forward< T >( t ) ... );
 }
 
+#define SHIVA_HAVE_TUPLE_SB 1
+
 #else
 #if defined(SHIVA_USE_CUDA)
+
+// libcudacxx (CCCL) API version: e.g. 120400 for CUDA 12.4
+#ifndef _LIBCUDACXX_CUDA_API_VERSION
+  #define _LIBCUDACXX_CUDA_API_VERSION 0
+#endif
+
+#ifndef CUDART_VERSION
+  #define CUDART_VERSION 0
+#endif
+
+// Prefer the libcudacxx API version if present; fall back to runtime version.
+#if (_LIBCUDACXX_CUDA_API_VERSION >= 120400) || (CUDART_VERSION >= 12040)
+  #define SHIVA_HAVE_TUPLE_SB 1
+#else
+  #define SHIVA_HAVE_TUPLE_SB 0
+#endif
+
+
+
 /**
  * @brief Wrapper for cuda::std::tuple.
  * @tparam T Types of the elements of the tuple.
@@ -100,6 +121,9 @@ make_tuple( T && ... t )
 {
   return std::make_tuple( std::forward< T >( t ) ... );
 }
+
+#define SHIVA_HAVE_TUPLE_SB 1
+
 #endif
 #endif
 
@@ -117,4 +141,11 @@ using int_sequence = std::integer_sequence< int, T ... >;
 template< int N >
 using make_int_sequence = std::make_integer_sequence< int, N >;
 
+
+
 }
+
+#if defined(__CUDA_ARCH__)
+  #undef  SHIVA_HAVE_TUPLE_SB
+  #define SHIVA_HAVE_TUPLE_SB 0
+#endif
diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp
index 5cafab9..810db22 100644
--- a/src/geometry/mapping/unitTests/testUniformScaling.cpp
+++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp
@@ -100,7 +100,7 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue )
   auto cell = makeUniformScaling( h );
 
   // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple
-#if defined(SHIVA_USE_CUDA) && defined(CUDA_VERSION) && (CUDA_VERSION < 12000)
+#if defined(SHIVA_USE_CUDA) && SHIVA_HAVE_TUPLE_SB == 0
   auto tmp  = inverseJacobian( cell );
   auto detJ = shiva::get< 0 >( tmp );
   auto invJ = shiva::get< 1 >( tmp );

From 8598f077ad4a7c29448a34cd7c893bc12acdf877 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Thu, 11 Sep 2025 15:37:57 +0900
Subject: [PATCH 17/26] try again

---
 src/common/types.hpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/common/types.hpp b/src/common/types.hpp
index 0f0198a..fce8dfa 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -79,8 +79,6 @@ make_tuple( T && ... t )
   #define SHIVA_HAVE_TUPLE_SB 0
 #endif
 
-
-
 /**
  * @brief Wrapper for cuda::std::tuple.
  * @tparam T Types of the elements of the tuple.
@@ -100,6 +98,9 @@ auto make_tuple( T && ... t )
 {
   return cuda::std::make_tuple( std::forward< T >( t ) ... );
 }
+
+using cuda::std::get;     // expose cuda::std::get as shiva::get
+
 #else
 /**
  * @brief Wrapper for std::tuple.
@@ -123,6 +124,7 @@ make_tuple( T && ... t )
 }
 
 #define SHIVA_HAVE_TUPLE_SB 1
+using std::get;
 
 #endif
 #endif
@@ -142,10 +144,4 @@ template< int N >
 using make_int_sequence = std::make_integer_sequence< int, N >;
 
 
-
 }
-
-#if defined(__CUDA_ARCH__)
-  #undef  SHIVA_HAVE_TUPLE_SB
-  #define SHIVA_HAVE_TUPLE_SB 0
-#endif

From 2d82182dae2f2e4dc1c9bf99039c0788edaae3e5 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Thu, 11 Sep 2025 16:08:33 +0900
Subject: [PATCH 18/26] remove a bunch of crap...just don't use the bindings
 for cuda without camp

---
 src/common/types.hpp                          | 33 ++-----------------
 .../mapping/unitTests/testUniformScaling.cpp  |  6 ++--
 2 files changed, 6 insertions(+), 33 deletions(-)

diff --git a/src/common/types.hpp b/src/common/types.hpp
index fce8dfa..78f9704 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -19,11 +19,11 @@
 
 #include "common/ShivaMacros.hpp"
 
-
 /// @brief Macro to define whether or not to use camp.
 #if defined(SHIVA_USE_CAMP)
 #include <camp/camp.hpp>
 #else
+
 #if defined(SHIVA_USE_CUDA)
 #include <cuda/std/tuple>
 #else
@@ -51,34 +51,15 @@ using tuple = camp::tuple< T ... >;
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
-SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
-auto
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE 
+auto 
 make_tuple( T && ... t )
 {
   return camp::make_tuple( std::forward< T >( t ) ... );
 }
 
-#define SHIVA_HAVE_TUPLE_SB 1
-
 #else
 #if defined(SHIVA_USE_CUDA)
-
-// libcudacxx (CCCL) API version: e.g. 120400 for CUDA 12.4
-#ifndef _LIBCUDACXX_CUDA_API_VERSION
-  #define _LIBCUDACXX_CUDA_API_VERSION 0
-#endif
-
-#ifndef CUDART_VERSION
-  #define CUDART_VERSION 0
-#endif
-
-// Prefer the libcudacxx API version if present; fall back to runtime version.
-#if (_LIBCUDACXX_CUDA_API_VERSION >= 120400) || (CUDART_VERSION >= 12040)
-  #define SHIVA_HAVE_TUPLE_SB 1
-#else
-  #define SHIVA_HAVE_TUPLE_SB 0
-#endif
-
 /**
  * @brief Wrapper for cuda::std::tuple.
  * @tparam T Types of the elements of the tuple.
@@ -98,9 +79,6 @@ auto make_tuple( T && ... t )
 {
   return cuda::std::make_tuple( std::forward< T >( t ) ... );
 }
-
-using cuda::std::get;     // expose cuda::std::get as shiva::get
-
 #else
 /**
  * @brief Wrapper for std::tuple.
@@ -122,10 +100,6 @@ make_tuple( T && ... t )
 {
   return std::make_tuple( std::forward< T >( t ) ... );
 }
-
-#define SHIVA_HAVE_TUPLE_SB 1
-using std::get;
-
 #endif
 #endif
 
@@ -143,5 +117,4 @@ using int_sequence = std::integer_sequence< int, T ... >;
 template< int N >
 using make_int_sequence = std::make_integer_sequence< int, N >;
 
-
 }
diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp
index 810db22..0665e5a 100644
--- a/src/geometry/mapping/unitTests/testUniformScaling.cpp
+++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp
@@ -100,12 +100,12 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue )
   auto cell = makeUniformScaling( h );
 
   // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple
-#if defined(SHIVA_USE_CUDA) && SHIVA_HAVE_TUPLE_SB == 0
+#if defined(SHIVA_USE_CAMP) || !defined(SHIVA_USE_CUDA)
+  auto [detJ, invJ] = inverseJacobian( cell );
+#else
   auto tmp  = inverseJacobian( cell );
   auto detJ = shiva::get< 0 >( tmp );
   auto invJ = shiva::get< 1 >( tmp );
-#else
-  auto [detJ, invJ] = inverseJacobian( cell );
 #endif
   EXPECT_EQ( detJ, 0.125 * h * h * h );
   EXPECT_EQ( invJ( 0 ), ( 2 / h ) );

From 3cba8eb67ab38c1036c09fb2c5382724fe284f33 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Thu, 11 Sep 2025 16:34:36 +0900
Subject: [PATCH 19/26] add cuda version through cmake

---
 cmake/CMakeBasics.cmake                       | 21 +++++++++++++++++++
 docs/doxygen/ShivaConfig.hpp                  |  4 ++++
 src/ShivaConfig.hpp.in                        |  6 +++++-
 src/common/types.hpp                          |  4 ++--
 .../mapping/unitTests/testUniformScaling.cpp  |  2 +-
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake
index 9ac48ea..f38bccc 100644
--- a/cmake/CMakeBasics.cmake
+++ b/cmake/CMakeBasics.cmake
@@ -29,3 +29,24 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG
 
 option( SHIVA_ENABLE_CAMP OFF )
 option( CAMP_ENABLE_TESTS OFF )
+
+
+if( ENABLE_CUDA )
+  # Extract CUDA version from CMake’s variables
+  set(SHIVA_CUDA_VERSION ${CUDAToolkit_VERSION})
+
+  # Also normalize to an integer for easy comparison (e.g. 12040 for 12.4.0)
+  string(REPLACE "." ";" CUDA_VERSION_LIST ${CUDAToolkit_VERSION})
+  list(GET CUDA_VERSION_LIST 0 CUDA_MAJOR)
+  list(GET CUDA_VERSION_LIST 1 CUDA_MINOR)
+  list(GET CUDA_VERSION_LIST 2 CUDA_PATCH)
+
+  math(EXPR CUDA_VERSION_INT "${CUDA_MAJOR}*1000 + ${CUDA_MINOR}*10 + ${CUDA_PATCH}")
+
+  target_compile_definitions( shiva PUBLIC
+                              SHIVA_CUDA_VERSION_STR="${CUDAToolkit_VERSION}"
+                              SHIVA_CUDA_VERSION_INT=${CUDA_VERSION_INT}
+                              SHIVA_CUDA_MAJOR=${CUDA_MAJOR}
+                              SHIVA_CUDA_MINOR=${CUDA_MINOR}
+                            )
+endif()
diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index 4ac8dd9..c2e3f65 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -17,3 +17,7 @@
 #define SHIVA_USE_CAMP
 
 #define SHIVA_USE_BOUNDS_CHECK
+
+/* #undef SHIVA_CUDA_MAJOR */
+
+/* #undef SHIVA_CUDA_MINOR */
diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in
index f0f4806..56dbc07 100644
--- a/src/ShivaConfig.hpp.in
+++ b/src/ShivaConfig.hpp.in
@@ -16,4 +16,8 @@
 
 #cmakedefine SHIVA_USE_CAMP
 
-#cmakedefine SHIVA_USE_BOUNDS_CHECK
\ No newline at end of file
+#cmakedefine SHIVA_USE_BOUNDS_CHECK
+
+#cmakedefine SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@
+
+#cmakedefine SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@
\ No newline at end of file
diff --git a/src/common/types.hpp b/src/common/types.hpp
index 78f9704..5ef42c8 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -51,8 +51,8 @@ using tuple = camp::tuple< T ... >;
  * @return A tuple with the elements passed as arguments.
  */
 template< typename ... T >
-SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE 
-auto 
+SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
+auto
 make_tuple( T && ... t )
 {
   return camp::make_tuple( std::forward< T >( t ) ... );
diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp
index 0665e5a..f36a478 100644
--- a/src/geometry/mapping/unitTests/testUniformScaling.cpp
+++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp
@@ -100,7 +100,7 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue )
   auto cell = makeUniformScaling( h );
 
   // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple
-#if defined(SHIVA_USE_CAMP) || !defined(SHIVA_USE_CUDA)
+#if defined(SHIVA_USE_CAMP) || SHIVA_CUDA_MAJOR >= 12 || !defined(SHIVA_USE_CUDA)
   auto [detJ, invJ] = inverseJacobian( cell );
 #else
   auto tmp  = inverseJacobian( cell );

From a34ddcf6f77c5e05a9702df32168a410fef8ae80 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Thu, 11 Sep 2025 17:01:07 +0900
Subject: [PATCH 20/26] try again buddy

---
 cmake/CMakeBasics.cmake                       | 33 +++++++++----------
 docs/doxygen/ShivaConfig.hpp                  |  7 ++--
 src/ShivaConfig.hpp.in                        |  7 ++--
 .../mapping/unitTests/testUniformScaling.cpp  |  6 ++--
 4 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake
index f38bccc..61844a8 100644
--- a/cmake/CMakeBasics.cmake
+++ b/cmake/CMakeBasics.cmake
@@ -32,21 +32,20 @@ option( CAMP_ENABLE_TESTS OFF )
 
 
 if( ENABLE_CUDA )
-  # Extract CUDA version from CMake’s variables
-  set(SHIVA_CUDA_VERSION ${CUDAToolkit_VERSION})
-
-  # Also normalize to an integer for easy comparison (e.g. 12040 for 12.4.0)
-  string(REPLACE "." ";" CUDA_VERSION_LIST ${CUDAToolkit_VERSION})
-  list(GET CUDA_VERSION_LIST 0 CUDA_MAJOR)
-  list(GET CUDA_VERSION_LIST 1 CUDA_MINOR)
-  list(GET CUDA_VERSION_LIST 2 CUDA_PATCH)
-
-  math(EXPR CUDA_VERSION_INT "${CUDA_MAJOR}*1000 + ${CUDA_MINOR}*10 + ${CUDA_PATCH}")
-
-  target_compile_definitions( shiva PUBLIC
-                              SHIVA_CUDA_VERSION_STR="${CUDAToolkit_VERSION}"
-                              SHIVA_CUDA_VERSION_INT=${CUDA_VERSION_INT}
-                              SHIVA_CUDA_MAJOR=${CUDA_MAJOR}
-                              SHIVA_CUDA_MINOR=${CUDA_MINOR}
-                            )
+  if(CUDAToolkit_FOUND AND CUDAToolkit_VERSION)
+    set(SHIVA_CUDA_VERSION ${CUDAToolkit_VERSION})
+    string(REPLACE "." ";" _ver_list ${CUDAToolkit_VERSION})
+    list(GET _ver_list 0 SHIVA_CUDA_MAJOR)
+    list(GET _ver_list 1 SHIVA_CUDA_MINOR)
+    list(GET _ver_list 2 SHIVA_CUDA_PATCHLEVEL)
+    math(EXPR SHIVA_CUDA_VERSION_INT "${SHIVA_CUDA_MAJOR}*1000 + ${SHIVA_CUDA_MINOR}*10 + ${SHIVA_CUDA_PATCHLEVEL}")
+  else()
+    message(FATAL_ERROR "Could not determine CUDA version. Please set CUDAToolkit_ROOT to the location of your CUDA installation.")
+  endif()
+else()
+  set(SHIVA_CUDA_VERSION "0.0.0")
+  set(SHIVA_CUDA_MAJOR 0)
+  set(SHIVA_CUDA_MINOR 0)
+  set(SHIVA_CUDA_PATCHLEVEL 0)
+  set(SHIVA_CUDA_VERSION_INT 0)
 endif()
diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index c2e3f65..62fe5b0 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -18,6 +18,7 @@
 
 #define SHIVA_USE_BOUNDS_CHECK
 
-/* #undef SHIVA_CUDA_MAJOR */
-
-/* #undef SHIVA_CUDA_MINOR */
+#define SHIVA_CUDA_MAJOR        0
+#define SHIVA_CUDA_MINOR        0
+#define SHIVA_CUDA_PATCHLEVEL   0
+#define SHIVA_CUDA_VERSION_INT  0
diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in
index 56dbc07..e8ea9ce 100644
--- a/src/ShivaConfig.hpp.in
+++ b/src/ShivaConfig.hpp.in
@@ -18,6 +18,7 @@
 
 #cmakedefine SHIVA_USE_BOUNDS_CHECK
 
-#cmakedefine SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@
-
-#cmakedefine SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@
\ No newline at end of file
+#define SHIVA_CUDA_MAJOR        @SHIVA_CUDA_MAJOR@
+#define SHIVA_CUDA_MINOR        @SHIVA_CUDA_MINOR@
+#define SHIVA_CUDA_PATCHLEVEL   @SHIVA_CUDA_PATCHLEVEL@
+#define SHIVA_CUDA_VERSION_INT  @SHIVA_CUDA_VERSION_INT@
\ No newline at end of file
diff --git a/src/geometry/mapping/unitTests/testUniformScaling.cpp b/src/geometry/mapping/unitTests/testUniformScaling.cpp
index f36a478..543ae70 100644
--- a/src/geometry/mapping/unitTests/testUniformScaling.cpp
+++ b/src/geometry/mapping/unitTests/testUniformScaling.cpp
@@ -100,12 +100,12 @@ TEST( testUniformScaling, testInvJacobianFunctionReturnByValue )
   auto cell = makeUniformScaling( h );
 
   // libcudacxx in CUDA 11 lacks SB support for cuda::std::tuple
-#if defined(SHIVA_USE_CAMP) || SHIVA_CUDA_MAJOR >= 12 || !defined(SHIVA_USE_CUDA)
-  auto [detJ, invJ] = inverseJacobian( cell );
-#else
+#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12
   auto tmp  = inverseJacobian( cell );
   auto detJ = shiva::get< 0 >( tmp );
   auto invJ = shiva::get< 1 >( tmp );
+#else
+  auto [detJ, invJ] = inverseJacobian( cell );
 #endif
   EXPECT_EQ( detJ, 0.125 * h * h * h );
   EXPECT_EQ( invJ( 0 ), ( 2 / h ) );

From 93cd3aa567bee63387f60f8c7eb47200c69c6664 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Thu, 11 Sep 2025 18:24:32 +0900
Subject: [PATCH 21/26] stop listening to chat

---
 cmake/CMakeBasics.cmake      | 21 ++++++++-------------
 docs/doxygen/ShivaConfig.hpp |  4 ++--
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake
index 61844a8..82a64a7 100644
--- a/cmake/CMakeBasics.cmake
+++ b/cmake/CMakeBasics.cmake
@@ -32,20 +32,15 @@ option( CAMP_ENABLE_TESTS OFF )
 
 
 if( ENABLE_CUDA )
-  if(CUDAToolkit_FOUND AND CUDAToolkit_VERSION)
-    set(SHIVA_CUDA_VERSION ${CUDAToolkit_VERSION})
-    string(REPLACE "." ";" _ver_list ${CUDAToolkit_VERSION})
-    list(GET _ver_list 0 SHIVA_CUDA_MAJOR)
-    list(GET _ver_list 1 SHIVA_CUDA_MINOR)
-    list(GET _ver_list 2 SHIVA_CUDA_PATCHLEVEL)
-    math(EXPR SHIVA_CUDA_VERSION_INT "${SHIVA_CUDA_MAJOR}*1000 + ${SHIVA_CUDA_MINOR}*10 + ${SHIVA_CUDA_PATCHLEVEL}")
+  if( CUDA_VERSION AND CUDA_VERSION_MAJOR AND CUDA_VERSION_MINOR )
+    set( SHIVA_CUDA_VERSION ${CUDA_VERSION} )
+    set( SHIVA_CUDA_MAJOR ${CUDA_VERSION_MAJOR} )
+    set( SHIVA_CUDA_MINOR ${CUDA_VERSION_MINOR} )
   else()
-    message(FATAL_ERROR "Could not determine CUDA version. Please set CUDAToolkit_ROOT to the location of your CUDA installation.")
+    message(FATAL_ERROR "CUDA_VERSION_MAJOR and CUDA_VERSION_MINOR not defined")
   endif()
 else()
-  set(SHIVA_CUDA_VERSION "0.0.0")
-  set(SHIVA_CUDA_MAJOR 0)
-  set(SHIVA_CUDA_MINOR 0)
-  set(SHIVA_CUDA_PATCHLEVEL 0)
-  set(SHIVA_CUDA_VERSION_INT 0)
+  set( SHIVA_CUDA_VERSION 0 )
+  set( SHIVA_CUDA_MAJOR 0 )
+  set( SHIVA_CUDA_MINOR 0 )
 endif()
diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index 62fe5b0..493d655 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -20,5 +20,5 @@
 
 #define SHIVA_CUDA_MAJOR        0
 #define SHIVA_CUDA_MINOR        0
-#define SHIVA_CUDA_PATCHLEVEL   0
-#define SHIVA_CUDA_VERSION_INT  0
+#define SHIVA_CUDA_PATCHLEVEL   
+#define SHIVA_CUDA_VERSION_INT  

From 723a02c6b9c0db19ea6d16daac6c6cb965ee58c9 Mon Sep 17 00:00:00 2001
From: Randolph R Settgast <randolph.settgast@scientificcomputingsolutions.com>
Date: Thu, 11 Sep 2025 08:18:59 -0500
Subject: [PATCH 22/26] fix some issues

---
 docs/doxygen/ShivaConfig.hpp |  8 +++----
 src/ShivaConfig.hpp.in       |  2 --
 src/common/ShivaMacros.hpp   | 43 +++++++++++++++++++++++++++++-------
 src/common/types.hpp         |  1 +
 4 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index 493d655..b0a1aa6 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -8,17 +8,17 @@
 
 #define SHIVA_VERSION_PATCHLEVEL 0
 
-/* #undef SHIVA_USE_CUDA */
+#define SHIVA_USE_CUDA
 
 /* #undef SHIVA_USE_HIP */
 
 /* #undef SHIVA_USE_CALIPER */
 
-#define SHIVA_USE_CAMP
+/* #undef SHIVA_USE_CAMP */
 
 #define SHIVA_USE_BOUNDS_CHECK
 
-#define SHIVA_CUDA_MAJOR        0
-#define SHIVA_CUDA_MINOR        0
+#define SHIVA_CUDA_MAJOR        12
+#define SHIVA_CUDA_MINOR        6
 #define SHIVA_CUDA_PATCHLEVEL   
 #define SHIVA_CUDA_VERSION_INT  
diff --git a/src/ShivaConfig.hpp.in b/src/ShivaConfig.hpp.in
index e8ea9ce..7103b82 100644
--- a/src/ShivaConfig.hpp.in
+++ b/src/ShivaConfig.hpp.in
@@ -20,5 +20,3 @@
 
 #define SHIVA_CUDA_MAJOR        @SHIVA_CUDA_MAJOR@
 #define SHIVA_CUDA_MINOR        @SHIVA_CUDA_MINOR@
-#define SHIVA_CUDA_PATCHLEVEL   @SHIVA_CUDA_PATCHLEVEL@
-#define SHIVA_CUDA_VERSION_INT  @SHIVA_CUDA_VERSION_INT@
\ No newline at end of file
diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp
index c3280e1..0ce8264 100644
--- a/src/common/ShivaMacros.hpp
+++ b/src/common/ShivaMacros.hpp
@@ -110,11 +110,38 @@ void i_g_n_o_r_e( ARGS const & ... ) {}
  * @param cond The condition to assert is true.
  * @param ... The message to print if the assertion fails.
  */
-#define SHIVA_ASSERT_MSG( cond, ... ) \
-        do { \
-          if ( !(cond)) { \
-            if ( !__builtin_is_constant_evaluated()) { \
-              shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
-            } \
-          } \
-        } while ( 0 )
+#if defined(__CUDACC__)
+// NVCC: avoid is_constant_evaluated/if consteval to silence #3060.
+// (Optional: you can also add --diag-suppress=3060 instead.)
+  #define SHIVA_ASSERT_MSG(cond, ...)                                        \
+    do {                                                                     \
+      if (!(cond)) {                                                         \
+        shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__);         \
+      }                                                                      \
+    } while (0)
+#elif defined(__cpp_if_consteval) && __cpp_if_consteval >= 202106L
+// Modern C++: prefer 'if consteval'
+  #define SHIVA_ASSERT_MSG(cond, ...)                                        \
+    do {                                                                     \
+      if (!(cond)) {                                                         \
+        if consteval {                                                       \
+          static_assert((cond), "SHIVA_ASSERT_MSG failed in constant eval"); \
+        } else {                                                             \
+          shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__);       \
+        }                                                                    \
+      }                                                                      \
+    } while (0)
+#else
+// Portable fallback using std::is_constant_evaluated (no NVCC)
+  #include <type_traits>
+  #define SHIVA_ASSERT_MSG(cond, ...)                                        \
+    do {                                                                     \
+      if (!(cond)) {                                                         \
+        if (!std::is_constant_evaluated()) {                                 \
+          shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__);       \
+        } else {                                                             \
+          static_assert((cond), "SHIVA_ASSERT_MSG failed in constant eval"); \
+        }                                                                    \
+      }                                                                      \
+    } while (0)
+#endif
diff --git a/src/common/types.hpp b/src/common/types.hpp
index 5ef42c8..9c1ce28 100644
--- a/src/common/types.hpp
+++ b/src/common/types.hpp
@@ -66,6 +66,7 @@ make_tuple( T && ... t )
  */
 template< typename ... T >
 using tuple = cuda::std::tuple< T ... >;
+using cuda::std::get;
 
 /**
  * @brief Wrapper for cuda::std::make_tuple.

From cbca43cc684cc3162580d0ce31036aa11d9432f4 Mon Sep 17 00:00:00 2001
From: Randolph R Settgast <randolph.settgast@scientificcomputingsolutions.com>
Date: Thu, 11 Sep 2025 08:22:06 -0500
Subject: [PATCH 23/26] add maple hostconfig

---
 hostconfigs/TTE/maple_rocky9.cmake | 24 +++++++++++++++++
 src/common/ShivaMacros.hpp         | 43 ++++++------------------------
 2 files changed, 32 insertions(+), 35 deletions(-)
 create mode 100644 hostconfigs/TTE/maple_rocky9.cmake

diff --git a/hostconfigs/TTE/maple_rocky9.cmake b/hostconfigs/TTE/maple_rocky9.cmake
new file mode 100644
index 0000000..e3f4a83
--- /dev/null
+++ b/hostconfigs/TTE/maple_rocky9.cmake
@@ -0,0 +1,24 @@
+set(CONFIG_NAME "maple_rocky9" CACHE PATH "") 
+
+set(COMPILER_DIR  /opt/rh/gcc-toolset-13/root/ )
+set(CMAKE_C_COMPILER ${COMPILER_DIR}/bin/gcc CACHE PATH "")
+set(CMAKE_CXX_COMPILER ${COMPILER_DIR}/bin/g++ CACHE PATH "")
+
+# C++ options
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -mtune=native -march=native" CACHE STRING "")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "")
+
+# Cuda options
+set(ENABLE_CUDA ON CACHE BOOL "")
+set(CUDA_TOOLKIT_ROOT_DIR /hrtc/apps/cuda/12.6.20/aarch64/rocky9 CACHE STRING "")
+set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "")
+set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc CACHE STRING "")
+set(CMAKE_CUDA_ARCHITECTURES 90 CACHE STRING "")
+set(CMAKE_CUDA_STANDARD 17 CACHE STRING "")
+set(CMAKE_CUDA_FLAGS "-restrict --expt-extended-lambda --expt-relaxed-constexpr -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "")
+#set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3 -Xcompiler -mcpu=powerpc64le -Xcompiler -mtune=powerpc64le" CACHE STRING "")
+#set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "")
+#set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "")
+
+set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "Disable CAMP support" FORCE )
\ No newline at end of file
diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp
index 0ce8264..c3280e1 100644
--- a/src/common/ShivaMacros.hpp
+++ b/src/common/ShivaMacros.hpp
@@ -110,38 +110,11 @@ void i_g_n_o_r_e( ARGS const & ... ) {}
  * @param cond The condition to assert is true.
  * @param ... The message to print if the assertion fails.
  */
-#if defined(__CUDACC__)
-// NVCC: avoid is_constant_evaluated/if consteval to silence #3060.
-// (Optional: you can also add --diag-suppress=3060 instead.)
-  #define SHIVA_ASSERT_MSG(cond, ...)                                        \
-    do {                                                                     \
-      if (!(cond)) {                                                         \
-        shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__);         \
-      }                                                                      \
-    } while (0)
-#elif defined(__cpp_if_consteval) && __cpp_if_consteval >= 202106L
-// Modern C++: prefer 'if consteval'
-  #define SHIVA_ASSERT_MSG(cond, ...)                                        \
-    do {                                                                     \
-      if (!(cond)) {                                                         \
-        if consteval {                                                       \
-          static_assert((cond), "SHIVA_ASSERT_MSG failed in constant eval"); \
-        } else {                                                             \
-          shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__);       \
-        }                                                                    \
-      }                                                                      \
-    } while (0)
-#else
-// Portable fallback using std::is_constant_evaluated (no NVCC)
-  #include <type_traits>
-  #define SHIVA_ASSERT_MSG(cond, ...)                                        \
-    do {                                                                     \
-      if (!(cond)) {                                                         \
-        if (!std::is_constant_evaluated()) {                                 \
-          shivaAssertionFailed(__FILE__, __LINE__, true, __VA_ARGS__);       \
-        } else {                                                             \
-          static_assert((cond), "SHIVA_ASSERT_MSG failed in constant eval"); \
-        }                                                                    \
-      }                                                                      \
-    } while (0)
-#endif
+#define SHIVA_ASSERT_MSG( cond, ... ) \
+        do { \
+          if ( !(cond)) { \
+            if ( !__builtin_is_constant_evaluated()) { \
+              shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
+            } \
+          } \
+        } while ( 0 )

From 85e40e00cb1b08f9066180542e3bc6c504bdc2fa Mon Sep 17 00:00:00 2001
From: "Randolph R. Settgast" <settgast1@llnl.gov>
Date: Thu, 11 Sep 2025 18:25:41 -0700
Subject: [PATCH 24/26] some fixes

---
 cmake/blt                                     |  2 +-
 docs/doxygen/ShivaConfig.hpp                  |  6 +--
 src/common/ShivaMacros.hpp                    | 46 ++++++++++++++++---
 .../mapping/unitTests/testLinearTransform.cpp |  6 +++
 .../mapping/unitTests/testScaling.cpp         |  8 +++-
 5 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/cmake/blt b/cmake/blt
index fb4246b..9ff7734 160000
--- a/cmake/blt
+++ b/cmake/blt
@@ -1 +1 @@
-Subproject commit fb4246b8bae74c3d7291bef9698fd38863844680
+Subproject commit 9ff77344f0b2a6ee345e452bddd6bfd46cbbfa35
diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index b0a1aa6..b186086 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -18,7 +18,5 @@
 
 #define SHIVA_USE_BOUNDS_CHECK
 
-#define SHIVA_CUDA_MAJOR        12
-#define SHIVA_CUDA_MINOR        6
-#define SHIVA_CUDA_PATCHLEVEL   
-#define SHIVA_CUDA_VERSION_INT  
+#define SHIVA_CUDA_MAJOR        11
+#define SHIVA_CUDA_MINOR        8
diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp
index c3280e1..cad432a 100644
--- a/src/common/ShivaMacros.hpp
+++ b/src/common/ShivaMacros.hpp
@@ -105,16 +105,48 @@ void i_g_n_o_r_e( ARGS const & ... ) {}
 
 
 
+
+// Portable builtin detector
+#ifndef SHIVA_HAS_BUILTIN
+  #ifdef __has_builtin
+    #define SHIVA_HAS_BUILTIN(x) __has_builtin(x)
+  #else
+    #define SHIVA_HAS_BUILTIN(x) 0
+  #endif
+#endif
+
+// Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain
+#if defined(__CUDA_ARCH__)
+  // Device code (nvcc, hipcc): no support in C++17
+  #define SHIVA_IS_CONST_EVAL() (false)
+
+#elif SHIVA_HAS_BUILTIN(__builtin_is_constant_evaluated)
+  // GCC / Clang host code
+  #define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated())
+
+#elif defined(_MSC_VER)
+  // MSVC
+  #define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated())
+
+#else
+  // Fallback: always runtime
+  #define SHIVA_IS_CONST_EVAL() (false)
+#endif
+
+
+
 /**
  * @brief This macro is used to implement an assertion.
  * @param cond The condition to assert is true.
  * @param ... The message to print if the assertion fails.
  */
 #define SHIVA_ASSERT_MSG( cond, ... ) \
-        do { \
-          if ( !(cond)) { \
-            if ( !__builtin_is_constant_evaluated()) { \
-              shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
-            } \
-          } \
-        } while ( 0 )
+do { \
+  if ( !(cond)) { \
+    if ( !SHIVA_IS_CONST_EVAL() ) { \
+      shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
+    } \
+  } \
+} while ( 0 )
+
+
diff --git a/src/geometry/mapping/unitTests/testLinearTransform.cpp b/src/geometry/mapping/unitTests/testLinearTransform.cpp
index 9087bb6..5a575bf 100644
--- a/src/geometry/mapping/unitTests/testLinearTransform.cpp
+++ b/src/geometry/mapping/unitTests/testLinearTransform.cpp
@@ -305,7 +305,13 @@ void testInvJacobianFunctionReturnByValueHelper()
     auto cell = makeLinearTransform( Xref );
     for ( int q = 0; q < 8; ++q )
     {
+#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12
+      auto tmp  = inverseJacobian( cell, qCoords[q] );
+      auto detJ = shiva::get< 0 >( tmp );
+      auto invJ = shiva::get< 1 >( tmp );
+#else
       auto [ detJ, invJ ] = inverseJacobian( cell, qCoords[q] );
+#endif
 
       kernelData[ 10 * q ] = detJ;
       for ( int i = 0; i < 3; ++i )
diff --git a/src/geometry/mapping/unitTests/testScaling.cpp b/src/geometry/mapping/unitTests/testScaling.cpp
index 91f8129..b83bbb2 100644
--- a/src/geometry/mapping/unitTests/testScaling.cpp
+++ b/src/geometry/mapping/unitTests/testScaling.cpp
@@ -143,7 +143,13 @@ void testInvJacobianFunctionReturnByValueHelper()
   {
     auto cell = makeScaling( h );
 
-    auto [ detJ, invJ ] = inverseJacobian( cell );
+#if defined(SHIVA_USE_CUDA) && SHIVA_CUDA_MAJOR < 12
+    auto tmp  = inverseJacobian( cell );
+    auto detJ = shiva::get< 0 >( tmp );
+    auto invJ = shiva::get< 1 >( tmp );
+#else
+    auto [detJ, invJ] = inverseJacobian( cell );
+#endif
     kdata[0] = detJ;
     kdata[1] = invJ( 0 );
     kdata[2] = invJ( 1 );

From 28880b2b44824a25110e0987e7da62f88ba02904 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <rrsettgast@gmail.com>
Date: Fri, 12 Sep 2025 19:59:17 +0900
Subject: [PATCH 25/26] doxygen

---
 docs/doxygen/ShivaConfig.hpp |  8 ++++----
 src/common/ShivaMacros.hpp   | 39 ++++++++++++++++++------------------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/docs/doxygen/ShivaConfig.hpp b/docs/doxygen/ShivaConfig.hpp
index b186086..8e64b01 100644
--- a/docs/doxygen/ShivaConfig.hpp
+++ b/docs/doxygen/ShivaConfig.hpp
@@ -8,15 +8,15 @@
 
 #define SHIVA_VERSION_PATCHLEVEL 0
 
-#define SHIVA_USE_CUDA
+/* #undef SHIVA_USE_CUDA */
 
 /* #undef SHIVA_USE_HIP */
 
 /* #undef SHIVA_USE_CALIPER */
 
-/* #undef SHIVA_USE_CAMP */
+#define SHIVA_USE_CAMP
 
 #define SHIVA_USE_BOUNDS_CHECK
 
-#define SHIVA_CUDA_MAJOR        11
-#define SHIVA_CUDA_MINOR        8
+#define SHIVA_CUDA_MAJOR        0
+#define SHIVA_CUDA_MINOR        0
diff --git a/src/common/ShivaMacros.hpp b/src/common/ShivaMacros.hpp
index cad432a..f8ddd3d 100644
--- a/src/common/ShivaMacros.hpp
+++ b/src/common/ShivaMacros.hpp
@@ -105,31 +105,34 @@ void i_g_n_o_r_e( ARGS const & ... ) {}
 
 
 
-
-// Portable builtin detector
+/**
+ * @brief This macro is used to detect the presence of builtin functions.
+ */
 #ifndef SHIVA_HAS_BUILTIN
   #ifdef __has_builtin
-    #define SHIVA_HAS_BUILTIN(x) __has_builtin(x)
+    #define SHIVA_HAS_BUILTIN( x ) __has_builtin( x )
   #else
-    #define SHIVA_HAS_BUILTIN(x) 0
+    #define SHIVA_HAS_BUILTIN( x ) 0
   #endif
 #endif
 
-// Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain
+/**
+ * @brief Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain
+ */
 #if defined(__CUDA_ARCH__)
-  // Device code (nvcc, hipcc): no support in C++17
+// Device compilation pass (__CUDA_ARCH__ is defined): no support in C++17
   #define SHIVA_IS_CONST_EVAL() (false)
 
-#elif SHIVA_HAS_BUILTIN(__builtin_is_constant_evaluated)
-  // GCC / Clang host code
+#elif SHIVA_HAS_BUILTIN( __builtin_is_constant_evaluated )
+// GCC / Clang host code
   #define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated())
 
 #elif defined(_MSC_VER)
-  // MSVC
+// MSVC
   #define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated())
 
 #else
-  // Fallback: always runtime
+// Fallback: always runtime
   #define SHIVA_IS_CONST_EVAL() (false)
 #endif
 
@@ -141,12 +144,10 @@ void i_g_n_o_r_e( ARGS const & ... ) {}
  * @param ... The message to print if the assertion fails.
  */
 #define SHIVA_ASSERT_MSG( cond, ... ) \
-do { \
-  if ( !(cond)) { \
-    if ( !SHIVA_IS_CONST_EVAL() ) { \
-      shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
-    } \
-  } \
-} while ( 0 )
-
-
+        do { \
+          if ( !(cond)) { \
+            if ( !SHIVA_IS_CONST_EVAL() ) { \
+              shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
+            } \
+          } \
+        } while ( 0 )

From 63d667de1caecc2949409e1cb2412848b2c79916 Mon Sep 17 00:00:00 2001
From: Randolph Settgast <settgast1@llnl.gov>
Date: Fri, 12 Sep 2025 20:10:21 +0900
Subject: [PATCH 26/26] Apply suggestions from code review

---
 .gitmodules | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index a546177..000410f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "cmake/blt"]
 	path = cmake/blt
-	url = https://github.com/LLNL/blt.git
+	url = ../../LLNL/blt.git
 [submodule "tpl/camp"]
 	path = tpl/camp
-	url = https://github.com/LLNL/camp.git
+	url = ../../LLNL/camp.git