Merged
Changes from all commits
29 commits
86f6622
comment out assert calls which are not constexpr
rrsettgast Mar 16, 2025
5d873d1
make camp more optional
rrsettgast Mar 22, 2025
abed558
some cmake fixes
rrsettgast Mar 29, 2025
9c3f0d0
fixes for absolute paths in aggregateOrSplit.py
rrsettgast Mar 29, 2025
6ff1f74
updated pmpl for data transfer
rrsettgast Apr 7, 2025
e6991d1
Merge branch 'main' into feature/integrationIntoProxySEM
rrsettgast Apr 9, 2025
34c55b6
Merge branch 'feature/integrationIntoProxySEM' of github.com:GEOS-DEV…
rrsettgast Apr 9, 2025
6d655d5
Merge branch 'main' into feature/integrationIntoProxySEM
rrsettgast Apr 10, 2025
0582eed
use html url for submodules to avoid issues TotalEnergies staff were …
rrsettgast May 10, 2025
64a16a7
change looping strategy for jacobian to static
rrsettgast May 20, 2025
8abcdf3
templatize hard coded real types
rrsettgast May 21, 2025
ece7de7
some code review suggestions
rrsettgast Sep 10, 2025
1c19a83
fix bug
rrsettgast Sep 10, 2025
f9232c3
try to fix bug again
rrsettgast Sep 10, 2025
a70ffbe
try to fix bug again
rrsettgast Sep 10, 2025
8b0f231
try to fix bug again
rrsettgast Sep 10, 2025
b9cb71c
try to fix bug again
rrsettgast Sep 10, 2025
7de0c15
cuda/std/tuple workaround and uncrustify
rrsettgast Sep 11, 2025
3b4b89b
try again
rrsettgast Sep 11, 2025
8598f07
try again
rrsettgast Sep 11, 2025
2d82182
remove a bunch of crap...just don't use the bindings for cuda wihtout…
rrsettgast Sep 11, 2025
3cba8eb
add cuda version through cmake
rrsettgast Sep 11, 2025
a34ddcf
try again buddy
rrsettgast Sep 11, 2025
93cd3aa
stop listening to chat
rrsettgast Sep 11, 2025
723a02c
fix some issues
rrsettgast Sep 11, 2025
cbca43c
add maple hostconfig
rrsettgast Sep 11, 2025
85e40e0
some fixes
rrsettgast Sep 12, 2025
28880b2
doxygen
rrsettgast Sep 12, 2025
63d667d
Apply suggestions from code review
rrsettgast Sep 12, 2025
24 changes: 17 additions & 7 deletions CMakeLists.txt
@@ -15,12 +15,12 @@ set( SHIVA_VERSION_PATCHLEVEL 0 )
# check if Shiva is build as a submodule or a separate project
get_directory_property( parent_dir PARENT_DIRECTORY )
if(parent_dir)
set( is_submodule ON )
set( SHIVA_IS_SUBMODULE ON )
else()
set( is_submodule OFF )
set( SHIVA_IS_SUBMODULE OFF )
endif()

if( NOT is_submodule )
if( NOT SHIVA_IS_SUBMODULE )
message( "not a submodule")
project( Shiva LANGUAGES CXX C )

@@ -66,12 +66,22 @@ include( cmake/Macros.cmake )
include( cmake/Config.cmake )


set(SHIVA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
set(SHIVA_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )

message( STATUS "SHIVA_BINARY_DIR: ${SHIVA_BINARY_DIR}" )
message( STATUS "SHIVA_SOURCE_DIR: ${SHIVA_SOURCE_DIR}" )


add_subdirectory( src )
add_subdirectory( tpl/camp )
target_compile_options( camp PRIVATE "-Wno-shadow")

configure_file(tpl/camp/include/camp/config.in.hpp
${PROJECT_BINARY_DIR}/include/camp/config.hpp)
if( SHIVA_ENABLE_CAMP )
add_subdirectory( tpl/camp )
target_compile_options( camp PRIVATE "-Wno-shadow")

configure_file(tpl/camp/include/camp/config.in.hpp
${PROJECT_BINARY_DIR}/include/camp/config.hpp)
endif()


if( SHIVA_ENABLE_DOCS )
19 changes: 17 additions & 2 deletions cmake/CMakeBasics.cmake
@@ -27,5 +27,20 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG
CLANG "-fstandalone-debug"
)


set( CAMP_ENABLE_TESTS OFF CACHE BOOL "")
option( SHIVA_ENABLE_CAMP OFF )
option( CAMP_ENABLE_TESTS OFF )


if( ENABLE_CUDA )
if( CUDA_VERSION AND CUDA_VERSION_MAJOR AND CUDA_VERSION_MINOR )
set( SHIVA_CUDA_VERSION ${CUDA_VERSION} )
set( SHIVA_CUDA_MAJOR ${CUDA_VERSION_MAJOR} )
set( SHIVA_CUDA_MINOR ${CUDA_VERSION_MINOR} )
else()
message(FATAL_ERROR "CUDA_VERSION_MAJOR and CUDA_VERSION_MINOR not defined")
endif()
else()
set( SHIVA_CUDA_VERSION 0 )
set( SHIVA_CUDA_MAJOR 0 )
set( SHIVA_CUDA_MINOR 0 )
endif()
1 change: 1 addition & 0 deletions cmake/Config.cmake
@@ -1,6 +1,7 @@
#
set( PREPROCESSOR_DEFINES CUDA
HIP
CAMP
BOUNDS_CHECK
)

2 changes: 1 addition & 1 deletion cmake/blt
Submodule blt updated 354 files
5 changes: 5 additions & 0 deletions docs/doxygen/ShivaConfig.hpp
@@ -14,4 +14,9 @@

/* #undef SHIVA_USE_CALIPER */

#define SHIVA_USE_CAMP

#define SHIVA_USE_BOUNDS_CHECK

#define SHIVA_CUDA_MAJOR 0
#define SHIVA_CUDA_MINOR 0
24 changes: 24 additions & 0 deletions hostconfigs/TTE/maple_rocky9.cmake
@@ -0,0 +1,24 @@
set(CONFIG_NAME "maple_rocky9" CACHE PATH "")

set(COMPILER_DIR /opt/rh/gcc-toolset-13/root/ )
set(CMAKE_C_COMPILER ${COMPILER_DIR}/bin/gcc CACHE PATH "")
set(CMAKE_CXX_COMPILER ${COMPILER_DIR}/bin/g++ CACHE PATH "")

# C++ options
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -mtune=native -march=native" CACHE STRING "")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "")
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "")

# Cuda options
set(ENABLE_CUDA ON CACHE BOOL "")
set(CUDA_TOOLKIT_ROOT_DIR /hrtc/apps/cuda/12.6.20/aarch64/rocky9 CACHE STRING "")
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "")
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc CACHE STRING "")
set(CMAKE_CUDA_ARCHITECTURES 90 CACHE STRING "")
set(CMAKE_CUDA_STANDARD 17 CACHE STRING "")
set(CMAKE_CUDA_FLAGS "-restrict --expt-extended-lambda --expt-relaxed-constexpr -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "")
#set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3 -Xcompiler -mcpu=powerpc64le -Xcompiler -mtune=powerpc64le" CACHE STRING "")
#set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "")
#set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "")

set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "Disable CAMP support" FORCE )
17 changes: 13 additions & 4 deletions scripts/aggregateOrSplit.py
@@ -20,11 +20,13 @@ def create_dependency_graph(self, header, include_paths=None):
if include_paths is None:
include_paths = []

header = os.path.abspath(header) # Normalize here

if header in self.dependencies:
return # Already processed

self.dependencies[header] = set()
base_path = os.path.dirname(os.path.abspath(header)) # Base directory of the current header
base_path = os.path.dirname(header) # Base directory of the current header

try:
with open(header, 'r') as file:
@@ -34,10 +36,10 @@ def create_dependency_graph(self, header, include_paths=None):
included_file = include_match.group(1)

if included_file != self.config_file:
resolved_path = self.resolve_path(
included_file, base_path, include_paths)
resolved_path = self.resolve_path( included_file, base_path, include_paths)

if resolved_path:
resolved_path = os.path.abspath(resolved_path)
self.dependencies[header].add(resolved_path)

if os.path.exists(resolved_path):
@@ -82,16 +84,21 @@ def resolve_path(self, included_file, base_path, include_paths):

return None # Return None if no resolution was possible


def generate_header_list(self):
remaining_dependencies = self.dependencies.copy()
size_of_remaining_dependencies = len(remaining_dependencies)
unique_files = set() # Track unique files by absolute path

while size_of_remaining_dependencies > 0:
local_included = []

for key in remaining_dependencies:
if len(remaining_dependencies[key]) == 0:
self.included_list.append(key)
abs_key = os.path.abspath(key)
if abs_key not in unique_files:
self.included_list.append(abs_key)
unique_files.add(abs_key)
local_included.append(key)

for included_key in local_included:
@@ -111,6 +118,7 @@ def process_header(header_path, output):
"""
Processes a single header file, commenting out includes and pragmas.
"""
header_path = os.path.abspath(header_path)
if header_path in self.included:
return # Avoid duplicate processing
self.included.add(header_path)
@@ -133,6 +141,7 @@ def process_header(header_path, output):

with open(output_file, 'w') as output:
for header in headers:
header = os.path.abspath(header)
self.create_dependency_graph(header, include_paths)

for header in self.dependencies:
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
@@ -23,7 +23,8 @@ blt_add_library( NAME shiva

target_include_directories( shiva
INTERFACE
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>
$<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/include>
$<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/src>
$<INSTALL_INTERFACE:include> )

install( FILES ${shiva_headers}
7 changes: 6 additions & 1 deletion src/ShivaConfig.hpp.in
@@ -14,4 +14,9 @@

#cmakedefine SHIVA_USE_CALIPER

#cmakedefine SHIVA_USE_BOUNDS_CHECK
#cmakedefine SHIVA_USE_CAMP

#cmakedefine SHIVA_USE_BOUNDS_CHECK

#define SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@
#define SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@
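For reference, a minimal sketch of how the generated SHIVA_CUDA_MAJOR / SHIVA_CUDA_MINOR macros can be consumed downstream. The 12.4 threshold and the EXAMPLE_USE_NEW_TOOLKIT_PATH macro are illustrative assumptions, not part of this PR; host-only builds see 0.0 per the CMakeBasics.cmake branch above.

#include <cstdio>

// Fallbacks only matter if this snippet is compiled outside a configured Shiva build.
#ifndef SHIVA_CUDA_MAJOR
#define SHIVA_CUDA_MAJOR 0
#endif
#ifndef SHIVA_CUDA_MINOR
#define SHIVA_CUDA_MINOR 0
#endif

// Illustrative toolkit gate (assumed threshold).
#if SHIVA_CUDA_MAJOR > 12 || ( SHIVA_CUDA_MAJOR == 12 && SHIVA_CUDA_MINOR >= 4 )
#define EXAMPLE_USE_NEW_TOOLKIT_PATH 1
#else
#define EXAMPLE_USE_NEW_TOOLKIT_PATH 0
#endif

int main()
{
  std::printf( "CUDA toolkit seen by Shiva: %d.%d (new-toolkit path: %d)\n",
               SHIVA_CUDA_MAJOR, SHIVA_CUDA_MINOR, EXAMPLE_USE_NEW_TOOLKIT_PATH );
  return 0;
}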
8 changes: 4 additions & 4 deletions src/common/CMakeLists.txt
@@ -35,14 +35,14 @@ blt_add_library( NAME common

target_include_directories( common
INTERFACE
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/include>
$<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/src>
$<INSTALL_INTERFACE:include> )

target_include_directories( common
SYSTEM INTERFACE
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/tpl/camp/include>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/tpl/camp/include> )
$<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/tpl/camp/include>
$<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/tpl/camp/include> )

install( FILES ${common_headers}
DESTINATION include/common )
35 changes: 34 additions & 1 deletion src/common/ShivaMacros.hpp
@@ -105,6 +105,39 @@ void i_g_n_o_r_e( ARGS const & ... ) {}



/**
* @brief This macro is used to detect the presence of builtin functions.
*/
#ifndef SHIVA_HAS_BUILTIN
#ifdef __has_builtin
#define SHIVA_HAS_BUILTIN( x ) __has_builtin( x )
#else
#define SHIVA_HAS_BUILTIN( x ) 0
#endif
#endif

/**
* @brief Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain
*/
#if defined(__CUDA_ARCH__)
// Device code (nvcc, hipcc): no support in C++17
#define SHIVA_IS_CONST_EVAL() (false)

#elif SHIVA_HAS_BUILTIN( __builtin_is_constant_evaluated )
// GCC / Clang host code
#define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated())

#elif defined(_MSC_VER)
// MSVC
#define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated())

#else
// Fallback: always runtime
#define SHIVA_IS_CONST_EVAL() (false)
#endif



/**
* @brief This macro is used to implement an assertion.
* @param cond The condition to assert is true.
@@ -113,7 +146,7 @@
#define SHIVA_ASSERT_MSG( cond, ... ) \
do { \
if ( !(cond)) { \
if ( !__builtin_is_constant_evaluated()) { \
if ( !SHIVA_IS_CONST_EVAL() ) { \
shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
} \
} \
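For reference, a standalone sketch of the fallback chain introduced above: SHIVA_IS_CONST_EVAL() lets SHIVA_ASSERT_MSG skip the runtime failure handler during constant evaluation, so functions that assert remain usable in constexpr expressions. The checkedValue function below is a hypothetical user, not code from Shiva.

#include <cstdio>

// Same detection logic as the ShivaMacros.hpp hunk above.
#ifndef SHIVA_HAS_BUILTIN
#ifdef __has_builtin
#define SHIVA_HAS_BUILTIN( x ) __has_builtin( x )
#else
#define SHIVA_HAS_BUILTIN( x ) 0
#endif
#endif

#if defined(__CUDA_ARCH__)
#define SHIVA_IS_CONST_EVAL() (false)
#elif SHIVA_HAS_BUILTIN( __builtin_is_constant_evaluated )
#define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated())
#elif defined(_MSC_VER)
#define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated())
#else
#define SHIVA_IS_CONST_EVAL() (false)
#endif

// Hypothetical caller: reports a bad argument, but only at runtime.
constexpr int checkedValue( int const v )
{
  if ( v < 0 && !SHIVA_IS_CONST_EVAL() )
  {
    std::printf( "checkedValue: negative input %d\n", v );
  }
  return v;
}

// Compiles because the runtime-only branch is never evaluated at compile time.
static_assert( checkedValue( 3 ) == 3, "usable in constant expressions" );

int main()
{
  return checkedValue( -1 ) < 0 ? 0 : 1;   // runtime call takes the printf branch
}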
49 changes: 26 additions & 23 deletions src/common/pmpl.hpp
@@ -28,25 +28,27 @@
namespace shiva
{
#if defined(SHIVA_USE_DEVICE)
#if defined(SHIVA_USE_CUDA)
#define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES );
#define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES );
#define deviceDeviceSynchronize() cudaDeviceSynchronize();
#define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
#define deviceFree( PTR ) cudaFree( PTR );
#define deviceError_t cudaError_t
#define deviceSuccess cudaSuccess
#define deviceGetErrorString cudaGetErrorString
#elif defined(SHIVA_USE_HIP)
#define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
#define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
#define deviceDeviceSynchronize() hipDeviceSynchronize();
#define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
#define deviceFree( PTR ) hipFree( PTR );
#define deviceError_t hipError_t
#define deviceSuccess = hipSuccess;
#define deviceGetErrorString hipGetErrorString
#endif
#if defined(SHIVA_USE_CUDA)
#define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES );
#define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES );
#define deviceDeviceSynchronize() cudaDeviceSynchronize();
#define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
#define deviceFree( PTR ) cudaFree( PTR );
#define deviceError_t cudaError_t
#define deviceGetErrorString cudaGetErrorString
#define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
constexpr cudaError_t deviceSuccess = cudaSuccess;
#elif defined(SHIVA_USE_HIP)
#define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
#define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
#define deviceDeviceSynchronize() hipDeviceSynchronize();
#define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
#define deviceFree( PTR ) hipFree( PTR );
#define deviceError_t hipError_t
#define deviceGetErrorString hipGetErrorString
#define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
constexpr hipError_t deviceSuccess = hipSuccess;
#endif
#endif

/**
@@ -100,9 +102,9 @@ void genericKernelWrapper( LAMBDA && func, bool const abortOnError = true )
#if defined(SHIVA_USE_DEVICE)
// UNCRUSTIFY-OFF
genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ) );
//UNCRUSTIFY-ON
// UNCRUSTIFY-ON
deviceError_t err = deviceDeviceSynchronize();
if ( err != cudaSuccess )
if ( err != deviceSuccess )
{
printf( "Kernel failed: %s\n", deviceGetErrorString( err ));
if ( abortOnError )
@@ -157,13 +159,14 @@ void genericKernelWrapper( int const N, DATA_TYPE * const hostData, LAMBDA && fu
#if defined(SHIVA_USE_DEVICE)
DATA_TYPE * deviceData;
deviceMalloc( &deviceData, N * sizeof(DATA_TYPE) );
deviceMemCpy( deviceData, hostData, N * sizeof(DATA_TYPE), cudaMemcpyHostToDevice );
// UNCRUSTIFY-OFF
genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ), deviceData );
// UNCRUSTIFY-ON
deviceError_t err = deviceDeviceSynchronize();
deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), cudaMemcpyDeviceToHost );
deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), deviceMemcpyDeviceToHost );
deviceFree( deviceData );
if ( err != cudaSuccess )
if ( err != deviceSuccess )
{
printf( "Kernel failed: %s\n", deviceGetErrorString( err ));
if ( abortOnError )
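For reference, a hedged usage sketch of the data-transfer overload above. The include path and the placement of genericKernelWrapper directly in namespace shiva are assumptions from this diff; the snippet targets a host-only build, since a CUDA/HIP build would additionally require a device annotation on the lambda (for example an extended __device__ lambda).

#include "common/pmpl.hpp"   // assumed include path
#include <cstdio>

int main()
{
  constexpr int N = 4;
  double data[ N ] = { 1.0, 2.0, 3.0, 4.0 };

  // On a device build the lambda is launched in a <<<1,1>>> kernel against a
  // device copy of `data` that is copied back afterwards; on a host-only build
  // it is expected to operate on `data` directly.
  shiva::genericKernelWrapper( N, data, [] ( double * const kernelData )
  {
    for ( int i = 0; i < N; ++i )
    {
      kernelData[ i ] *= 2.0;
    }
  } );

  std::printf( "data[0] = %f\n", data[ 0 ] );   // expected: 2.000000
  return 0;
}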
11 changes: 8 additions & 3 deletions src/common/types.hpp
@@ -20,7 +20,6 @@
#include "common/ShivaMacros.hpp"

/// @brief Macro to define whether or not to use camp.
#define SHIVA_USE_CAMP
#if defined(SHIVA_USE_CAMP)
#include <camp/camp.hpp>
#else
@@ -52,7 +51,9 @@ using tuple = camp::tuple< T ... >;
* @return A tuple with the elements passed as arguments.
*/
template< typename ... T >
SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE auto make_tuple( T && ... t )
SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
auto
make_tuple( T && ... t )
{
return camp::make_tuple( std::forward< T >( t ) ... );
}
@@ -65,6 +66,7 @@
*/
template< typename ... T >
using tuple = cuda::std::tuple< T ... >;
using cuda::std::get;

/**
* @brief Wrapper for cuda::std::make_tuple.
@@ -73,6 +75,7 @@ using tuple = cuda::std::tuple< T ... >;
* @return A tuple with the elements passed as arguments.
*/
template< typename ... T >
SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
auto make_tuple( T && ... t )
{
return cuda::std::make_tuple( std::forward< T >( t ) ... );
@@ -92,7 +95,9 @@ using tuple = std::tuple< T ... >;
* @return A tuple with the elements passed as arguments.
*/
template< typename ... T >
auto make_tuple( T && ... t )
SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
auto
make_tuple( T && ... t )
{
return std::make_tuple( std::forward< T >( t ) ... );
}
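For reference, a minimal host-only sketch of the std fallback branch above (no CAMP, no device), with the aliases restated locally so the snippet compiles standalone; in a Shiva build these come from common/types.hpp.

#include <cstdio>
#include <tuple>
#include <utility>

namespace shiva
{
// Mirrors the std::tuple branch of types.hpp shown in this hunk.
template< typename ... T >
using tuple = std::tuple< T ... >;

template< typename ... T >
constexpr auto make_tuple( T && ... t )
{
  return std::make_tuple( std::forward< T >( t ) ... );
}
}

int main()
{
  shiva::tuple< int, double > const t = shiva::make_tuple( 2, 1.5 );

  // std::tuple supports structured bindings, so the wrapper does too in this
  // configuration.
  auto const [ count, weight ] = t;
  std::printf( "count = %d, weight = %f\n", count, weight );
  return 0;
}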