Merged
Changes from all commits
29 commits
86f6622
comment out assert calls which are not constexpr
rrsettgast Mar 16, 2025
5d873d1
make camp more optional
rrsettgast Mar 22, 2025
abed558
some cmake fixes
rrsettgast Mar 29, 2025
9c3f0d0
fixes for absolute paths in aggregateOrSplit.py
rrsettgast Mar 29, 2025
6ff1f74
updated pmpl for data transfer
rrsettgast Apr 7, 2025
e6991d1
Merge branch 'main' into feature/integrationIntoProxySEM
rrsettgast Apr 9, 2025
34c55b6
Merge branch 'feature/integrationIntoProxySEM' of github.com:GEOS-DEV…
rrsettgast Apr 9, 2025
6d655d5
Merge branch 'main' into feature/integrationIntoProxySEM
rrsettgast Apr 10, 2025
0582eed
use html url for submodules to avoid issues TotalEnergies staff were …
rrsettgast May 10, 2025
64a16a7
change looping strategy for jacobian to static
rrsettgast May 20, 2025
8abcdf3
templatize hard coded real types
rrsettgast May 21, 2025
ece7de7
some code review suggestions
rrsettgast Sep 10, 2025
1c19a83
fix bug
rrsettgast Sep 10, 2025
f9232c3
try to fix bug again
rrsettgast Sep 10, 2025
a70ffbe
try to fix bug again
rrsettgast Sep 10, 2025
8b0f231
try to fix bug again
rrsettgast Sep 10, 2025
b9cb71c
try to fix bug again
rrsettgast Sep 10, 2025
7de0c15
cuda/std/tuple workaround and uncrustify
rrsettgast Sep 11, 2025
3b4b89b
try again
rrsettgast Sep 11, 2025
8598f07
try again
rrsettgast Sep 11, 2025
2d82182
remove a bunch of crap...just don't use the bindings for cuda wihtout…
rrsettgast Sep 11, 2025
3cba8eb
add cuda version through cmake
rrsettgast Sep 11, 2025
a34ddcf
try again buddy
rrsettgast Sep 11, 2025
93cd3aa
stop listening to chat
rrsettgast Sep 11, 2025
723a02c
fix some issues
rrsettgast Sep 11, 2025
cbca43c
add maple hostconfig
rrsettgast Sep 11, 2025
85e40e0
some fixes
rrsettgast Sep 12, 2025
28880b2
doxygen
rrsettgast Sep 12, 2025
63d667d
Apply suggestions from code review
rrsettgast Sep 12, 2025
24 changes: 17 additions & 7 deletions CMakeLists.txt
@@ -15,12 +15,12 @@ set( SHIVA_VERSION_PATCHLEVEL 0 )
# check if Shiva is build as a submodule or a separate project
get_directory_property( parent_dir PARENT_DIRECTORY )
if(parent_dir)
set( is_submodule ON )
set( SHIVA_IS_SUBMODULE ON )
else()
set( is_submodule OFF )
set( SHIVA_IS_SUBMODULE OFF )
endif()

if( NOT is_submodule )
if( NOT SHIVA_IS_SUBMODULE )
message( "not a submodule")
project( Shiva LANGUAGES CXX C )

@@ -66,12 +66,22 @@ include( cmake/Macros.cmake )
include( cmake/Config.cmake )


set(SHIVA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
set(SHIVA_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )

message( STATUS "SHIVA_BINARY_DIR: ${SHIVA_BINARY_DIR}" )
message( STATUS "SHIVA_SOURCE_DIR: ${SHIVA_SOURCE_DIR}" )


add_subdirectory( src )
add_subdirectory( tpl/camp )
target_compile_options( camp PRIVATE "-Wno-shadow")

configure_file(tpl/camp/include/camp/config.in.hpp
${PROJECT_BINARY_DIR}/include/camp/config.hpp)
if( SHIVA_ENABLE_CAMP )
add_subdirectory( tpl/camp )
target_compile_options( camp PRIVATE "-Wno-shadow")

configure_file(tpl/camp/include/camp/config.in.hpp
${PROJECT_BINARY_DIR}/include/camp/config.hpp)
endif()


if( SHIVA_ENABLE_DOCS )
19 changes: 17 additions & 2 deletions cmake/CMakeBasics.cmake
@@ -27,5 +27,20 @@ blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS_DEBUG
CLANG "-fstandalone-debug"
)


set( CAMP_ENABLE_TESTS OFF CACHE BOOL "")
option( SHIVA_ENABLE_CAMP OFF )
option( CAMP_ENABLE_TESTS OFF )


if( ENABLE_CUDA )
if( CUDA_VERSION AND CUDA_VERSION_MAJOR AND CUDA_VERSION_MINOR )
set( SHIVA_CUDA_VERSION ${CUDA_VERSION} )
set( SHIVA_CUDA_MAJOR ${CUDA_VERSION_MAJOR} )
set( SHIVA_CUDA_MINOR ${CUDA_VERSION_MINOR} )
else()
message(FATAL_ERROR "CUDA_VERSION_MAJOR and CUDA_VERSION_MINOR not defined")
endif()
else()
set( SHIVA_CUDA_VERSION 0 )
set( SHIVA_CUDA_MAJOR 0 )
set( SHIVA_CUDA_MINOR 0 )
endif()
1 change: 1 addition & 0 deletions cmake/Config.cmake
@@ -1,6 +1,7 @@
#
set( PREPROCESSOR_DEFINES CUDA
HIP
CAMP
BOUNDS_CHECK
)

2 changes: 1 addition & 1 deletion cmake/blt
Submodule blt updated 354 files
5 changes: 5 additions & 0 deletions docs/doxygen/ShivaConfig.hpp
@@ -14,4 +14,9 @@

/* #undef SHIVA_USE_CALIPER */

#define SHIVA_USE_CAMP

#define SHIVA_USE_BOUNDS_CHECK

#define SHIVA_CUDA_MAJOR 0
#define SHIVA_CUDA_MINOR 0
24 changes: 24 additions & 0 deletions hostconfigs/TTE/maple_rocky9.cmake
@@ -0,0 +1,24 @@
set(CONFIG_NAME "maple_rocky9" CACHE PATH "")

set(COMPILER_DIR /opt/rh/gcc-toolset-13/root/ )
set(CMAKE_C_COMPILER ${COMPILER_DIR}/bin/gcc CACHE PATH "")
set(CMAKE_CXX_COMPILER ${COMPILER_DIR}/bin/g++ CACHE PATH "")

# C++ options
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -mtune=native -march=native" CACHE STRING "")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "")
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "")

# Cuda options
set(ENABLE_CUDA ON CACHE BOOL "")
set(CUDA_TOOLKIT_ROOT_DIR /hrtc/apps/cuda/12.6.20/aarch64/rocky9 CACHE STRING "")
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "")
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc CACHE STRING "")
set(CMAKE_CUDA_ARCHITECTURES 90 CACHE STRING "")
set(CMAKE_CUDA_STANDARD 17 CACHE STRING "")
set(CMAKE_CUDA_FLAGS "-restrict --expt-extended-lambda --expt-relaxed-constexpr -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "")
#set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3 -Xcompiler -mcpu=powerpc64le -Xcompiler -mtune=powerpc64le" CACHE STRING "")
#set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "")
#set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "")

set( SHIVA_ENABLE_CAMP OFF CACHE BOOL "Disable CAMP support" FORCE )
17 changes: 13 additions & 4 deletions scripts/aggregateOrSplit.py
@@ -20,11 +20,13 @@ def create_dependency_graph(self, header, include_paths=None):
if include_paths is None:
include_paths = []

header = os.path.abspath(header) # Normalize here

if header in self.dependencies:
return # Already processed

self.dependencies[header] = set()
base_path = os.path.dirname(os.path.abspath(header)) # Base directory of the current header
base_path = os.path.dirname(header) # Base directory of the current header

try:
with open(header, 'r') as file:
@@ -34,10 +36,10 @@ def create_dependency_graph(self, header, include_paths=None):
included_file = include_match.group(1)

if included_file != self.config_file:
resolved_path = self.resolve_path(
included_file, base_path, include_paths)
resolved_path = self.resolve_path( included_file, base_path, include_paths)

if resolved_path:
resolved_path = os.path.abspath(resolved_path)
self.dependencies[header].add(resolved_path)

if os.path.exists(resolved_path):
@@ -82,16 +84,21 @@ def resolve_path(self, included_file, base_path, include_paths):

return None # Return None if no resolution was possible


def generate_header_list(self):
remaining_dependencies = self.dependencies.copy()
size_of_remaining_dependencies = len(remaining_dependencies)
unique_files = set() # Track unique files by absolute path

while size_of_remaining_dependencies > 0:
local_included = []

for key in remaining_dependencies:
if len(remaining_dependencies[key]) == 0:
self.included_list.append(key)
abs_key = os.path.abspath(key)
if abs_key not in unique_files:
self.included_list.append(abs_key)
unique_files.add(abs_key)
local_included.append(key)

for included_key in local_included:
@@ -111,6 +118,7 @@ def process_header(header_path, output):
"""
Processes a single header file, commenting out includes and pragmas.
"""
header_path = os.path.abspath(header_path)
if header_path in self.included:
return # Avoid duplicate processing
self.included.add(header_path)
@@ -133,6 +141,7 @@ def process_header(header_path, output):

with open(output_file, 'w') as output:
for header in headers:
header = os.path.abspath(header)
self.create_dependency_graph(header, include_paths)

for header in self.dependencies:
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
@@ -23,7 +23,8 @@ blt_add_library( NAME shiva

target_include_directories( shiva
INTERFACE
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>
$<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/include>
$<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/src>
$<INSTALL_INTERFACE:include> )

install( FILES ${shiva_headers}
7 changes: 6 additions & 1 deletion src/ShivaConfig.hpp.in
@@ -14,4 +14,9 @@

#cmakedefine SHIVA_USE_CALIPER

#cmakedefine SHIVA_USE_BOUNDS_CHECK
#cmakedefine SHIVA_USE_CAMP

#cmakedefine SHIVA_USE_BOUNDS_CHECK

#define SHIVA_CUDA_MAJOR @SHIVA_CUDA_MAJOR@
#define SHIVA_CUDA_MINOR @SHIVA_CUDA_MINOR@
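For reference, a minimal sketch of how the generated SHIVA_CUDA_MAJOR / SHIVA_CUDA_MINOR macros can be consumed downstream. The 12.4 threshold and the EXAMPLE_USE_NEW_TOOLKIT_PATH macro are illustrative assumptions, not part of this PR; host-only builds see 0.0 per the CMakeBasics.cmake branch above.

#include <cstdio>

// Fallbacks only matter if this snippet is compiled outside a configured Shiva build.
#ifndef SHIVA_CUDA_MAJOR
#define SHIVA_CUDA_MAJOR 0
#endif
#ifndef SHIVA_CUDA_MINOR
#define SHIVA_CUDA_MINOR 0
#endif

// Illustrative toolkit gate (assumed threshold).
#if SHIVA_CUDA_MAJOR > 12 || ( SHIVA_CUDA_MAJOR == 12 && SHIVA_CUDA_MINOR >= 4 )
#define EXAMPLE_USE_NEW_TOOLKIT_PATH 1
#else
#define EXAMPLE_USE_NEW_TOOLKIT_PATH 0
#endif

int main()
{
  std::printf( "CUDA toolkit seen by Shiva: %d.%d (new-toolkit path: %d)\n",
               SHIVA_CUDA_MAJOR, SHIVA_CUDA_MINOR, EXAMPLE_USE_NEW_TOOLKIT_PATH );
  return 0;
}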
8 changes: 4 additions & 4 deletions src/common/CMakeLists.txt
@@ -35,14 +35,14 @@ blt_add_library( NAME common

target_include_directories( common
INTERFACE
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/include>
$<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/src>
$<INSTALL_INTERFACE:include> )

target_include_directories( common
SYSTEM INTERFACE
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/tpl/camp/include>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/tpl/camp/include> )
$<BUILD_INTERFACE:${SHIVA_SOURCE_DIR}/tpl/camp/include>
$<BUILD_INTERFACE:${SHIVA_BINARY_DIR}/tpl/camp/include> )

install( FILES ${common_headers}
DESTINATION include/common )
35 changes: 34 additions & 1 deletion src/common/ShivaMacros.hpp
@@ -105,6 +105,39 @@ void i_g_n_o_r_e( ARGS const & ... ) {}



/**
* @brief This macro is used to detect the presence of builtin functions.
*/
#ifndef SHIVA_HAS_BUILTIN
#ifdef __has_builtin
#define SHIVA_HAS_BUILTIN( x ) __has_builtin( x )
#else
#define SHIVA_HAS_BUILTIN( x ) 0
#endif
#endif

/**
* @brief Define SHIVA_IS_CONST_EVAL() depending on compiler/toolchain
*/
#if defined(__CUDA_ARCH__)
// Device code (nvcc, hipcc): no support in C++17
#define SHIVA_IS_CONST_EVAL() (false)

#elif SHIVA_HAS_BUILTIN( __builtin_is_constant_evaluated )
// GCC / Clang host code
#define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated())

#elif defined(_MSC_VER)
// MSVC
#define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated())

#else
// Fallback: always runtime
#define SHIVA_IS_CONST_EVAL() (false)
#endif



/**
* @brief This macro is used to implement an assertion.
* @param cond The condition to assert is true.
@@ -113,7 +146,7 @@
#define SHIVA_ASSERT_MSG( cond, ... ) \
do { \
if ( !(cond)) { \
if ( !__builtin_is_constant_evaluated()) { \
if ( !SHIVA_IS_CONST_EVAL() ) { \
shivaAssertionFailed( __FILE__, __LINE__, true, __VA_ARGS__ ); \
} \
} \
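For reference, a standalone sketch of the fallback chain introduced above: SHIVA_IS_CONST_EVAL() lets SHIVA_ASSERT_MSG skip the runtime failure handler during constant evaluation, so functions that assert remain usable in constexpr expressions. The checkedValue function below is a hypothetical user, not code from Shiva.

#include <cstdio>

// Same detection logic as the ShivaMacros.hpp hunk above.
#ifndef SHIVA_HAS_BUILTIN
#ifdef __has_builtin
#define SHIVA_HAS_BUILTIN( x ) __has_builtin( x )
#else
#define SHIVA_HAS_BUILTIN( x ) 0
#endif
#endif

#if defined(__CUDA_ARCH__)
#define SHIVA_IS_CONST_EVAL() (false)
#elif SHIVA_HAS_BUILTIN( __builtin_is_constant_evaluated )
#define SHIVA_IS_CONST_EVAL() (__builtin_is_constant_evaluated())
#elif defined(_MSC_VER)
#define SHIVA_IS_CONST_EVAL() (__is_constant_evaluated())
#else
#define SHIVA_IS_CONST_EVAL() (false)
#endif

// Hypothetical caller: reports a bad argument, but only at runtime.
constexpr int checkedValue( int const v )
{
  if ( v < 0 && !SHIVA_IS_CONST_EVAL() )
  {
    std::printf( "checkedValue: negative input %d\n", v );
  }
  return v;
}

// Compiles because the runtime-only branch is never evaluated at compile time.
static_assert( checkedValue( 3 ) == 3, "usable in constant expressions" );

int main()
{
  return checkedValue( -1 ) < 0 ? 0 : 1;   // runtime call takes the printf branch
}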
49 changes: 26 additions & 23 deletions src/common/pmpl.hpp
@@ -28,25 +28,27 @@
namespace shiva
{
#if defined(SHIVA_USE_DEVICE)
#if defined(SHIVA_USE_CUDA)
#define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES );
#define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES );
#define deviceDeviceSynchronize() cudaDeviceSynchronize();
#define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
#define deviceFree( PTR ) cudaFree( PTR );
#define deviceError_t cudaError_t
#define deviceSuccess cudaSuccess
#define deviceGetErrorString cudaGetErrorString
#elif defined(SHIVA_USE_HIP)
#define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
#define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
#define deviceDeviceSynchronize() hipDeviceSynchronize();
#define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
#define deviceFree( PTR ) hipFree( PTR );
#define deviceError_t hipError_t
#define deviceSuccess = hipSuccess;
#define deviceGetErrorString hipGetErrorString
#endif
#if defined(SHIVA_USE_CUDA)
#define deviceMalloc( PTR, BYTES ) cudaMalloc( PTR, BYTES );
#define deviceMallocManaged( PTR, BYTES ) cudaMallocManaged( PTR, BYTES );
#define deviceDeviceSynchronize() cudaDeviceSynchronize();
#define deviceMemCpy( DST, SRC, BYTES, KIND ) cudaMemcpy( DST, SRC, BYTES, KIND );
#define deviceFree( PTR ) cudaFree( PTR );
#define deviceError_t cudaError_t
#define deviceGetErrorString cudaGetErrorString
#define deviceMemcpyDeviceToHost cudaMemcpyDeviceToHost
constexpr cudaError_t deviceSuccess = cudaSuccess;
#elif defined(SHIVA_USE_HIP)
#define deviceMalloc( PTR, BYTES ) hipMalloc( PTR, BYTES );
#define deviceMallocManaged( PTR, BYTES ) hipMallocManaged( PTR, BYTES );
#define deviceDeviceSynchronize() hipDeviceSynchronize();
#define deviceMemCpy( DST, SRC, BYTES, KIND ) hipMemcpy( DST, SRC, BYTES, KIND );
#define deviceFree( PTR ) hipFree( PTR );
#define deviceError_t hipError_t
#define deviceGetErrorString hipGetErrorString
#define deviceMemcpyDeviceToHost hipMemcpyDeviceToHost
constexpr hipError_t deviceSuccess = hipSuccess;
#endif
#endif

/**
@@ -100,9 +102,9 @@ void genericKernelWrapper( LAMBDA && func, bool const abortOnError = true )
#if defined(SHIVA_USE_DEVICE)
// UNCRUSTIFY-OFF
genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ) );
//UNCRUSTIFY-ON
// UNCRUSTIFY-ON
deviceError_t err = deviceDeviceSynchronize();
if ( err != cudaSuccess )
if ( err != deviceSuccess )
{
printf( "Kernel failed: %s\n", deviceGetErrorString( err ));
if ( abortOnError )
@@ -157,13 +159,14 @@ void genericKernelWrapper( int const N, DATA_TYPE * const hostData, LAMBDA && fu
#if defined(SHIVA_USE_DEVICE)
DATA_TYPE * deviceData;
deviceMalloc( &deviceData, N * sizeof(DATA_TYPE) );
deviceMemCpy( deviceData, hostData, N * sizeof(DATA_TYPE), cudaMemcpyHostToDevice );
// UNCRUSTIFY-OFF
genericKernel <<< 1, 1 >>> ( std::forward< LAMBDA >( func ), deviceData );
// UNCRUSTIFY-ON
deviceError_t err = deviceDeviceSynchronize();
deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), cudaMemcpyDeviceToHost );
deviceMemCpy( hostData, deviceData, N * sizeof(DATA_TYPE), deviceMemcpyDeviceToHost );
deviceFree( deviceData );
if ( err != cudaSuccess )
if ( err != deviceSuccess )
{
printf( "Kernel failed: %s\n", deviceGetErrorString( err ));
if ( abortOnError )
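For reference, a hedged usage sketch of the data-transfer overload above. The include path and the placement of genericKernelWrapper directly in namespace shiva are assumptions from this diff; the snippet targets a host-only build, since a CUDA/HIP build would additionally require a device annotation on the lambda (for example an extended __device__ lambda).

#include "common/pmpl.hpp"   // assumed include path
#include <cstdio>

int main()
{
  constexpr int N = 4;
  double data[ N ] = { 1.0, 2.0, 3.0, 4.0 };

  // On a device build the lambda is launched in a <<<1,1>>> kernel against a
  // device copy of `data` that is copied back afterwards; on a host-only build
  // it is expected to operate on `data` directly.
  shiva::genericKernelWrapper( N, data, [] ( double * const kernelData )
  {
    for ( int i = 0; i < N; ++i )
    {
      kernelData[ i ] *= 2.0;
    }
  } );

  std::printf( "data[0] = %f\n", data[ 0 ] );   // expected: 2.000000
  return 0;
}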
11 changes: 8 additions & 3 deletions src/common/types.hpp
@@ -20,7 +20,6 @@
#include "common/ShivaMacros.hpp"

/// @brief Macro to define whether or not to use camp.
#define SHIVA_USE_CAMP
#if defined(SHIVA_USE_CAMP)
#include <camp/camp.hpp>
#else
@@ -52,7 +51,9 @@ using tuple = camp::tuple< T ... >;
* @return A tuple with the elements passed as arguments.
*/
template< typename ... T >
SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE auto make_tuple( T && ... t )
SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
auto
make_tuple( T && ... t )
{
return camp::make_tuple( std::forward< T >( t ) ... );
}
@@ -65,6 +66,7 @@
*/
template< typename ... T >
using tuple = cuda::std::tuple< T ... >;
using cuda::std::get;

/**
* @brief Wrapper for cuda::std::make_tuple.
@@ -73,6 +75,7 @@ using tuple = cuda::std::tuple< T ... >;
* @return A tuple with the elements passed as arguments.
*/
template< typename ... T >
SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
auto make_tuple( T && ... t )
{
return cuda::std::make_tuple( std::forward< T >( t ) ... );
@@ -92,7 +95,9 @@ using tuple = std::tuple< T ... >;
* @return A tuple with the elements passed as arguments.
*/
template< typename ... T >
auto make_tuple( T && ... t )
SHIVA_CONSTEXPR_HOSTDEVICE_FORCEINLINE
auto
make_tuple( T && ... t )
{
return std::make_tuple( std::forward< T >( t ) ... );
}
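For reference, a minimal host-only sketch of the std fallback branch above (no CAMP, no device), with the aliases restated locally so the snippet compiles standalone; in a Shiva build these come from common/types.hpp.

#include <cstdio>
#include <tuple>
#include <utility>

namespace shiva
{
// Mirrors the std::tuple branch of types.hpp shown in this hunk.
template< typename ... T >
using tuple = std::tuple< T ... >;

template< typename ... T >
constexpr auto make_tuple( T && ... t )
{
  return std::make_tuple( std::forward< T >( t ) ... );
}
}

int main()
{
  shiva::tuple< int, double > const t = shiva::make_tuple( 2, 1.5 );

  // std::tuple supports structured bindings, so the wrapper does too in this
  // configuration.
  auto const [ count, weight ] = t;
  std::printf( "count = %d, weight = %f\n", count, weight );
  return 0;
}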