diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..743a8ef
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,14 @@
+---
+BasedOnStyle: LLVM
+UseTab: Never
+IndentWidth: 3
+TabWidth: 3
+BreakBeforeBraces: Attach
+PointerAlignment: Left
+AlignAfterOpenBracket: true
+AllowShortIfStatementsOnASingleLine: false
+IndentCaseLabels: false
+ColumnLimit: 120
+AccessModifierOffset: -3
+...
+
diff --git a/.github/workflows/cmake_single_platform.yml b/.github/workflows/cmake_single_platform.yml
new file mode 100644
index 0000000..dde7f7e
--- /dev/null
+++ b/.github/workflows/cmake_single_platform.yml
@@ -0,0 +1,58 @@
+name: CMake build and test fsgrid
+
+on:
+  push:
+    branches: [ "master", "development", "ctest_gtest" ]
+  pull_request:
+    branches: [ "master", "development", "ctest_gtest" ]
+
+env:
+  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
+  BUILD_TYPE: Release
+  PROJECT_NAME: fsgrid
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Install OpenMPI
+      run: >
+        sudo apt-get install
+        openmpi-bin
+        libopenmpi-dev
+        openmpi-common
+
+    - name: Configure CMake
+      run: >
+        cmake
+        -B ${{github.workspace}}/build
+        -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
+        -Dproject_name=${{env.PROJECT_NAME}}
+        -DMPI_TEST_NUM_PROCS=16
+        -DMPI_TEST_EXTRA_FLAGS=--oversubscribe
+
+    - name: Build
+      run: >
+        cmake
+        --build ${{github.workspace}}/build
+        --config ${{env.BUILD_TYPE}}
+
+    - name: Test
+      id: test
+      working-directory: ${{github.workspace}}
+      run: >
+        ctest
+        --test-dir ${{github.workspace}}/build/
+        -C ${{env.BUILD_TYPE}}
+        --output-on-failure
+        --output-log ${{github.workspace}}/build/github_test_log.log
+
+    - name: Archive test logs
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: test-logs
+        path: ${{github.workspace}}/build/github_test_log.log
diff --git a/.gitignore b/.gitignore
index b8bd026..fe36acb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,10 @@
 *.exe
 *.out
 *.app
+
+# directories to ignore
+build/
+.cache/
+
+# compile_commands
+compile_commands.json
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..dd8e24b
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,44 @@
+cmake_minimum_required(VERSION 3.18...3.24)
+
+project(${project_name} VERSION 1.0 DESCRIPTION "Fsgrid testing with ctest and googletest" LANGUAGES CXX)
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Download googletest as a dependency
+include(FetchContent)
+FetchContent_Declare(
+  googletest
+  GIT_REPOSITORY https://github.com/google/googletest.git
+  GIT_TAG f8d7d77c06936315286eb55f8de22cd23c188571 # release-1.14.0
+)
+FetchContent_MakeAvailable(googletest)
+
+include(CTest)
+enable_testing()
+
+set(gcc "$")
+
+set(debug_gcc "$,${gcc}>")
+
+set(gcc_warn -Wall;-Werror;-Wextra;-Wconversion;-Wsign-conversion;)
+set(gcc_warn ${gcc_warn};-pedantic-errors;-Wcast-qual;-Wwrite-strings;)
+set(gcc_warn ${gcc_warn};-Wcast-align=strict;-Wparentheses;)
+set(gcc_warn ${gcc_warn};-Wlogical-op;-Wlogical-not-parentheses;)
+set(gcc_warn ${gcc_warn};-Wredundant-decls;-Wformat=2;)
+set(gcc_warn ${gcc_warn};-Wformat-security;-Wformat-nonliteral;)
+set(gcc_warn ${gcc_warn};-Wnull-dereference;-Winit-self;-Wuninitialized;)
+set(gcc_warn ${gcc_warn};-Warith-conversion;-Wduplicated-branches;)
+set(gcc_warn ${gcc_warn};-Wpointer-arith;-Wundef;)
+set(gcc_warn ${gcc_warn};-Wduplicated-cond;-Wformat-signedness;)
+
+set(gcc_deb_opt -Og;)
+set(gcc_rel_opt -O3;)
+
+add_compile_options(
"$<${gcc}:${gcc_warn}>" + "$" + ) + +# This must be after the add_compile_options for it to have any effect +add_subdirectory(tests) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bbea6c1 --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +MAKEFLAGS += --silent + +project_name:=fsgrid +root_dir:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +source_dir:=$(root_dir) +build_dir_prefix:=$(root_dir)/build + +.PHONY: all test clean + +.ONESHELL: +all: + build_type=Release + build_dir=$(build_dir_prefix)/$${build_type} + cmake \ + -B $${build_dir} \ + -S $(source_dir) \ + -DCMAKE_BUILD_TYPE:STRING=$${build_type} \ + -Dproject_name=$(project_name) \ + -DMPI_TEST_NUM_PROCS=16 \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + cmake \ + --build $${build_dir} \ + -j8 + cp $${build_dir}/compile_commands.json $(source_dir) + +clean: + rm -rf $(build_dir_prefix) $(project_name) + +test: all + ctest --test-dir $(build_dir_prefix)/Release --output-on-failure diff --git a/fsgrid.hpp b/fsgrid.hpp index cb81473..b8a1b9e 100644 --- a/fsgrid.hpp +++ b/fsgrid.hpp @@ -2,10 +2,10 @@ /* Copyright (C) 2016 Finnish Meteorological Institute - Copyright (C) 2016-2024 CSC -IT Center for Science + Copyright (C) 2016-2024 CSC -IT Center for Science This file is part of fsgrid - + fsgrid is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -20,1141 +20,7 @@ You should have received a copy of the GNU General Public License along with fsgrid. If not, see . */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef FS_MASTER_RANK -#define FS_MASTER_RANK 0 -#endif - -struct FsGridTools{ - - typedef uint32_t FsSize_t; // Size type for global array indices - typedef int32_t FsIndex_t; // Size type for global/local array indices, incl. possible negative values - - typedef int64_t LocalID; - typedef int64_t GlobalID; - typedef int Task_t; - - //! Helper function: calculate position of the local coordinate space for the given dimension - // \param globalCells Number of cells in the global Simulation, in this dimension - // \param ntasks Total number of tasks in this dimension - // \param my_n This task's position in this dimension - // \return Cell number at which this task's domains cells start (actual cells, not counting ghost cells) - static FsIndex_t calcLocalStart(FsSize_t globalCells, Task_t ntasks, Task_t my_n) { - FsIndex_t n_per_task = globalCells / ntasks; - FsIndex_t remainder = globalCells % ntasks; - - if(my_n < remainder) { - return my_n * (n_per_task+1); - } else { - return my_n * n_per_task + remainder; - } - } - - //! Helper function: given a global cellID, calculate the global cell coordinate from it. - // This is then used do determine the task responsible for this cell, and the - // local cell index in it. - static std::array globalIDtoCellCoord(GlobalID id, std::array &globalSize) { - - // Transform globalID to global cell coordinate - std::array cell; - - assert(id >= 0); - assert(id < globalSize[0] * globalSize[1] * globalSize[2]); - - int stride=1; - for(int i=0; i<3; i++) { - cell[i] = (id / stride) % globalSize[i]; - stride *= globalSize[i]; - } - - return cell; - } - - //! 
Helper function: calculate size of the local coordinate space for the given dimension - // \param globalCells Number of cells in the global Simulation, in this dimension - // \param ntasks Total number of tasks in this dimension - // \param my_n This task's position in this dimension - // \return Number of cells for this task's local domain (actual cells, not counting ghost cells) - static FsIndex_t calcLocalSize(FsSize_t globalCells, Task_t ntasks, Task_t my_n) { - FsIndex_t n_per_task = globalCells/ntasks; - FsIndex_t remainder = globalCells%ntasks; - if(my_n < remainder) { - return n_per_task+1; - } else { - return n_per_task; - } - } - - //! Helper function to optimize decomposition of this grid over the given number of tasks - static void computeDomainDecomposition(const std::array& GlobalSize, Task_t nProcs, std::array& processDomainDecomposition, int stencilSize=1, int verbose = 0) { - int myRank, MPI_flag; - MPI_Initialized(&MPI_flag); - if(MPI_flag){ - MPI_Comm_rank(MPI_COMM_WORLD, &myRank); // TODO allow for separate communicator - } else { - myRank = FS_MASTER_RANK; - } - - std::array systemDim; - std::array processBox; - std::array minDomainSize; - int64_t optimValue = std::numeric_limits::max(); - std::vector>> scored_decompositions; - scored_decompositions.push_back(std::pair>(optimValue,{0,0,0})); - for(int i = 0; i < 3; i++) { - systemDim[i] = GlobalSize[i]; - if(GlobalSize[i] == 1) { - // In 2D simulation domains, the "thin" dimension can be a single cell thick. - minDomainSize[i] = 1; - } else { - // Otherwise, it needs to be at least as large as our ghost - // stencil, so that ghost communication remains consistent. - minDomainSize[i] = stencilSize; - } - } - processDomainDecomposition = {1, 1, 1}; - for (Task_t i = 1; i <= std::min(nProcs, (Task_t)(GlobalSize[0]/minDomainSize[0])); i++) { - for (Task_t j = 1; j <= std::min(nProcs, (Task_t)(GlobalSize[1]/minDomainSize[1])) ; j++) { - if( i * j > nProcs ){ - break; - } - Task_t k = nProcs/(i*j); - // No need to optimize an incompatible DD, also checks for missing remainders - if( i * j * k != nProcs ) { - continue; - } - if (k > (Task_t)(GlobalSize[2]/minDomainSize[2])) { - continue; - } - - processBox[0] = calcLocalSize(systemDim[0],i,0); - processBox[1] = calcLocalSize(systemDim[1],j,0); - processBox[2] = calcLocalSize(systemDim[2],k,0); - - int64_t value = - (i > 1 ? processBox[1] * processBox[2]: 0) + - (j > 1 ? processBox[0] * processBox[2]: 0) + - (k > 1 ? 
processBox[0] * processBox[1]: 0); - - // account for singular domains - if (i!=1 && j!= 1 && k!=1) { - value *= 13; // 26 neighbours to communicate to - } - if (i==1 && j!= 1 && k!=1) { - value *= 4; // 8 neighbours to communicate to - } - if (i!=1 && j== 1 && k!=1) { - value *= 4; // 8 neighbours to communicate to - } - if (i!=1 && j!= 1 && k==1) { - value *= 4; // 8 neighbours to communicate to - } - // else: 2 neighbours to communicate to, no need to adjust - - if(value <= optimValue ){ - optimValue = value; - if(value < scored_decompositions.back().first){ - scored_decompositions.clear(); - } - scored_decompositions.push_back(std::pair>(value, {i,j,k})); - } - } - } - - if(myRank == FS_MASTER_RANK && verbose){ - std::cout << "(FSGRID) Number of equal minimal-surface decompositions found: " << scored_decompositions.size() << "\n"; - for (auto kv : scored_decompositions){ - std::cout << "(FSGRID) Decomposition " << kv.second[0] <<","<::max() || - (Task_t)(processDomainDecomposition[0] * processDomainDecomposition[1] * processDomainDecomposition[2]) != nProcs) { - if(myRank==0){ - std::cerr << "(FSGRID) Domain decomposition failed, are you running on a prime number of tasks?" << std::endl; - } - throw std::runtime_error("FSGrid computeDomainDecomposition failed"); - } - if(myRank==0 && verbose){ - std::cout << "(FSGRID) decomposition chosen as "<< processDomainDecomposition[0] << " " << processDomainDecomposition[1] << " " << processDomainDecomposition[2] << ", for processBox sizes " << - systemDim[0]/processDomainDecomposition[0] << " " << systemDim[1]/processDomainDecomposition[1] << " " << systemDim[2]/processDomainDecomposition[2] << - " \n"; - } - } - - -}; - -/*! Simple cartesian, non-loadbalancing MPI Grid for use with the fieldsolver - * - * \param T datastructure containing the field in each cell which this grid manages - * \param stencil ghost cell width of this grid - */ -template class FsGrid : public FsGridTools{ - template void swapArray(std::array& array) { - ArrayT a = array[0]; - array[0] = array[2]; - array[2] = a; - } - public: - - /*! Constructor for this grid. - * \param globalSize Cell size of the global simulation domain. - * \param MPI_Comm The MPI communicator this grid should use. - * \param isPeriodic An array specifying, for each dimension, whether it is to be treated as periodic. 
- */ - FsGrid(std::array globalSize, MPI_Comm parent_comm, std::array isPeriodic, - const std::array& decomposition = {0,0,0}, bool verbose = false) - : globalSize(globalSize) { - int status; - int size; - - // Get parent_comm info - int parentRank; - MPI_Comm_rank(parent_comm, &parentRank); - int parentSize; - MPI_Comm_size(parent_comm, &parentSize); - - // If environment variable FSGRID_PROCS is set, - // use that for determining the number of FS-processes - size = parentSize; - if(getenv("FSGRID_PROCS") != NULL) { - const int fsgridProcs = atoi(getenv("FSGRID_PROCS")); - if(fsgridProcs > 0 && fsgridProcs < size) - size = fsgridProcs; - } - - std::array emptyarr = {0,0,0}; - if (decomposition == emptyarr){ - // If decomposition isn't pre-defined, heuristically choose a good domain decomposition for our field size - computeDomainDecomposition(globalSize, size, ntasksPerDim, stencil, verbose); - } else { - ntasksPerDim = decomposition; - if (ntasksPerDim[0]*ntasksPerDim[1]*ntasksPerDim[2] != size){ - std::cerr << "Given decomposition ("< isPeriodicInt, ntasksInt; - for(unsigned int i=0; i < isPeriodic.size(); i++) { - isPeriodicInt[i] = (int)isPeriodic[i]; - ntasksInt[i] = (int)ntasksPerDim[i]; - } - - // Create a temporary FS subcommunicator for the MPI_Cart_create - int colorFs = (parentRank < size) ? 1 : MPI_UNDEFINED; - MPI_Comm_split(parent_comm, colorFs, parentRank, &comm1d); - - if(colorFs != MPI_UNDEFINED){ - // Create cartesian communicator. Note, that reorder is false so - // ranks match the ones in parent_comm - status = MPI_Cart_create(comm1d, 3, ntasksPerDim.data(), isPeriodicInt.data(), 0, &comm3d); - - if(status != MPI_SUCCESS) { - std::cerr << "Creating cartesian communicatior failed when attempting to create FsGrid!" << std::endl; - throw std::runtime_error("FSGrid communicator setup failed"); - } - - status = MPI_Comm_rank(comm3d, &rank); - if(status != MPI_SUCCESS) { - std::cerr << "Getting rank failed when attempting to create FsGrid!" << std::endl; - - // Without a rank, there's really not much we can do. Just return an uninitialized grid - // (I suppose we'll crash after this, anyway) - return; - } - - // Determine our position in the resulting task-grid - status = MPI_Cart_coords(comm3d, rank, 3, taskPosition.data()); - if(status != MPI_SUCCESS) { - std::cerr << "Rank " << rank - << " unable to determine own position in cartesian communicator when attempting to create FsGrid!" - << std::endl; - } - } - - // Create a temporary aux subcommunicator for the (Aux) MPI_Cart_create - int colorAux = (parentRank > (parentSize - 1) % size) ? (parentRank - (parentSize % size)) / size : MPI_UNDEFINED; - MPI_Comm_split(parent_comm, colorAux, parentRank, &comm1d_aux); - - int rankAux; - std::array taskPositionAux; - - if(colorAux != MPI_UNDEFINED){ - // Create an aux cartesian communicator corresponding to the comm3d (but shidted). - status = MPI_Cart_create(comm1d_aux, 3, ntasksPerDim.data(), isPeriodicInt.data(), 0, &comm3d_aux); - if(status != MPI_SUCCESS) { - std::cerr << "Creating cartesian communicatior failed when attempting to create FsGrid!" << std::endl; - throw std::runtime_error("FSGrid communicator setup failed"); - } - status = MPI_Comm_rank(comm3d_aux, &rankAux); - if(status != MPI_SUCCESS) { - std::cerr << "Getting rank failed when attempting to create FsGrid!" 
<< std::endl; - return; - } - // Determine our position in the resulting task-grid - status = MPI_Cart_coords(comm3d_aux, rankAux, 3, taskPositionAux.data()); - if(status != MPI_SUCCESS) { - std::cerr << "Rank " << rankAux - << " unable to determine own position in cartesian communicator when attempting to create FsGrid!" - << std::endl; - } - } - -#ifdef FSGRID_DEBUG - // All FS ranks send their true comm3d rank and taskPosition data to dest - MPI_Request *request = new MPI_Request[(parentSize - 1) / size * 2 + 2]; - for(int i = 0; i < (parentSize - 1) / size; i++){ - int dest = (colorFs != MPI_UNDEFINED) ? parentRank + i * size + (parentSize - 1) - % size + 1 : MPI_PROC_NULL; - if(dest >= parentSize) - dest = MPI_PROC_NULL; - MPI_Isend(&rank, 1, MPI_INT, dest, 9274, parent_comm, &request[2 * i]); - MPI_Isend(taskPosition.data(), 3, MPI_INT, dest, 9275, parent_comm, &request[2 * i + 1]); - } - - // All Aux ranks receive the true comm3d rank and taskPosition data from - // source and then compare that it matches their aux data - std::array taskPositionRecv; - int rankRecv; - int source = (colorAux != MPI_UNDEFINED) ? parentRank - (parentRank - - (parentSize % size)) / size * size - parentSize % size : MPI_PROC_NULL; - - MPI_Irecv(&rankRecv, 1, MPI_INT, source, 9274, parent_comm, &request[(parentSize - 1) / size * 2]); - MPI_Irecv(taskPositionRecv.data(), 3, MPI_INT, source, 9275, parent_comm, - &request[(parentSize - 1) / size * 2 + 1]); - MPI_Waitall((parentSize - 1) / size * 2 + 2, request, MPI_STATUS_IGNORE); - - if(colorAux != MPI_UNDEFINED){ - if(rankRecv != rankAux || - taskPositionRecv[0] != taskPositionAux[0] || - taskPositionRecv[1] != taskPositionAux[1] || - taskPositionRecv[2] != taskPositionAux[2]){ - std::cerr << "Rank: " << parentRank - << ". Aux cartesian communicator 'comm3d_aux' does not match with 'comm3d' !" << std::endl; - throw std::runtime_error("FSGrid aux communicator setup failed."); - } - } - delete[] request; -#endif // FSGRID_DEBUG - - // Set correct task position for non-FS ranks - if(colorFs == MPI_UNDEFINED){ - for(int i=0; i<3; i++){ - taskPosition[i] = taskPositionAux[i]; - } - } - - // Determine size of our local grid - for(int i=0; i<3; i++) { - localSize[i] = calcLocalSize(globalSize[i],ntasksPerDim[i], taskPosition[i]); - localStart[i] = calcLocalStart(globalSize[i],ntasksPerDim[i], taskPosition[i]); - } - - if( localSize[0] == 0 || (globalSize[0] > stencil && localSize[0] < stencil) - || localSize[1] == 0 || (globalSize[1] > stencil && localSize[1] < stencil) - || localSize[2] == 0 || (globalSize[2] > stencil && localSize[2] < stencil)) { - std::cerr << "FSGrid space partitioning leads to a space that is too small on Rank " << rank << "." 
< neighPosition; - - /* - * Figure out the coordinates of the neighbours in all three - * directions - */ - neighPosition[0]=taskPosition[0]+x; - if(isPeriodic[0]) { - neighPosition[0] += ntasksPerDim[0]; - neighPosition[0] %= ntasksPerDim[0]; - } - - neighPosition[1]=taskPosition[1]+y; - if(isPeriodic[1]) { - neighPosition[1] += ntasksPerDim[1]; - neighPosition[1] %= ntasksPerDim[1]; - } - - neighPosition[2]=taskPosition[2]+z; - if(isPeriodic[2]) { - neighPosition[2] += ntasksPerDim[2]; - neighPosition[2] %= ntasksPerDim[2]; - } - - /* - * If those coordinates exist, figure out the responsible CPU - * and store its rank - */ - if(neighPosition[0]>=0 && neighPosition[0]=0 - && neighPosition[1]=0 && neighPosition[2]= 0 && neighRank < size) { - neighbour_index[neighRank]=(char) ((x+1)*9+(y+1)*3+(z+1)); - } - } else { - neighbour[(x+1)*9+(y+1)*3+(z+1)]=MPI_PROC_NULL; - } - } - } - } - - // Allocate local storage array - size_t totalStorageSize=1; - for(int i=0; i<3; i++) { - if(globalSize[i] <= 1) { - // Collapsed dimension => only one cell thick - storageSize[i] = 1; - } else { - // Size of the local domain + 2* size for the ghost cell stencil - storageSize[i] = (localSize[i] + stencil*2); - } - totalStorageSize *= storageSize[i]; - } - data.resize(totalStorageSize); - - MPI_Datatype mpiTypeT; - MPI_Type_contiguous(sizeof(T), MPI_BYTE, &mpiTypeT); - - // Compute send and receive datatypes - //loop through the shifts in the different directions - for(int x=-1; x<=1;x++) { - for(int y=-1; y<=1;y++) { - for(int z=-1; z<=1; z++) { - std::array subarraySize; - std::array subarrayStart; - const int shiftId = (x+1) * 9 + (y + 1) * 3 + (z + 1); - - if((storageSize[0] == 1 && x!= 0 ) || - (storageSize[1] == 1 && y!= 0 ) || - (storageSize[2] == 1 && z!= 0 ) || - (x == 0 && y == 0 && z == 0)){ - //skip flat dimension for 2 or 1D simulations, and self - neighbourSendType[shiftId] = MPI_DATATYPE_NULL; - neighbourReceiveType[shiftId] = MPI_DATATYPE_NULL; - continue; - } - - subarraySize[0] = (x == 0) ? localSize[0] : stencil; - subarraySize[1] = (y == 0) ? localSize[1] : stencil; - subarraySize[2] = (z == 0) ? 
localSize[2] : stencil; - - if( x == 0 || x == -1 ) - subarrayStart[0] = stencil; - else if (x == 1) - subarrayStart[0] = storageSize[0] - 2 * stencil; - if( y == 0 || y == -1 ) - subarrayStart[1] = stencil; - else if (y == 1) - subarrayStart[1] = storageSize[1] - 2 * stencil; - if( z == 0 || z == -1 ) - subarrayStart[2] = stencil; - else if (z == 1) - subarrayStart[2] = storageSize[2] - 2 * stencil; - - for(int i = 0;i < 3; i++) - if(storageSize[i] == 1) - subarrayStart[i] = 0; - - std::array swappedStorageSize = {(int)storageSize[0],(int)storageSize[1],(int)storageSize[2]}; - swapArray(swappedStorageSize); - swapArray(subarraySize); - swapArray(subarrayStart); - MPI_Type_create_subarray(3, - swappedStorageSize.data(), - subarraySize.data(), - subarrayStart.data(), - MPI_ORDER_C, - mpiTypeT, - &(neighbourSendType[shiftId]) ); - - if(x == 1 ) - subarrayStart[0] = 0; - else if (x == 0) - subarrayStart[0] = stencil; - else if (x == -1) - subarrayStart[0] = storageSize[0] - stencil; - if(y == 1 ) - subarrayStart[1] = 0; - else if (y == 0) - subarrayStart[1] = stencil; - else if (y == -1) - subarrayStart[1] = storageSize[1] - stencil; - if(z == 1 ) - subarrayStart[2] = 0; - else if (z == 0) - subarrayStart[2] = stencil; - else if (z == -1) - subarrayStart[2] = storageSize[2] - stencil; - for(int i = 0;i < 3; i++) - if(storageSize[i] == 1) - subarrayStart[i] = 0; - - swapArray(subarrayStart); - MPI_Type_create_subarray(3, - swappedStorageSize.data(), - subarraySize.data(), - subarrayStart.data(), - MPI_ORDER_C, - mpiTypeT, - &(neighbourReceiveType[shiftId])); - - } - } - } - - for(int i=0;i<27;i++){ - if(neighbourReceiveType[i] != MPI_DATATYPE_NULL) - MPI_Type_commit(&(neighbourReceiveType[i])); - if(neighbourSendType[i] != MPI_DATATYPE_NULL) - MPI_Type_commit(&(neighbourSendType[i])); - } - } - - std::vector& getData(){ - return this->data; - } - - void copyData(FsGrid &other){ - this->data = other.getData(); // Copy assignment - } - - /*! - * MPI calls fail after the main program called MPI_Finalize(), - * so this can be used instead of the destructor - * Cleans up the cartesian communicator and datatypes - */ - void finalize() noexcept { - if (comm3d != MPI_COMM_NULL) { - MPI_Comm_free(&comm3d); - comm3d = MPI_COMM_NULL; - } - if(comm3d_aux != MPI_COMM_NULL) { - MPI_Comm_free(&comm3d_aux); - comm3d_aux = MPI_COMM_NULL; - } - if(comm1d != MPI_COMM_NULL) { - MPI_Comm_free(&comm1d); - comm1d = MPI_COMM_NULL; - } - if(comm1d_aux != MPI_COMM_NULL) { - MPI_Comm_free(&comm1d_aux); - comm1d_aux = MPI_COMM_NULL; - } - - for (auto& type : neighbourReceiveType) { - if (type != MPI_DATATYPE_NULL) { - MPI_Type_free(&type); - type = MPI_DATATYPE_NULL; - } - } - for (auto& type : neighbourSendType) { - if (type != MPI_DATATYPE_NULL) { - MPI_Type_free(&type); - type = MPI_DATATYPE_NULL; - } - } - } - - /*! 
- * If finalize() isn't called manually, object should be destroyed before MPI finalization - */ - ~FsGrid() { - finalize(); - } - - friend void swap (FsGrid& first, FsGrid& second) noexcept { - using std::swap; - swap(first.DX, second.DX); - swap(first.DY, second.DY); - swap(first.DZ, second.DZ); - swap(first.physicalGlobalStart, second.physicalGlobalStart); - swap(first.comm1d, second.comm1d); - swap(first.comm1d_aux, second.comm1d_aux); - swap(first.comm3d, second.comm3d); - swap(first.comm3d_aux, second.comm3d_aux); - swap(first.rank, second.rank); - swap(first.requests, second.requests); - swap(first.numRequests, second.numRequests); - swap(first.neighbour, second.neighbour); - swap(first.neighbour_index, second.neighbour_index); - swap(first.ntasksPerDim, second.ntasksPerDim); - swap(first.taskPosition, second.taskPosition); - swap(first.periodic, second.periodic); - swap(first.globalSize, second.globalSize); - swap(first.localSize, second.localSize); - swap(first.storageSize, second.storageSize); - swap(first.localStart, second.localStart); - swap(first.neighbourSendType, second.neighbourSendType); - swap(first.neighbourReceiveType, second.neighbourReceiveType); - swap(first.data, second.data); - } - - // Copy constructor - FsGrid(const FsGrid& other) : - DX {other.DX}, - DY {other.DY}, - DZ {other.DZ}, - physicalGlobalStart {other.physicalGlobalStart}, - comm1d {MPI_COMM_NULL}, - comm1d_aux {MPI_COMM_NULL}, - comm3d {MPI_COMM_NULL}, - comm3d_aux {MPI_COMM_NULL}, - rank {other.rank}, - requests {}, - numRequests {0}, - neighbour {other.neighbour}, - neighbour_index {other.neighbour_index}, - ntasksPerDim {other.ntasksPerDim}, - taskPosition {other.taskPosition}, - periodic {other.periodic}, - globalSize {other.globalSize}, - localSize {other.localSize}, - storageSize {other.storageSize}, - localStart {other.localStart}, - neighbourSendType {}, - neighbourReceiveType {}, - data {other.data} - { - if (other.comm3d != MPI_COMM_NULL) { - MPI_Comm_dup(other.comm3d, &comm3d); - } - - neighbourSendType.fill(MPI_DATATYPE_NULL); - neighbourReceiveType.fill(MPI_DATATYPE_NULL); - for (size_t i = 0; i < neighbourSendType.size(); ++i) { - if (other.neighbourSendType[i] != MPI_DATATYPE_NULL) { - MPI_Type_dup(other.neighbourSendType[i], neighbourSendType.data() + i); - } - if (other.neighbourReceiveType[i] != MPI_DATATYPE_NULL) { - MPI_Type_dup(other.neighbourReceiveType[i], neighbourReceiveType.data() + i); - } - } - } - - // Move constructor - // We don't have a default constructor, so just set the MPI stuff NULL - FsGrid(FsGrid&& other) noexcept : - comm1d {MPI_COMM_NULL}, - comm1d_aux {MPI_COMM_NULL}, - comm3d {MPI_COMM_NULL}, - comm3d_aux {MPI_COMM_NULL}, - neighbourSendType {}, - neighbourReceiveType {} - { - // NULL all the MPI stuff so they won't get freed if destroyed - neighbourSendType.fill(MPI_DATATYPE_NULL); - neighbourReceiveType.fill(MPI_DATATYPE_NULL); - - using std::swap; - swap(*this, other); - } - - // Copy assignment - // Canonical copy assign is construct to temp and swap, but this would result in an extra allocation of the entire grid - // Copy assignment is currently not used in Vlasiator, so the operator is deleted as any use is likely either erroneous or just a bad idea - FsGrid& operator=(FsGrid other) = delete; - - // Move assignment - FsGrid& operator=(FsGrid&& other) noexcept { - using std::swap; - swap(*this, other); - return *this; - } - - /*! 
Returns the task responsible, and its localID for handling the cell with the given GlobalID - * \param id GlobalID of the cell for which task is to be determined - * \return a task for the grid's cartesian communicator - */ - std::pair getTaskForGlobalID(GlobalID id) { - // Transform globalID to global cell coordinate - std::array cell = FsGridTools::globalIDtoCellCoord(id, globalSize); - - // Find the index in the task grid this Cell belongs to - std::array taskIndex; - for(int i=0; i<3; i++) { - int n_per_task = globalSize[i] / ntasksPerDim[i]; - int remainder = globalSize[i] % ntasksPerDim[i]; - - if(cell[i] < remainder * (n_per_task+1)) { - taskIndex[i] = cell[i] / (n_per_task + 1); - } else { - taskIndex[i] = remainder + (cell[i] - remainder*(n_per_task+1)) / n_per_task; - } - } - - // Get the task number from the communicator - std::pair retVal; - int status = MPI_Cart_rank(comm3d, taskIndex.data(), &retVal.first); - if(status != MPI_SUCCESS) { - std::cerr << "Unable to find FsGrid rank for global ID " << id << " (coordinates ["; - for(int i=0; i<3; i++) { - std::cerr << cell[i] << ", "; - } - std::cerr << "]" << std::endl; - return std::pair(MPI_PROC_NULL,0); - } - - // Determine localID of that cell within the target task - std::array thatTasksStart; - std::array thatTaskStorageSize; - for(int i=0; i<3; i++) { - thatTasksStart[i] = calcLocalStart(globalSize[i], ntasksPerDim[i], taskIndex[i]); - thatTaskStorageSize[i] = calcLocalSize(globalSize[i], ntasksPerDim[i], taskIndex[i]) + 2 * stencil; - } - - retVal.second = 0; - int stride = 1; - for(int i=0; i<3; i++) { - if(globalSize[i] <= 1) { - // Collapsed dimension, doesn't contribute. - retVal.second += 0; - } else { - retVal.second += stride*(cell[i] - thatTasksStart[i] + stencil); - stride *= thatTaskStorageSize[i]; - } - } - - return retVal; - } - - /*! Transform global cell coordinates into the local domain. - * If the coordinates are out of bounds, (-1,-1,-1) is returned. - * \param x The cell's global x coordinate - * \param y The cell's global y coordinate - * \param z The cell's global z coordinate - */ - std::array globalToLocal(FsSize_t x, FsSize_t y, FsSize_t z) { - std::array retval; - retval[0] = (FsIndex_t)x - localStart[0]; - retval[1] = (FsIndex_t)y - localStart[1]; - retval[2] = (FsIndex_t)z - localStart[2]; - - if(retval[0] >= localSize[0] || retval[1] >= localSize[1] || retval[2] >= localSize[2] - || retval[0] < 0 || retval[1] < 0 || retval[2] < 0) { - return {-1,-1,-1}; - } - - return retval; - } - - /*! Determine the cell's GlobalID from its local x,y,z coordinates - * \param x The cell's task-local x coordinate - * \param y The cell's task-local y coordinate - * \param z The cell's task-local z coordinate - */ - GlobalID GlobalIDForCoords(int x, int y, int z) { - return x + localStart[0] + globalSize[0] * (y + localStart[1]) + globalSize[0] * globalSize[1] * (z + localStart[2]); - } - /*! Determine the cell's LocalID from its local x,y,z coordinates - * \param x The cell's task-local x coordinate - * \param y The cell's task-local y coordinate - * \param z The cell's task-local z coordinate - */ - LocalID LocalIDForCoords(int x, int y, int z) { - LocalID index=0; - if(globalSize[2] > 1) { - index += storageSize[0]*storageSize[1]*(stencil+z); - } - if(globalSize[1] > 1) { - index += storageSize[0]*(stencil+y); - } - if(globalSize[0] > 1 ) { - index += stencil+x; - } - - return index; - } - - /*! Perform ghost cell communication. 
- */ - void updateGhostCells() { - - if(rank == -1) return; - - //TODO, faster with simultaneous isends& ireceives? - std::array receiveRequests; - std::array sendRequests; - - for(int i = 0; i < 27; i++){ - receiveRequests[i] = MPI_REQUEST_NULL; - sendRequests[i] = MPI_REQUEST_NULL; - } - - - for(int x=-1; x<=1;x++) { - for(int y=-1; y<=1;y++) { - for(int z=-1; z<=1; z++) { - int shiftId = (x+1) * 9 + (y + 1) * 3 + (z + 1); - int receiveId = (1 - x) * 9 + ( 1 - y) * 3 + ( 1 - z); - if(neighbour[receiveId] != MPI_PROC_NULL && - neighbourSendType[shiftId] != MPI_DATATYPE_NULL) { - MPI_Irecv(data.data(), 1, neighbourReceiveType[shiftId], neighbour[receiveId], shiftId, comm3d, &(receiveRequests[shiftId])); - } - } - } - } - - for(int x=-1; x<=1;x++) { - for(int y=-1; y<=1;y++) { - for(int z=-1; z<=1; z++) { - int shiftId = (x+1) * 9 + (y + 1) * 3 + (z + 1); - int sendId = shiftId; - if(neighbour[sendId] != MPI_PROC_NULL && - neighbourSendType[shiftId] != MPI_DATATYPE_NULL) { - MPI_Isend(data.data(), 1, neighbourSendType[shiftId], neighbour[sendId], shiftId, comm3d, &(sendRequests[shiftId])); - } - } - } - } - MPI_Waitall(27, receiveRequests.data(), MPI_STATUSES_IGNORE); - MPI_Waitall(27, sendRequests.data(), MPI_STATUSES_IGNORE); - } - - /*! Get the size of the local domain handled by this grid. - */ - std::array& getLocalSize() { - return localSize; - } - - /*! Get the start coordinates of the local domain handled by this grid. - */ - std::array& getLocalStart() { - return localStart; - } - - /*! Get global size of the fsgrid domain - */ - std::array& getGlobalSize() { - return globalSize; - } - - /*! Calculate global cell position (XYZ in global cell space) from local cell coordinates. - * - * \param x x-Coordinate, in cells - * \param y y-Coordinate, in cells - * \param z z-Coordinate, in cells - * - * \return Global cell coordinates - */ - std::array getGlobalIndices(int64_t x, int64_t y, int64_t z) { - std::array retval; - retval[0] = localStart[0] + x; - retval[1] = localStart[1] + y; - retval[2] = localStart[2] + z; - - return retval; - } - - /*! Get a reference to the field data in a cell - * \param x x-Coordinate, in cells - * \param y y-Coordinate, in cells - * \param z z-Coordinate, in cells - * \return A reference to cell data in the given cell - */ - T* get(int x, int y, int z) { - - // Keep track which neighbour this cell actually belongs to (13 = ourself) - int isInNeighbourDomain=13; - int coord_shift[3] = {0,0,0}; - if(x < 0) { - isInNeighbourDomain -= 9; - coord_shift[0] = 1; - } - if(x >= localSize[0]) { - isInNeighbourDomain += 9; - coord_shift[0] = -1; - } - if(y < 0) { - isInNeighbourDomain -= 3; - coord_shift[1] = 1; - } - if(y >= localSize[1]) { - isInNeighbourDomain += 3; - coord_shift[1] = -1; - } - if(z < 0) { - isInNeighbourDomain -= 1; - coord_shift[2] = 1; - } - if(z >= localSize[2]) { - isInNeighbourDomain += 1; - coord_shift[2] = -1; - } - - // Santiy-Check that the requested cell is actually inside our domain - // TODO: ugh, this is ugly. -#ifdef FSGRID_DEBUG - bool inside=true; - if(localSize[0] <= 1 && !periodic[0]) { - if(x != 0) { - std::cerr << "x != 0 despite non-periodic x-axis with only one cell." << std::endl; - inside = false; - } - } else { - if(x < -stencil || x >= localSize[0] + stencil) { - std::cerr << "x = " << x << " is outside of [ " << -stencil << - ", " << localSize[0] + stencil << "[!" 
<< std::endl; - inside = false; - } - } - - if(localSize[1] <= 1 && !periodic[1]) { - if(y != 0) { - std::cerr << "y != 0 despite non-periodic y-axis with only one cell." << std::endl; - inside = false; - } - } else { - if(y < -stencil || y >= localSize[1] + stencil) { - std::cerr << "y = " << y << " is outside of [ " << -stencil << - ", " << localSize[1] + stencil << "[!" << std::endl; - inside = false; - } - } - - if(localSize[2] <= 1 && !periodic[2]) { - if(z != 0) { - std::cerr << "z != 0 despite non-periodic z-axis with only one cell." << std::endl; - inside = false; - } - } else { - if(z < -stencil || z >= localSize[2] + stencil) { - inside = false; - std::cerr << "z = " << z << " is outside of [ " << -stencil << - ", " << localSize[2] + stencil << "[!" << std::endl; - } - } - if(!inside) { - std::cerr << "Out-of bounds access in FsGrid::get! Expect weirdness." << std::endl; - return NULL; - } -#endif // FSGRID_DEBUG - - if(isInNeighbourDomain != 13) { - - // Check if the corresponding neighbour exists - if(neighbour[isInNeighbourDomain]==MPI_PROC_NULL) { - // Neighbour doesn't exist, we must be an outer boundary cell - // (or something is quite wrong) - return NULL; - - } else if(neighbour[isInNeighbourDomain] == rank) { - // For periodic boundaries, where the neighbour is actually ourself, - // return our own actual cell instead of the ghost - x += coord_shift[0] * localSize[0]; - y += coord_shift[1] * localSize[1]; - z += coord_shift[2] * localSize[2]; - } - // Otherwise we return the ghost cell - } - LocalID index = LocalIDForCoords(x,y,z); - - return &data[index]; - } - - T* get(LocalID id) { - if(id < 0 || (unsigned int)id > data.size()) { - std::cerr << "Out-of-bounds access in FsGrid::get!" << std::endl - << "(LocalID = " << id << ", but storage space is " << data.size() - << ". Expect weirdness." << std::endl; - return NULL; - } - return &data[id]; - } - - /*! Get the physical coordinates in the global simulation space for - * the given cell. - * - * \param x local x-Coordinate, in cells - * \param y local y-Coordinate, in cells - * \param z local z-Coordinate, in cells - */ - std::array getPhysicalCoords(int x, int y, int z) { - std::array coords; - coords[0] = physicalGlobalStart[0] + (localStart[0]+x)*DX; - coords[1] = physicalGlobalStart[1] + (localStart[1]+y)*DY; - coords[2] = physicalGlobalStart[2] + (localStart[2]+z)*DZ; - - return coords; - } - - /*! Debugging output helper function. Allows for nicely formatted printing - * of grid contents. Since the grid data format is varying, the actual - * printing should be done in a lambda passed to this function. Example usage - * to plot |B|: - * - * perBGrid.debugOutput([](const std::array& a)->void{ - * cerr << sqrt(a[0]*a[0]+a[1]*a[1]+a[2]*a[2]) << ", "; - * }); - * - * \param func Function pointer (or lambda) which is called with a cell reference, - * in order. Use std::cerr in it to print desired value. - */ - void debugOutput(void (func)(const T&)) { - int xmin=0,xmax=1; - int ymin=0,ymax=1; - int zmin=0,zmax=1; - if(localSize[0] > 1) { - xmin = -stencil; xmax = localSize[0]+stencil; - } - if(localSize[1] > 1) { - ymin = -stencil; ymax = localSize[1]+stencil; - } - if(localSize[2] > 1) { - zmin = -stencil; zmax = localSize[2]+stencil; - } - for(int z=zmin; z& getPeriodic() { - return periodic; - } - - /*! 
Perform an MPI_Allreduce with this grid's internal communicator - * Function syntax is identical to MPI_Allreduce, except the final (communicator - * argument will not be needed) */ - int Allreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op) { - - // If a normal FS-rank - if(rank != -1){ - return MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm3d); - } - // If a non-FS rank, no need to communicate - else{ - int datatypeSize; - MPI_Type_size(datatype, &datatypeSize); - for(int i = 0; i < count * datatypeSize; ++i) - ((char*)recvbuf)[i] = ((char*)sendbuf)[i]; - return MPI_ERR_RANK; // This is ok for a non-FS rank - } - } - - /*! Get the decomposition array*/ - std::array& getDecomposition(){ - return ntasksPerDim; - } - - /*! Physical grid spacing and physical coordinate space start. - * TODO: Should this be private and have accesor-functions? - */ - double DX,DY,DZ; - std::array physicalGlobalStart; - - private: - //! MPI Cartesian communicator used in this grid - MPI_Comm comm1d = MPI_COMM_NULL; - MPI_Comm comm1d_aux = MPI_COMM_NULL; - MPI_Comm comm3d = MPI_COMM_NULL; - MPI_Comm comm3d_aux = MPI_COMM_NULL; - int rank; //!< This task's rank in the communicator - std::vector requests; - uint numRequests; - - std::array neighbour; //!< Tasks of the 26 neighbours (plus ourselves) - std::vector neighbour_index; //!< Lookup table from rank to index in the neighbour array - - // We have, fundamentally, two different coordinate systems we're dealing with: - // 1) Task grid in the MPI_Cartcomm - std::array ntasksPerDim; //!< Number of tasks in each direction - std::array taskPosition; //!< This task's position in the 3d task grid - // 2) Cell numbers in global and local view - - std::array periodic; //!< Information about whether a given direction is periodic - std::array globalSize; //!< Global size of the simulation space, in cells - std::array localSize; //!< Local size of simulation space handled by this task (without ghost cells) - std::array storageSize; //!< Local size of simulation space handled by this task (including ghost cells) - std::array localStart; //!< Offset of the local - //!coordinate system against - //!the global one - - std::array neighbourSendType; //!< Datatype for sending data - std::array neighbourReceiveType; //!< Datatype for receiving data - //! Actual storage of field data - std::vector data; -}; +#include "src/data.hpp" +#include "src/grid.hpp" +#include "src/tools.hpp" diff --git a/src/coordinates.hpp b/src/coordinates.hpp new file mode 100644 index 0000000..64c08d0 --- /dev/null +++ b/src/coordinates.hpp @@ -0,0 +1,358 @@ +#pragma once + +/* + Copyright (C) 2016 Finnish Meteorological Institute + Copyright (C) 2016-2024 CSC -IT Center for Science + + This file is part of fsgrid + + fsgrid is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + fsgrid is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; + without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with fsgrid. If not, see . 
+*/ +#include "tools.hpp" + +namespace fsgrid_detail { +using namespace fsgrid; + +constexpr static std::array computeNumTasksPerDim(std::array globalSize, + const std::array& decomposition, + int32_t numRanks, int32_t numGhostCells) { + const bool allZero = decomposition[0] == 0 && decomposition[1] == 0 && decomposition[2] == 0; + if (allZero) { + return computeDomainDecomposition(globalSize, numRanks, numGhostCells); + } + + const bool incorrectDistribution = decomposition[0] * decomposition[1] * decomposition[2] != numRanks; + if (incorrectDistribution) { + std::cerr << "Given decomposition (" << decomposition[0] << " " << decomposition[1] << " " << decomposition[2] + << ") does not distribute to the number of tasks given" << std::endl; + throw std::runtime_error("Given decomposition does not distribute to the number of tasks given"); + } + + return decomposition; +} + +constexpr static bool localSizeTooSmall(std::array globalSize, std::array localSize, + int32_t numGhostCells) { + const bool anyLocalIsZero = localSize[0] == 0 || localSize[1] == 0 || localSize[2] == 0; + const bool bounded = (globalSize[0] > static_cast(numGhostCells) && numGhostCells > localSize[0]) || + (globalSize[1] > static_cast(numGhostCells) && numGhostCells > localSize[1]) || + (globalSize[2] > static_cast(numGhostCells) && numGhostCells > localSize[2]); + + return anyLocalIsZero || bounded; +} + +constexpr static std::array calculateLocalSize(const std::array& globalSize, + const std::array& numTasksPerDim, + const std::array& taskPosition, + int32_t numGhostCells) { + if (taskPosition[0] < 0 || taskPosition[1] < 0 || taskPosition[2] < 0) { + return std::array{0, 0, 0}; + } + + std::array localSize = { + calcLocalSize(globalSize[0], numTasksPerDim[0], taskPosition[0]), + calcLocalSize(globalSize[1], numTasksPerDim[1], taskPosition[1]), + calcLocalSize(globalSize[2], numTasksPerDim[2], taskPosition[2]), + }; + + if (localSizeTooSmall(globalSize, localSize, numGhostCells)) { + std::cerr << "FSGrid space partitioning leads to a space that is too small.\n"; + std::cerr << "Please run with a different number of Tasks, so that space is better divisible." << std::endl; + throw std::runtime_error("FSGrid too small domains"); + } + + return localSize; +} + +constexpr static std::array calculateLocalStart(const std::array& globalSize, + const std::array& numTasksPerDim, + const std::array& taskPosition) { + return { + calcLocalStart(globalSize[0], numTasksPerDim[0], taskPosition[0]), + calcLocalStart(globalSize[1], numTasksPerDim[1], taskPosition[1]), + calcLocalStart(globalSize[2], numTasksPerDim[2], taskPosition[2]), + }; +} + +constexpr static std::array calculateStorageSize(const std::array& globalSize, + const std::array& localSize, + int32_t numGhostCells) { + return { + globalSize[0] <= 1 ? 1 : localSize[0] + numGhostCells * 2, + globalSize[1] <= 1 ? 1 : localSize[1] + numGhostCells * 2, + globalSize[2] <= 1 ? 
1 : localSize[2] + numGhostCells * 2, + }; +} +} // namespace fsgrid_detail + +namespace fsgrid { +using namespace fsgrid_detail; + +struct Coordinates { + constexpr Coordinates() {} + constexpr Coordinates(const std::array& physicalGridSpacing, + const std::array& physicalGlobalStart, const std::array& globalSize, + const std::array& periodic, const std::array& decomposition, + const std::array& taskPosition, int32_t numRanks, int32_t numGhostCells) + : numGhostCells(numGhostCells), physicalGridSpacing(physicalGridSpacing), + physicalGlobalStart(physicalGlobalStart), globalSize(globalSize), periodic(periodic), + numTasksPerDim(computeNumTasksPerDim(globalSize, decomposition, numRanks, numGhostCells)), + localSize(calculateLocalSize(globalSize, numTasksPerDim, taskPosition, numGhostCells)), + localStart(calculateLocalStart(globalSize, numTasksPerDim, taskPosition)), + storageSize(calculateStorageSize(globalSize, localSize, numGhostCells)) {} + + /*! Determine the cell's GlobalID from its local x,y,z coordinates + * \param x The cell's task-local x coordinate + * \param y The cell's task-local y coordinate + * \param z The cell's task-local z coordinate + */ + constexpr GlobalID globalIDFromLocalCoordinates(FsIndex_t x, FsIndex_t y, FsIndex_t z) const { + // Perform casts to avoid overflow + const std::array global = localToGlobal(x, y, z); + const auto xcontrib = global[0]; + const auto ycontrib = static_cast(globalSize[0]) * static_cast(global[1]); + const auto zcontrib = static_cast(globalSize[0]) * static_cast(globalSize[1]) * + static_cast(global[2]); + return xcontrib + ycontrib + zcontrib; + } + + /*! Determine the cell's GlobalID from its local x,y,z coordinates + * \param xyz The cell's task-local x,y,z coordinates in one array + */ + constexpr GlobalID globalIDFromLocalCoordinates(std::array xyz) const { + return globalIDFromLocalCoordinates(xyz[0], xyz[1], xyz[2]); + } + + /*! Determine the cell's LocalID from its local x,y,z coordinates + * \param x The cell's task-local x coordinate + * \param y The cell's task-local y coordinate + * \param z The cell's task-local z coordinate + */ + constexpr LocalID localIDFromLocalCoordinates(FsIndex_t x, FsIndex_t y, FsIndex_t z) const { + // Perform casts to avoid overflow + const auto xcontrib = static_cast(globalSize[0] > 1) * static_cast(numGhostCells + x); + const auto ycontrib = static_cast(globalSize[1] > 1) * static_cast(storageSize[0]) * + static_cast(numGhostCells + y); + const auto zcontrib = static_cast(globalSize[2] > 1) * static_cast(storageSize[0]) * + static_cast(storageSize[1]) * static_cast(numGhostCells + z); + + return xcontrib + ycontrib + zcontrib; + } + + constexpr LocalID localIDFromLocalCoordinates(const std::array& indices) const { + return localIDFromLocalCoordinates(indices[0], indices[1], indices[2]); + } + + /*! Determine the cell's local coordinates in cells from its stencilID + * \param localID stencil ID + */ + constexpr std::array localCoordsFromStencilID(size_t stencilID) const { + const LocalID idPerGs0 = stencilID / storageSize[0]; + const LocalID idPerGs0PerGs1 = idPerGs0 / storageSize[1]; + return { + (FsIndex_t)(stencilID % storageSize[0] - (globalSize[0] > 1 ? numGhostCells : 0)), + (FsIndex_t)(idPerGs0 % storageSize[1] - (globalSize[1] > 1 ? numGhostCells : 0)), + (FsIndex_t)(idPerGs0PerGs1 % storageSize[2] - (globalSize[2] > 1 ? numGhostCells : 0)), + }; + } + + /*! Transform global cell coordinates into the local domain. + * If the coordinates are out of bounds, (-1,-1,-1) is returned. 
+ * \param x The cell's global x coordinate + * \param y The cell's global y coordinate + * \param z The cell's global z coordinate + */ + constexpr std::array globalToLocal(FsSize_t x, FsSize_t y, FsSize_t z) const { + // Perform this check before doing the subtraction to avoid cases of underflow and overflow + // Particularly for the first three checks: + // - casting the localStart to unsigned and then doing the subtraction might cause underflow + // - casting the global coordinate to signed might overflow, due to global being too large to fit to the signed + // type + bool outOfBounds = x < static_cast(localStart[0]); + outOfBounds |= y < static_cast(localStart[1]); + outOfBounds |= z < static_cast(localStart[2]); + outOfBounds |= x >= static_cast(localSize[0]) + static_cast(localStart[0]); + outOfBounds |= y >= static_cast(localSize[1]) + static_cast(localStart[1]); + outOfBounds |= z >= static_cast(localSize[2]) + static_cast(localStart[2]); + + if (outOfBounds) { + return {-1, -1, -1}; + } else { + // This neither over nor underflows as per the checks above + return { + static_cast(x - static_cast(localStart[0])), + static_cast(y - static_cast(localStart[1])), + static_cast(z - static_cast(localStart[2])), + }; + } + } + + /*! Calculate global cell position (XYZ in global cell space) from local cell coordinates. + * + * \param x x-Coordinate, in cells + * \param y y-Coordinate, in cells + * \param z z-Coordinate, in cells + * + * \return Global cell coordinates + */ + constexpr std::array localToGlobal(FsIndex_t x, FsIndex_t y, FsIndex_t z) const { + // Cast both before adding to avoid overflow + return { + static_cast(localStart[0]) + static_cast(x), + static_cast(localStart[1]) + static_cast(y), + static_cast(localStart[2]) + static_cast(z), + }; + } + + /*! Get the physical coordinates in the global simulation space for + * the given cell. + * + * \param x local x-Coordinate, in cells + * \param y local y-Coordinate, in cells + * \param z local z-Coordinate, in cells + */ + constexpr std::array getPhysicalCoords(FsIndex_t x, FsIndex_t y, FsIndex_t z) const { + return { + physicalGlobalStart[0] + (localStart[0] + x) * physicalGridSpacing[0], + physicalGlobalStart[1] + (localStart[1] + y) * physicalGridSpacing[1], + physicalGlobalStart[2] + (localStart[2] + z) * physicalGridSpacing[2], + }; + } + + /*! Get the physical coordinates in the global simulation space for + * the given cell. + * + * \param ijk local coordinates, in cells + */ + constexpr std::array getPhysicalCoords(std::array ijk) const { + return getPhysicalCoords(ijk[0], ijk[1], ijk[2]); + } + + /*! Get the global cell coordinates for the given physical coordinates. + * + * \param x physical x-Coordinate + * \param y physical y-Coordinate + * \param z physical z-Coordinate + */ + constexpr std::array physicalToGlobal(double x, double y, double z) const { + return { + static_cast((x - physicalGlobalStart[0]) / physicalGridSpacing[0]), + static_cast((y - physicalGlobalStart[1]) / physicalGridSpacing[1]), + static_cast((z - physicalGlobalStart[2]) / physicalGridSpacing[2]), + }; + } + + /*! Get the (fractional) global cell coordinates for the given physical coordinates. 
+ * + * \param x physical x-Coordinate + * \param y physical y-Coordinate + * \param z physical z-Coordinate + */ + constexpr std::array physicalToCellFractional(double x, double y, double z) const { + const auto global = physicalToGlobal(x, y, z); + return { + (x - physicalGlobalStart[0]) / physicalGridSpacing[0] - global[0], + (y - physicalGlobalStart[1]) / physicalGridSpacing[1] - global[1], + (z - physicalGlobalStart[2]) / physicalGridSpacing[2] - global[2], + }; + } + + constexpr std::array globalIdToTaskPos(GlobalID id) const { + const std::array cell = globalIDtoCellCoord(id, globalSize); + + auto computeIndex = [&](uint32_t i) { + const FsIndex_t nPerTask = static_cast(globalSize[i] / static_cast(numTasksPerDim[i])); + const FsIndex_t nPerTaskPlus1 = nPerTask + 1; + const FsIndex_t remainder = static_cast(globalSize[i] % static_cast(numTasksPerDim[i])); + + return cell[i] < remainder * nPerTaskPlus1 ? cell[i] / nPerTaskPlus1 + : remainder + (cell[i] - remainder * nPerTaskPlus1) / nPerTask; + }; + + return { + computeIndex(0), + computeIndex(1), + computeIndex(2), + }; + } + + /*! Compute the neighbourIndex (in range [0, 26]) from the (local) cell indices. Note that the inputs are not local + * coordinates, but rather coordinates that include the ghost cells as well. + * + * \param x cell x-Coordinate + * \param y cell y-Coordinate + * \param z cell z-Coordinate + */ + constexpr FsSize_t neighbourIndexFromCellCoordinates(FsIndex_t x, FsIndex_t y, FsIndex_t z) const { + return 13 - static_cast(x < 0) * 9 + static_cast(x >= localSize[0]) * 9 - + static_cast(y < 0) * 3 + static_cast(y >= localSize[1]) * 3 - + static_cast(z < 0) + static_cast(z >= localSize[2]); + } + + /*! Compute the (shifted) local indices from the (local) cell indices. Note that the inputs are not local + * coordinates, but rather coordinates that include the ghost cells as well. + * + * \param x cell x-Coordinate + * \param y cell y-Coordinate + * \param z cell z-Coordinate + */ + constexpr std::array shiftCellIndices(FsIndex_t x, FsIndex_t y, FsIndex_t z) const { + return { + x + static_cast(x < 0) * localSize[0] - static_cast(x >= localSize[0]) * localSize[0], + y + static_cast(y < 0) * localSize[1] - static_cast(y >= localSize[1]) * localSize[1], + z + static_cast(z < 0) * localSize[2] - static_cast(z >= localSize[2]) * localSize[2], + }; + } + + /*! Check that the (local) cell indices are within our domain. Note that the inputs are not local + * coordinates, but rather coordinates that include the ghost cells as well. + * + * \param x cell x-Coordinate + * \param y cell y-Coordinate + * \param z cell z-Coordinate + */ + constexpr bool cellIndicesAreWithinBounds(FsIndex_t x, FsIndex_t y, FsIndex_t z) const { + const std::array minimums{ + localSize[0] > 1 || periodic[0] ? -numGhostCells : 0, + localSize[1] > 1 || periodic[1] ? -numGhostCells : 0, + localSize[2] > 1 || periodic[2] ? -numGhostCells : 0, + }; + const std::array maximums{ + localSize[0] > 1 || periodic[0] ? localSize[0] + numGhostCells : 1, + localSize[1] > 1 || periodic[1] ? localSize[1] + numGhostCells : 1, + localSize[2] > 1 || periodic[2] ? 
localSize[2] + numGhostCells : 1, + }; + + return minimums[0] <= x && x < maximums[0] && minimums[1] <= y && y < maximums[1] && minimums[2] <= z && + z < maximums[2]; + } + constexpr auto cellIndicesAreWithinBounds(const std::array& indices) const { + return cellIndicesAreWithinBounds(indices[0], indices[1], indices[2]); + } + + // ======================= + // Variables + // ======================= + const int32_t numGhostCells = 0; + const std::array physicalGridSpacing = {}; + const std::array physicalGlobalStart = {}; + const std::array globalSize = {}; + const std::array periodic = {}; + const std::array numTasksPerDim = {}; + const std::array localSize = {}; + const std::array localStart = {}; + const std::array storageSize = {}; +}; +} // namespace fsgrid diff --git a/src/data.hpp b/src/data.hpp new file mode 100644 index 0000000..242e74d --- /dev/null +++ b/src/data.hpp @@ -0,0 +1,81 @@ +#pragma once + +/* + Copyright (C) 2016 Finnish Meteorological Institute + Copyright (C) 2016-2024 CSC -IT Center for Science + + This file is part of fsgrid + + fsgrid is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + fsgrid is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; + without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with fsgrid. If not, see . +*/ + +#include +#include +#include +#include +#include +#include + +namespace fsgrid { +template struct Data { +private: + size_t num_elements = 0ul; + std::unique_ptr memory; + +public: + Data(size_t num_elements) + : num_elements(num_elements), + memory(static_cast(MemOps::allocate(getMemReq(num_elements))), &MemOps::deallocate) { + MemOps::memset(memory.get(), 0, getMemReq(num_elements)); + } + + Data(const std::span elements) : Data(elements.size()) { + MemOps::memcpy(memory.get(), elements.data(), getMemReq(elements.size())); + } + + [[nodiscard]] static size_t getMemReq(size_t n) { return n * sizeof(T); } + + [[nodiscard]] T& operator[](size_t i) { return data()[i]; } + [[nodiscard]] const T& operator[](size_t i) const { return data()[i]; } + + [[nodiscard]] size_t size() const { return num_elements; } + + [[nodiscard]] T* data() { return static_cast(static_cast(memory.get())); } + [[nodiscard]] T const* data() const { return static_cast(static_cast(memory.get())); } + + [[nodiscard]] std::span view() { return std::span(data(), size()); } + [[nodiscard]] std::span view() const { return std::span(data(), size()); } + + void swap(Data& other) noexcept { + std::swap(num_elements, other.num_elements); + memory.swap(other.memory); + } +}; + +template void swap(Data& a, Data& b) noexcept { a.swap(b); } + +// Use this for help, if you need to implement these operations for e.g. 
Cuda or Hip +struct CMemoryOperations { + static void* allocate(size_t bytes) { return std::malloc(bytes); } + static void deallocate(void* ptr) { std::free(ptr); } + static void memcpy(void* dst, const void* src, size_t bytes, bool = false) { std::memcpy(dst, src, bytes); } + static void memset(void* dst, int pattern, size_t bytes, bool = false) { std::memset(dst, pattern, bytes); } +}; + +// With ifdefs change this to have the desired memory operations template parameter +// This avoids having to specify it everywhere in vlasiator. +template using FsData = Data; + +} // namespace fsgrid diff --git a/src/grid.hpp b/src/grid.hpp new file mode 100644 index 0000000..7265c38 --- /dev/null +++ b/src/grid.hpp @@ -0,0 +1,638 @@ +#pragma once + +/* + Copyright (C) 2016 Finnish Meteorological Institute + Copyright (C) 2016-2024 CSC -IT Center for Science + + This file is part of fsgrid + + fsgrid is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + fsgrid is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; + without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with fsgrid. If not, see . +*/ +#include "coordinates.hpp" +#include "stencil.hpp" +#include "tools.hpp" + +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include + +namespace fsgrid_detail { +using namespace fsgrid; + +// Assumes x, y and z to belong to set [-1, 0, 1] +// returns a value in (inclusive) range [0, 26] +constexpr static uint32_t xyzToLinear(int32_t x, int32_t y, int32_t z) { + return static_cast((x + 1) * 9 + (y + 1) * 3 + (z + 1)); +} + +// These assume i to be in (inclusive) range [0, 26] +// returns a value from the set [-1, 0, 1] +constexpr static int32_t linearToX(uint32_t i) { return static_cast(i) / 9 - 1; } +constexpr static int32_t linearToY(uint32_t i) { return (static_cast(i) % 9) / 3 - 1; } +constexpr static int32_t linearToZ(uint32_t i) { return static_cast(i) % 3 - 1; } + +static std::array mapNeigbourIndexToRank(const std::array& taskPosition, + const std::array& numTasksPerDim, + const std::array& periodic, MPI_Comm comm, + int32_t rank) { + auto calculateNeighbourRank = [&](uint32_t neighbourIndex) { + auto calculateNeighbourPosition = [&](uint32_t neighbourIndex, uint32_t i) { + const auto pos3D = + i == 0 ? linearToX(neighbourIndex) : (i == 1 ? linearToY(neighbourIndex) : linearToZ(neighbourIndex)); + const auto nonPeriodicPos = taskPosition[i] + pos3D; + return periodic[i] ? 
(nonPeriodicPos + numTasksPerDim[i]) % numTasksPerDim[i] : nonPeriodicPos; + }; + + const std::array neighbourPosition = { + calculateNeighbourPosition(neighbourIndex, 0), + calculateNeighbourPosition(neighbourIndex, 1), + calculateNeighbourPosition(neighbourIndex, 2), + }; + + const bool taskPositionWithinLimits = numTasksPerDim[0] > neighbourPosition[0] && neighbourPosition[0] >= 0 && + numTasksPerDim[1] > neighbourPosition[1] && neighbourPosition[1] >= 0 && + numTasksPerDim[2] > neighbourPosition[2] && neighbourPosition[2] >= 0; + + if (taskPositionWithinLimits) { + int32_t neighbourRank; + mpiCheck(MPI_Cart_rank(comm, neighbourPosition.data(), &neighbourRank), "Rank ", rank, + " can't determine neighbour rank at position [", neighbourPosition[0], ", ", neighbourPosition[1], + ", ", neighbourPosition[2], "]"); + return neighbourRank; + } else { + return MPI_PROC_NULL; + } + }; + + std::array ranks; + if (rank == -1) { + ranks.fill(MPI_PROC_NULL); + } else { + std::generate(ranks.begin(), ranks.end(), + [&calculateNeighbourRank, n = 0u]() mutable { return calculateNeighbourRank(n++); }); + } + return ranks; +} + +static std::vector mapNeighbourRankToIndex(const std::array& indexToRankMap, int32_t numRanks) { + std::vector indices(static_cast(numRanks), MPI_PROC_NULL); + std::for_each(indexToRankMap.cbegin(), indexToRankMap.cend(), [&indices, &numRanks, n = 0u](int32_t rank) mutable { + if (rank >= 0 && numRanks > rank) { + indices[static_cast(rank)] = static_cast(n); + } + n++; + }); + return indices; +} + +static int32_t getCommRank(MPI_Comm parentComm) { + int32_t parentRank = -1; + mpiCheck(MPI_Comm_rank(parentComm, &parentRank), "Couldn't get rank from parent communicator"); + return parentRank; +} + +static MPI_Comm createCartesianCommunicator(MPI_Comm parentComm, const std::array& numTasksPerDim, + const std::array& isPeriodic, int32_t numProcs) { + const auto parentRank = getCommRank(parentComm); + const auto colour = (parentRank < numProcs) ? 1 : MPI_UNDEFINED; + + MPI_Comm comm = MPI_COMM_NULL; + mpiCheck(MPI_Comm_split(parentComm, colour, parentRank, &comm), + "Couldn's split parent communicator to subcommunicators"); + + const std::array pi = { + isPeriodic[0], + isPeriodic[1], + isPeriodic[2], + }; + + MPI_Comm comm3d = MPI_COMM_NULL; + if (comm != MPI_COMM_NULL) { + mpiCheck(MPI_Cart_create(comm, 3, numTasksPerDim.data(), pi.data(), 0, &comm3d), + "Creating cartesian communicatior failed when attempting to create FsGrid!"); + + mpiCheck(MPI_Comm_free(&comm), "Failed to free MPI comm"); + } + + return comm3d; +} + +static int32_t getCartesianRank(MPI_Comm cartesianComm) { + return cartesianComm != MPI_COMM_NULL ? 
getCommRank(cartesianComm) : -1; +} + +static std::array getTaskPosition(MPI_Comm comm) { + std::array taskPos{-1, -1, -1}; + if (comm != MPI_COMM_NULL) { + const int rank = getCommRank(comm); + mpiCheck(MPI_Cart_coords(comm, rank, taskPos.size(), taskPos.data()), "Rank ", rank, + " unable to determine own position in cartesian communicator when attempting to create FsGrid!"); + } + return taskPos; +} + +template +static std::array generateMPITypes(const std::array& storageSize, + const std::array& localSize, int32_t stencilSize, + bool generateForSend) { + MPI_Datatype baseType; + mpiCheck(MPI_Type_contiguous(sizeof(T), MPI_BYTE, &baseType), "Failed to create a contiguous data type"); + const std::array reverseStorageSize = { + storageSize[2], + storageSize[1], + storageSize[0], + }; + std::array types; + types.fill(MPI_DATATYPE_NULL); + + for (uint32_t i = 0; i < 27; i++) { + const auto x = linearToX(i); + const auto y = linearToY(i); + const auto z = linearToZ(i); + + const bool self = x == 0 && y == 0 && z == 0; + const bool flatX = storageSize[0] == 1 && x != 0; + const bool flatY = storageSize[1] == 1 && y != 0; + const bool flatZ = storageSize[2] == 1 && z != 0; + const bool skip = flatX || flatY || flatZ || self; + + if (skip) { + continue; + } + + const std::array reverseSubarraySize = { + (z == 0) ? localSize[2] : stencilSize, + (y == 0) ? localSize[1] : stencilSize, + (x == 0) ? localSize[0] : stencilSize, + }; + + const std::array reverseSubarrayStart = [&]() { + if (generateForSend) { + return std::array{ + storageSize[2] == 1 ? 0 : (z == 1 ? storageSize[2] - 2 * stencilSize : stencilSize), + storageSize[1] == 1 ? 0 : (y == 1 ? storageSize[1] - 2 * stencilSize : stencilSize), + storageSize[0] == 1 ? 0 : (x == 1 ? storageSize[0] - 2 * stencilSize : stencilSize), + }; + } else { + return std::array{ + storageSize[2] == 1 ? 0 : (z == -1 ? storageSize[2] - stencilSize : (z == 0 ? stencilSize : 0)), + storageSize[1] == 1 ? 0 : (y == -1 ? storageSize[1] - stencilSize : (y == 0 ? stencilSize : 0)), + storageSize[0] == 1 ? 0 : (x == -1 ? storageSize[0] - stencilSize : (x == 0 ? 
stencilSize : 0)), + }; + } + }(); + + mpiCheck(MPI_Type_create_subarray(3, reverseStorageSize.data(), reverseSubarraySize.data(), + reverseSubarrayStart.data(), MPI_ORDER_C, baseType, &(types[i])), + "Failed to create a subarray type"); + mpiCheck(MPI_Type_commit(&(types[i])), "Failed to commit MPI type"); + } + + mpiCheck(MPI_Type_free(&baseType), "Couldn't free the basetype used to create the sendTypes"); + + return types; +} + +static std::vector taskPosToTask(MPI_Comm parentComm, MPI_Comm cartesianComm, + const std::array& numTasksPerDim) { + std::vector tasks(static_cast(numTasksPerDim[0] * numTasksPerDim[1] * numTasksPerDim[2])); + if (cartesianComm != MPI_COMM_NULL) { + size_t i = 0; + for (auto x = 0; x < numTasksPerDim[0]; x++) { + for (auto y = 0; y < numTasksPerDim[1]; y++) { + for (auto z = 0; z < numTasksPerDim[2]; z++) { + const std::array coords = {x, y, z}; + mpiCheck(MPI_Cart_rank(cartesianComm, coords.data(), &tasks[i++]), + "Unable to get rank from cartesian communicator"); + } + } + } + } + + mpiCheck(MPI_Bcast(static_cast(tasks.data()), static_cast(tasks.size()), MPI_INT, 0, parentComm), + "Unable to broadcast task pos array"); + + return tasks; +} + +static BitMask32 makeNeigbourBitMask(int32_t rank, const std::array& neighbourIndexToRank) { + auto getNeighbourBit = [&rank, &neighbourIndexToRank](uint32_t neighbourIndex) { + const auto neighbourRank = neighbourIndexToRank[neighbourIndex]; + const auto neighbourIsSelf = neighbourRank == rank; + return static_cast(neighbourIsSelf) << neighbourIndex; + }; + + // Self is index 13, leave that bit to zero + uint32_t bits = 0; + for (auto i = 0u; i < 13u; i++) { + bits |= getNeighbourBit(i); + } + + for (auto i = 14u; i < 27u; i++) { + bits |= getNeighbourBit(i); + } + + return BitMask32(bits); +} + +static BitMask32 makeNeigbourIsNullBitMask(const std::array& neighbourIndexToRank) { + auto getNeighbourBit = [&neighbourIndexToRank](uint32_t neighbourIndex) { + const auto neighbourRank = neighbourIndexToRank[neighbourIndex]; + return static_cast(neighbourRank == MPI_PROC_NULL) << neighbourIndex; + }; + + // Self is index 13, leave that bit to zero + uint32_t bits = 0; + for (auto i = 0u; i < 13u; i++) { + bits |= getNeighbourBit(i); + } + + for (auto i = 14u; i < 27u; i++) { + bits |= getNeighbourBit(i); + } + + return BitMask32(bits); +} + +static std::array computeStencilMultipliers(const Coordinates& coordinates) { + return { + static_cast(coordinates.globalSize[0] > 1), + static_cast(coordinates.globalSize[1] > 1) * coordinates.storageSize[0], + static_cast(coordinates.globalSize[2] > 1) * coordinates.storageSize[0] * coordinates.storageSize[1], + }; +} + +static int32_t computeStencilOffset(const Coordinates& coordinates) { + const auto muls = computeStencilMultipliers(coordinates); + return coordinates.numGhostCells * (muls[0] + muls[1] + muls[2]); +} +} // namespace fsgrid_detail + +/*! Simple cartesian, non-loadbalancing MPI Grid for use with the fieldsolver + * + * \param T datastructure containing the field in each cell which this grid manages + * \param stencil ghost cell width of this grid + */ +namespace fsgrid { +using namespace fsgrid_detail; + +template class FsGrid { +public: + /*! Constructor for this grid. + * \param globalSize Cell size of the global simulation domain. + * \param MPI_Comm The MPI communicator this grid should use. + * \param periodic An array specifying, for each dimension, whether it is to be treated as periodic. 
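+    * \param numProcs Number of ranks of parentComm that take part in the field solver grid; the remaining ranks become non-FS ranks.
+    * \param physicalGridSpacing Physical grid spacing (cell size) in each dimension.
+    * \param physicalGlobalStart Physical coordinate of the start of the global grid.
+    * \param decomposition Optional predefined decomposition (tasks per dimension); the default {0, 0, 0} lets the grid compute one.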
+ */ + FsGrid(const std::array& globalSize, MPI_Comm parentComm, int32_t numProcs, + const std::array& periodic, const std::array& physicalGridSpacing, + const std::array& physicalGlobalStart, const std::array& decomposition = {0, 0, 0}) + : numProcs(numProcs), + comm3d(createCartesianCommunicator( + parentComm, computeNumTasksPerDim(globalSize, decomposition, numProcs, stencil), periodic, numProcs)), + rank(getCartesianRank(comm3d)), coordinates(physicalGridSpacing, physicalGlobalStart, globalSize, periodic, + decomposition, getTaskPosition(comm3d), numProcs, stencil), + tasks(taskPosToTask(parentComm, comm3d, coordinates.numTasksPerDim)), + neighbourIndexToRank( + mapNeigbourIndexToRank(getTaskPosition(comm3d), coordinates.numTasksPerDim, periodic, comm3d, rank)), + neighbourRankToIndex(mapNeighbourRankToIndex(neighbourIndexToRank, numProcs)), + stencilConstants(coordinates.localSize, computeStencilMultipliers(coordinates), + computeStencilOffset(coordinates), stencil, makeNeigbourBitMask(rank, neighbourIndexToRank), + makeNeigbourIsNullBitMask(neighbourIndexToRank)) {} + + template auto getMPITypes() { + auto bytes = sizeof(D); + if (neighbourMPITypes.find(bytes) == neighbourMPITypes.end()) { + auto sendType = generateMPITypes(coordinates.storageSize, coordinates.localSize, stencil, true); + auto receiveType = generateMPITypes(coordinates.storageSize, coordinates.localSize, stencil, false); + std::tuple, std::array> types = {sendType, receiveType}; + neighbourMPITypes[bytes] = types; + return types; + } + return neighbourMPITypes.at(bytes); + } + + ~FsGrid() { finalize(); } + + void finalize() noexcept { + // If not a non-FS process + if (rank != -1) { + for (auto& it : neighbourMPITypes) { + auto [sendTypeArray, recvTypeArray] = it.second; + for (auto& type : sendTypeArray) { + if (type != MPI_DATATYPE_NULL) { + mpiCheck(MPI_Type_free(&type), "Failed to free MPI type"); + } + } + for (auto& type : recvTypeArray) { + if (type != MPI_DATATYPE_NULL) { + mpiCheck(MPI_Type_free(&type), "Failed to free MPI type"); + } + } + } + neighbourMPITypes.clear(); + } + + if (comm3d != MPI_COMM_NULL) + mpiCheck(MPI_Comm_free(&comm3d), "Failed to free MPI comm3d"); + } + + // ============================ + // Coordinate change functions + // - Redirected to Coordinates' implementation + // ============================ + template auto globalIDFromLocalCoordinates(Args... args) const { + return coordinates.globalIDFromLocalCoordinates(args...); + } + template auto localIDFromLocalCoordinates(Args... args) const { + return coordinates.localIDFromLocalCoordinates(args...); + } + template auto globalToLocal(Args... args) const { return coordinates.globalToLocal(args...); } + template auto localToGlobal(Args... args) const { return coordinates.localToGlobal(args...); } + template auto localCoordsFromStencilID(Args... args) const { return coordinates.localCoordsFromStencilID(args...); } + template auto getPhysicalCoords(Args... args) const { + return coordinates.getPhysicalCoords(args...); + } + template auto physicalToGlobal(Args... args) const { + return coordinates.physicalToGlobal(args...); + } + template auto physicalToCellFractional(Args... args) const { + return coordinates.physicalToCellFractional(args...); + } + + /*! 
Returns the task responsible for handling the cell with the given GlobalID + * \param id GlobalID of the cell for which task is to be determined + * \return a task for the grid's cartesian communicator + */ + Task_t getTaskForGlobalID(GlobalID id) const { + const auto taskPos = coordinates.globalIdToTaskPos(id); + const int32_t i = taskPos[0] * (coordinates.numTasksPerDim[1] * coordinates.numTasksPerDim[2]) + + taskPos[1] * coordinates.numTasksPerDim[2] + taskPos[2]; + return tasks[static_cast(i)]; + } + + FsStencil makeStencil(int32_t x, int32_t y, int32_t z) const { return FsStencil(x, y, z, stencilConstants); } + + // ============================ + // Getters + // ============================ + auto getNumCells() const { return coordinates.localSize[0] * coordinates.localSize[1] * coordinates.localSize[2]; } + auto getNumStorageCells() const { + return coordinates.storageSize[0] * coordinates.storageSize[1] * coordinates.storageSize[2]; + } + const auto& getLocalSize() const { return coordinates.localSize; } + const auto& getLocalStart() const { return coordinates.localStart; } + const auto& getGlobalSize() const { return coordinates.globalSize; } + Task_t getRank() const { return rank; } + Task_t getNumFsRanks() const { return numProcs; } + const auto& getPeriodic() const { return coordinates.periodic; } + const auto& getDecomposition() const { return coordinates.numTasksPerDim; } + const auto& getGridSpacing() const { return coordinates.physicalGridSpacing; } + + // ============================ + // MPI functions + // ============================ + + /*! Perform ghost cell communication. + */ + template void updateGhostCells(std::span data) { + if (comm3d == MPI_COMM_NULL) { + return; + } + + auto [neighbourSendType, neighbourReceiveType] = getMPITypes(); + + std::array receiveRequests; + std::array sendRequests; + receiveRequests.fill(MPI_REQUEST_NULL); + sendRequests.fill(MPI_REQUEST_NULL); + + for (uint32_t shiftId = 0; shiftId < 27; shiftId++) { + const auto receiveId = 26 - shiftId; + const auto receiveFrom = neighbourIndexToRank[receiveId]; + const auto sendType = neighbourSendType[shiftId]; + const auto receiveType = neighbourReceiveType[shiftId]; + // Is this a bug? Should the check be on receiveType, not sendType? It has been like this since 2016 + if (receiveFrom != MPI_PROC_NULL && sendType != MPI_DATATYPE_NULL) { + mpiCheck(MPI_Irecv(data.data(), 1, receiveType, receiveFrom, shiftId, comm3d, &(receiveRequests[shiftId])), + "Rank ", rank, " failed to receive data from neighbor ", receiveId, " with rank ", receiveFrom); + } + } + + for (uint32_t shiftId = 0; shiftId < 27; shiftId++) { + const auto sendTo = neighbourIndexToRank[shiftId]; + const auto sendType = neighbourSendType[shiftId]; + if (sendTo != MPI_PROC_NULL && sendType != MPI_DATATYPE_NULL) { + mpiCheck(MPI_Isend(data.data(), 1, sendType, sendTo, shiftId, comm3d, &(sendRequests[shiftId])), "Rank ", + rank, " failed to send data to neighbor ", shiftId, " with rank ", sendTo); + } + } + + mpiCheck(MPI_Waitall(27, receiveRequests.data(), MPI_STATUSES_IGNORE), + "Synchronization at ghost cell update failed"); + mpiCheck(MPI_Waitall(27, sendRequests.data(), MPI_STATUSES_IGNORE), + "Synchronization at ghost cell update failed"); + } + + /*! 
Perform an MPI_Allreduce with this grid's internal communicator + * Function syntax is identical to MPI_Allreduce, except the final (communicator + * argument will not be needed) */ + int32_t Allreduce(void* sendbuf, void* recvbuf, int32_t count, MPI_Datatype datatype, MPI_Op op) const { + // If a normal FS-rank + if (comm3d != MPI_COMM_NULL) { + return MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm3d); + } + // If a non-FS rank, no need to communicate + else { + int32_t datatypeSize; + MPI_Type_size(datatype, &datatypeSize); + for (int32_t i = 0; i < count * datatypeSize; ++i) + ((char*)recvbuf)[i] = ((char*)sendbuf)[i]; + return MPI_ERR_RANK; // This is ok for a non-FS rank + } + } + + /*! Parallelised for loop interface */ + template + void parallel_for(TimerCallBack timerCallBack, int timerId, std::span technical, Lambda loop_body) { + // Using raw pointer for localSize; + // Workaround intel compiler bug in collapsed openmp loops + // see https://github.com/fmihpc/vlasiator/commit/604c81142729c5025a0073cd5dc64a24882f1675 + const FsIndex_t* localSize = &coordinates.localSize[0]; + +#pragma omp parallel + { + auto timer = timerCallBack(timerId); +#pragma omp for collapse(2) + for (auto k = 0; k < localSize[2]; k++) { + for (auto j = 0; j < localSize[1]; j++) { + for (auto i = 0; i < localSize[0]; i++) { + const auto s = makeStencil(i, j, k); + const auto& tech = technical[s.ooo()]; + const auto sysBoundaryFlag = tech.sysBoundaryFlag; + const auto sysBoundaryLayer = tech.sysBoundaryLayer; + loop_body(coordinates, s, sysBoundaryFlag, sysBoundaryLayer); + } + } + } + timer.stop(getNumCells(), "Spatial Cells"); + } + } + +/*! Same as above parallel_for but without parallelization */ + template + void serial_for(TimerCallBack timerCallBack, int timerId, std::span technical, Lambda loop_body) { + // Using raw pointer for localSize; + // Workaround intel compiler bug in collapsed openmp loops + // see https://github.com/fmihpc/vlasiator/commit/604c81142729c5025a0073cd5dc64a24882f1675 + const FsIndex_t* localSize = &coordinates.localSize[0]; + + auto timer = timerCallBack(timerId); + for (auto k = 0; k < localSize[2]; k++) { + for (auto j = 0; j < localSize[1]; j++) { + for (auto i = 0; i < localSize[0]; i++) { + const auto s = makeStencil(i, j, k); + const auto& tech = technical[s.ooo()]; + const auto sysBoundaryFlag = tech.sysBoundaryFlag; + const auto sysBoundaryLayer = tech.sysBoundaryLayer; + loop_body(coordinates, s, sysBoundaryFlag, sysBoundaryLayer); + } + } + } + timer.stop(getNumCells(), "Spatial Cells"); + } + + /* Similar to parallel_for above but allows e.g. an alternative implementation for side-by-side comparison. 
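The only difference to parallel_for is that the OpenMP loop here uses collapse(3), so the innermost x loop is collapsed into the parallel loop as well.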
*/ + template + void experimental_for(TimerCallBack timerCallBack, int timerId, std::span technical, Lambda loop_body) { + // Using raw pointer for localSize; + // Workaround intel compiler bug in collapsed openmp loops + // see https://github.com/fmihpc/vlasiator/commit/604c81142729c5025a0073cd5dc64a24882f1675 + const FsIndex_t* localSize = &coordinates.localSize[0]; + +#pragma omp parallel + { + auto timer = timerCallBack(timerId); +#pragma omp for collapse(3) + for (auto k = 0; k < localSize[2]; k++) { + for (auto j = 0; j < localSize[1]; j++) { + for (auto i = 0; i < localSize[0]; i++) { + const auto s = makeStencil(i, j, k); + const auto& tech = technical[s.ooo()]; + const auto sysBoundaryFlag = tech.sysBoundaryFlag; + const auto sysBoundaryLayer = tech.sysBoundaryLayer; + loop_body(coordinates, s, sysBoundaryFlag, sysBoundaryLayer); + } + } + } + timer.stop(getNumCells(), "Spatial Cells"); + } + } + + /*! Parallelised reduction loop interface */ + template + REAL parallel_reduction(TimerCallBack timerCallBack, int timerId, std::span technical, Reducer reducer, const REAL neutral, Lambda loop_body) { + // Using raw pointer for localSize; + // Workaround intel compiler bug in collapsed openmp loops + // see https://github.com/fmihpc/vlasiator/commit/604c81142729c5025a0073cd5dc64a24882f1675 + const FsIndex_t* localSize = &coordinates.localSize[0]; + REAL result = neutral; +#pragma omp parallel + { + auto timer = timerCallBack(timerId); + REAL thread_result = neutral; +#pragma omp for collapse(2) + for (auto k = 0; k < localSize[2]; k++) { + for (auto j = 0; j < localSize[1]; j++) { + for (auto i = 0; i < localSize[0]; i++) { + const auto s = makeStencil(i, j, k); + const auto& tech = technical[s.ooo()]; + const auto sysBoundaryFlag = tech.sysBoundaryFlag; + const auto sysBoundaryLayer = tech.sysBoundaryLayer; + thread_result = reducer(thread_result, loop_body(coordinates, s, sysBoundaryFlag, sysBoundaryLayer, neutral)); + } + } + } +#pragma omp critical + { + result = reducer(result, thread_result); + } + timer.stop(getNumCells(), "Spatial Cells"); + } + return result; + } + +/*! Same as above parallel_for but without parallelization */ + template + REAL serial_reduction(TimerCallBack timerCallBack, int timerId, std::span technical, Reducer reducer, const REAL neutral, Lambda loop_body) { + // Using raw pointer for localSize; + // Workaround intel compiler bug in collapsed openmp loops + // see https://github.com/fmihpc/vlasiator/commit/604c81142729c5025a0073cd5dc64a24882f1675 + auto timer = timerCallBack(timerId); + const FsIndex_t* localSize = &coordinates.localSize[0]; + REAL result = neutral; + for (auto k = 0; k < localSize[2]; k++) { + for (auto j = 0; j < localSize[1]; j++) { + for (auto i = 0; i < localSize[0]; i++) { + const auto s = makeStencil(i, j, k); + const auto& tech = technical[s.ooo()]; + const auto sysBoundaryFlag = tech.sysBoundaryFlag; + const auto sysBoundaryLayer = tech.sysBoundaryLayer; + result = reducer(result, loop_body(coordinates, s, sysBoundaryFlag, sysBoundaryLayer, neutral)); + } + } + } + timer.stop(getNumCells(), "Spatial Cells"); + return result; + } + +private: + //! How many fieldsolver processes there are + const int32_t numProcs = 0; + //! 
MPI Cartesian communicator used in this grid + MPI_Comm comm3d = MPI_COMM_NULL; + //!< This task's rank in the communicator + const int32_t rank = 0; + + //!< A container for the coordinates of the fsgrid + const Coordinates coordinates = {}; + + //!< Task position to task mapping + const std::vector tasks = {}; + + //!< Lookup table from index to rank in the neighbour array (includes self) + const std::array neighbourIndexToRank = { + MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, + MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, + MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, + MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, + }; + //!< Lookup table from rank to index in the neighbour array + const std::vector neighbourRankToIndex = {}; + + //!< Type containing data computed from FsGrid values that are constant for all stencils + const StencilConstants stencilConstants = {}; + + //!< Datatypes for sending and receiving data + std::unordered_map, std::array>> neighbourMPITypes; +}; +} // namespace fsgrid diff --git a/src/stencil.hpp b/src/stencil.hpp new file mode 100644 index 0000000..7c9608b --- /dev/null +++ b/src/stencil.hpp @@ -0,0 +1,274 @@ +#pragma once + +/* + Copyright (C) 2016 Finnish Meteorological Institute + Copyright (C) 2016-2024 CSC -IT Center for Science + + This file is part of fsgrid + + fsgrid is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + fsgrid is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; + without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with fsgrid. If not, see . 
+*/ + +#include +#include +#include + +namespace fsgrid { +struct BitMask32 { + constexpr BitMask32(uint32_t bits) : bits(bits) {} + constexpr uint32_t operator[](uint32_t i) const { + // Shifting by more than N - 1 is undefined behaviour for N bit values + constexpr uint32_t n = sizeof(bits) * 8; + const uint32_t mul = i < n; + i &= n - 1; + return mul * ((bits & (1u << i)) >> i); + } + +private: + const uint32_t bits = 0u; +}; + +struct StencilConstants { + const std::array limits = {}; + const std::array multipliers = {}; + const int32_t offset = 0; + const int32_t numGhostCells = 0; + const BitMask32 shift = 0; + const BitMask32 fallbackToCenter = 0; + + constexpr StencilConstants(const std::array& limits, const std::array& multipliers, + int32_t offset, int32_t numGhostCells, BitMask32 shift, BitMask32 fallbackToCenter) + : limits(limits), multipliers(multipliers), offset(offset), numGhostCells(numGhostCells), shift(shift), + fallbackToCenter(fallbackToCenter) {} + constexpr StencilConstants() {} +}; + +struct FsStencil { + const int32_t i = 0; + const int32_t j = 0; + const int32_t k = 0; + +private: + const StencilConstants constants = {}; + +public: + constexpr FsStencil(int32_t i, int32_t j, int32_t k, const StencilConstants& constants) + : i(i), j(j), k(k), constants(constants) {} + + // clang-format off + // These names come from the right hand rule, with + // - x horizontal + // - y on the line of sight +z +y + // - z vertical | / + // oop / + // and mpo__|_opo____ppo + // - m standing for "minus" / | / / + // - p for "plus" and / |/ / + // - o for "zero" or "origin" -x --moo ------ooo------ poo-- +x + // / /| / + // Examples: /_____ /_|____/ + // -1, 0, 0 = moo mmo omo | pmo + // +1, 0, 0 = poo / oom + // +1, +1, +1 = ppp / | + // 0, 0, 0 = ooo -y -z + + constexpr size_t ooo() const { return calculateIndex({i, j , k }); } + constexpr size_t oop() const { return calculateIndex({i, j , k + 1}); } + constexpr size_t oom() const { return calculateIndex({i, j , k - 1}); } + + constexpr size_t opo() const { return calculateIndex({i, j + 1, k }); } + constexpr size_t opp() const { return calculateIndex({i, j + 1, k + 1}); } + constexpr size_t opm() const { return calculateIndex({i, j + 1, k - 1}); } + + constexpr size_t omo() const { return calculateIndex({i, j - 1, k }); } + constexpr size_t omp() const { return calculateIndex({i, j - 1, k + 1}); } + constexpr size_t omm() const { return calculateIndex({i, j - 1, k - 1}); } + + constexpr size_t poo() const { return calculateIndex({i + 1, j , k }); } + constexpr size_t pop() const { return calculateIndex({i + 1, j , k + 1}); } + constexpr size_t pom() const { return calculateIndex({i + 1, j , k - 1}); } + + constexpr size_t ppo() const { return calculateIndex({i + 1, j + 1, k }); } + constexpr size_t ppp() const { return calculateIndex({i + 1, j + 1, k + 1}); } + constexpr size_t ppm() const { return calculateIndex({i + 1, j + 1, k - 1}); } + + constexpr size_t pmo() const { return calculateIndex({i + 1, j - 1, k }); } + constexpr size_t pmp() const { return calculateIndex({i + 1, j - 1, k + 1}); } + constexpr size_t pmm() const { return calculateIndex({i + 1, j - 1, k - 1}); } + + constexpr size_t moo() const { return calculateIndex({i - 1, j , k }); } + constexpr size_t mop() const { return calculateIndex({i - 1, j , k + 1}); } + constexpr size_t mom() const { return calculateIndex({i - 1, j , k - 1}); } + + constexpr size_t mpo() const { return calculateIndex({i - 1, j + 1, k }); } + constexpr size_t mpp() const { return 
calculateIndex({i - 1, j + 1, k + 1}); } + constexpr size_t mpm() const { return calculateIndex({i - 1, j + 1, k - 1}); } + + constexpr size_t mmo() const { return calculateIndex({i - 1, j - 1, k }); } + constexpr size_t mmp() const { return calculateIndex({i - 1, j - 1, k + 1}); } + constexpr size_t mmm() const { return calculateIndex({i - 1, j - 1, k - 1}); } + // clang-format on + + constexpr bool cellExists(int32_t io, int32_t jo, int32_t ko) const { + const auto x = i + io; + const auto y = j + jo; + const auto z = k + ko; + const auto no = neighbourOffset({x, y, z}); + const auto ni = neighbourIndex(no); + return -constants.numGhostCells <= x && x < constants.limits[0] + constants.numGhostCells && + -constants.numGhostCells <= y && y < constants.limits[1] + constants.numGhostCells && + -constants.numGhostCells <= z && z < constants.limits[2] + constants.numGhostCells && + static_cast(constants.fallbackToCenter[ni]) == 0; + } + + constexpr std::array indices() const { + // Return an array containing all indices + // x changes fastest, then y, then z + // clang-format off + return { + calculateIndex({i - 1, j - 1, k - 1}), + calculateIndex({i , j - 1, k - 1}), + calculateIndex({i + 1, j - 1, k - 1}), + calculateIndex({i - 1, j , k - 1}), + calculateIndex({i , j , k - 1}), + calculateIndex({i + 1, j , k - 1}), + calculateIndex({i - 1, j + 1, k - 1}), + calculateIndex({i , j + 1, k - 1}), + calculateIndex({i + 1, j + 1, k - 1}), + calculateIndex({i - 1, j - 1, k }), + calculateIndex({i , j - 1, k }), + calculateIndex({i + 1, j - 1, k }), + calculateIndex({i - 1, j , k }), + calculateIndex({i , j , k }), + calculateIndex({i + 1, j , k }), + calculateIndex({i - 1, j + 1, k }), + calculateIndex({i , j + 1, k }), + calculateIndex({i + 1, j + 1, k }), + calculateIndex({i - 1, j - 1, k + 1}), + calculateIndex({i , j - 1, k + 1}), + calculateIndex({i + 1, j - 1, k + 1}), + calculateIndex({i - 1, j , k + 1}), + calculateIndex({i , j , k + 1}), + calculateIndex({i + 1, j , k + 1}), + calculateIndex({i - 1, j + 1, k + 1}), + calculateIndex({i , j + 1, k + 1}), + calculateIndex({i + 1, j + 1, k + 1}), + }; + // clang-format on + } + + constexpr size_t indexFromOffset(int32_t io, int32_t jo, int32_t ko) const { + return calculateIndex({i + io, j + jo, k + ko}); + } + +private: + constexpr size_t calculateIndex(std::array cellIndex) const { + const auto no = neighbourOffset(cellIndex); + const auto ni = neighbourIndex(no); + cellIndex = fallback(offsetValues(cellIndex, no, ni), ni); + + return applyMultipliersAndOffset(cellIndex); + } + + constexpr std::array neighbourOffset(const std::array& cellIndex) const { + // clang-format off + // A triplet of (-, 0, +) values, with 27 possibilities + // The value for a coordinate is + // - if the coordinate is below zero, + // 0 if it's within bounds, or + // + if it's at or above limit + // + // Visualized as 2D slices: + // (values on the charts indicate (xyz) in order) + // + // +Z plane + // y + // ^ -++ 0++ +++ + // | -0+ 00+ +0+ + // | --+ 0-+ +-+ + // o-------------->x + // + // 0Z plane + // y + // ^ -+0 0+0 ++0 + // | -00 000 +00 + // | --0 0-0 +-0 + // o-------------->x + // + // -Z plane + // y + // ^ -+- 0+- ++- + // | -0- 00- +0- + // | --- 0-- +-- + // o-------------->x + // + // clang-format on + return { + (cellIndex[0] >= constants.limits[0]) - (cellIndex[0] < 0), + (cellIndex[1] >= constants.limits[1]) - (cellIndex[1] < 0), + (cellIndex[2] >= constants.limits[2]) - (cellIndex[2] < 0), + }; + } + + constexpr uint32_t neighbourIndex(const 
std::array& no) const { + // Translate a triplet of (-, 0, +) values to a single value between 0 and 26 + // - 0 is at (---) corner + // - 13 is at (000) i.e. center + // - 26 is at (+++) corner + // - z changes fastest, then y, then x + return static_cast(13 + no[0] * 9 + no[1] * 3 + no[2]); + } + + constexpr std::array offsetValues(const std::array& cellIndex, + const std::array& no, uint32_t ni) const { + // If the shift bit is 1 for neighbour index 'ni', add an offset to the given values + const auto addOffset = static_cast(constants.shift[ni]); + const auto offsets = shiftOffsets(no); + return { + cellIndex[0] + addOffset * offsets[0], + cellIndex[1] + addOffset * offsets[1], + cellIndex[2] + addOffset * offsets[2], + }; + } + + constexpr std::array shiftOffsets(const std::array& no) const { + // -limit, if the neihbour offset 'no' is + + // 0, if the neihbour offset is 0 + // +limit, if neihbour offset is - + return { + -no[0] * constants.limits[0], + -no[1] * constants.limits[1], + -no[2] * constants.limits[2], + }; + } + + constexpr std::array fallback(const std::array& cellIndex, uint32_t ni) const { + // If the cellIndex is invalid, we'll use the center cell, i.e. (i, j, k) + const auto invalid = static_cast(constants.fallbackToCenter[ni]); + const auto valid = invalid ^ 1; + return { + valid * cellIndex[0] + invalid * i, + valid * cellIndex[1] + invalid * j, + valid * cellIndex[2] + invalid * k, + }; + } + + constexpr size_t applyMultipliersAndOffset(const std::array& cellIndex) const { + // A dot product between cellIndex and constants.multipliers + an offset + return static_cast(constants.offset + constants.multipliers[0] * cellIndex[0] + + constants.multipliers[1] * cellIndex[1] + constants.multipliers[2] * cellIndex[2]); + } +}; +} // namespace fsgrid diff --git a/src/tools.hpp b/src/tools.hpp new file mode 100644 index 0000000..499cab1 --- /dev/null +++ b/src/tools.hpp @@ -0,0 +1,179 @@ +#pragma once + +/* + Copyright (C) 2016 Finnish Meteorological Institute + Copyright (C) 2016-2024 CSC -IT Center for Science + + This file is part of fsgrid + + fsgrid is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + fsgrid is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; + without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with fsgrid. If not, see . +*/ +#include +#include +#include +#include +#include +#include +#include + +namespace fsgrid { +// Size type for global array indices +typedef uint32_t FsSize_t; +// Size type for global/local array indices, incl. possible negative values +typedef int32_t FsIndex_t; +typedef int64_t LocalID; +typedef int64_t GlobalID; +typedef int32_t Task_t; + +//! 
Helper function: calculate size of the local coordinate space for the given dimension +// \param numCells Number of cells in the global Simulation, in this dimension +// \param numTasks Total number of tasks in this dimension +// \param taskIndex This task's position in this dimension +// \return Number of cells for this task's local domain (actual cells, not counting ghost cells) +constexpr static FsIndex_t calcLocalSize(FsSize_t numCells, Task_t numTasks, Task_t taskIndex) { + const FsIndex_t nPerTask = static_cast(numCells) / numTasks; + const FsIndex_t remainder = static_cast(numCells) % numTasks; + return nPerTask + (taskIndex < remainder); +} + +//! Helper function: calculate position of the local coordinate space for the given dimension +// \param numCells number of cells +// \param numTasks Total number of tasks in this dimension +// \param taskIndex This task's position in this dimension +// \return Cell number at which this task's domains cells start (actual cells, not counting ghost cells) +constexpr static FsIndex_t calcLocalStart(FsSize_t numCells, Task_t numTasks, Task_t taskIndex) { + const FsIndex_t remainder = static_cast(numCells) % numTasks; + return taskIndex * calcLocalSize(numCells, numTasks, taskIndex) + (taskIndex >= remainder) * remainder; +} + +//! Helper function: given a global cellID, calculate the global cell coordinate from it. +// This is then used do determine the task responsible for this cell, and the +// local cell index in it. +constexpr static std::array globalIDtoCellCoord(GlobalID id, const std::array& globalSize) { + // We're returning FsIndex_t, which is int32_t. globalSizes are FsSize_t, which are uint32_t. + // Need to check that the globalSizes aren't larger than maximum of int32_t + const bool globalSizeOverflows = globalSize[0] > std::numeric_limits::max() || + globalSize[1] > std::numeric_limits::max() || + globalSize[2] > std::numeric_limits::max(); + const bool idNegative = id < 0; + + // To avoid overflow, do this instead of checking for id < product of globalSize + // The number of divisions stays the same anyway + const GlobalID idPerGs0 = id / globalSize[0]; + const GlobalID idPerGs0PerGs1 = idPerGs0 / globalSize[1]; + const bool idTooLarge = idPerGs0PerGs1 >= globalSize[2]; + + const bool badInput = idTooLarge || idNegative || globalSizeOverflows; + + if (badInput) { + // For bad input, return bad output + return { + std::numeric_limits::min(), + std::numeric_limits::min(), + std::numeric_limits::min(), + }; + } else { + return { + (FsIndex_t)(id % globalSize[0]), + (FsIndex_t)(idPerGs0 % globalSize[1]), + (FsIndex_t)(idPerGs0PerGs1 % globalSize[2]), + }; + } +} + +//! Helper function to optimize decomposition of this grid over the given number of tasks +constexpr static std::array computeDomainDecomposition(const std::array& globalSize, + Task_t nProcs, int32_t numGhostCells = 1) { + const std::array minDomainSize = { + globalSize[0] == 1 ? 1 : numGhostCells, + globalSize[1] == 1 ? 1 : numGhostCells, + globalSize[2] == 1 ? 
1 : numGhostCells, + }; + const std::array maxDomainSize = { + std::min(nProcs, static_cast(globalSize[0] / static_cast(minDomainSize[0]))), + std::min(nProcs, static_cast(globalSize[1] / static_cast(minDomainSize[1]))), + std::min(nProcs, static_cast(globalSize[2] / static_cast(minDomainSize[2]))), + }; + + int64_t minimumCost = std::numeric_limits::max(); + std::array dd = {1, 1, 1}; + for (Task_t i = 1; i <= maxDomainSize[0]; i++) { + for (Task_t j = 1; j <= maxDomainSize[1]; j++) { + const Task_t k = nProcs / (i * j); + if (k == 0) { + break; + } + + // No need to optimize an incompatible DD, also checks for missing remainders + if (i * j * k != nProcs || k > maxDomainSize[2]) { + continue; + } + + const std::array processBox = { + calcLocalSize(globalSize[0], i, 0), + calcLocalSize(globalSize[1], j, 0), + calcLocalSize(globalSize[2], k, 0), + }; + + // clang-format off + const int64_t baseCost = (i > 1 ? processBox[1] * processBox[2] : 0) + + (j > 1 ? processBox[0] * processBox[2] : 0) + + (k > 1 ? processBox[0] * processBox[1] : 0); + const int64_t neighborMultiplier = ((i != 1 && j != 1 && k != 1) ? 13 : 1) + * ((i == 1 && j != 1 && k != 1) ? 4 : 1) + * ((i != 1 && j == 1 && k != 1) ? 4 : 1) + * ((i != 1 && j != 1 && k == 1) ? 4 : 1); + // clang-format on + const int64_t cost = baseCost * neighborMultiplier; + if (cost < minimumCost) { + minimumCost = cost; + dd = {i, j, k}; + } + } + } + + if (minimumCost == std::numeric_limits::max() || (Task_t)(dd[0] * dd[1] * dd[2]) != nProcs) { + throw std::runtime_error("FSGrid computeDomainDecomposition failed"); + } + + return dd; +} + +template void cerrArgs(Args...); +template <> inline void cerrArgs() {} +template void cerrArgs(T t) { std::cerr << t << "\n"; } +template void cerrArgs(Head head, Tail... tail) { + cerrArgs(head); + cerrArgs(tail...); +} + +// Recursively write all arguments to cerr if status is not success, then throw +template void writeToCerrAndThrowIfFailed(bool failed, Args... args) { + if (failed) { + cerrArgs(args...); + throw std::runtime_error("Unrecoverable error encountered in FsGrid, consult cerr for more information"); + } +} + +template void mpiCheck(int status, Args... args) { + writeToCerrAndThrowIfFailed(status != MPI_SUCCESS, args...); +} + +template void debugAssert([[maybe_unused]] bool condition, [[maybe_unused]] Args... 
args) { +#ifdef FSGRID_DEBUG + writeToCerrAndThrowIfFailed(condition, args...); +#endif +} +} // namespace fsgrid diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..cec0247 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.18) + +include(GoogleTest) + +set(data_dirname testdata) +set(data_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/${data_dirname}) + +find_package(OpenMP) + +add_subdirectory(unit_tests) +add_subdirectory(mpi_tests) + +add_custom_target(testdata_symlink ALL + COMMAND ${CMAKE_COMMAND} -E create_symlink "${data_src_dir}" "${CMAKE_CURRENT_BINARY_DIR}/${data_dirname}" + COMMENT "Add symbolic link from ${data_src_dir} to ${CMAKE_CURRENT_BINARY_DIR}/${data_dirname}" + VERBATIM +) diff --git a/tests/Makefile b/tests/Makefile deleted file mode 100644 index 9001903..0000000 --- a/tests/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -CXX=mpic++ -# CXXFLAGS= -O3 -std=c++17 -ffast-math -march=native -g -Wall -CXXFLAGS= -O3 -std=c++17 -march=native -g -Wall -# CXXFLAGS= -O0 -std=c++17 -march=native -g -Wall - -all: clean ddtest - -benchmark: benchmark.cpp ../fsgrid.hpp - $(CXX) $(CXXFLAGS) -o $@ $< -test: test.cpp ../fsgrid.hpp - $(CXX) $(CXXFLAGS) -o $@ $< -ddtest: ddtest.cpp - $(CXX) $(CXXFLAGS) -o $@ $< - -clean: - -rm test ddtest benchmark diff --git a/tests/benchmark.cpp b/tests/benchmark.cpp deleted file mode 100644 index babf908..0000000 --- a/tests/benchmark.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include "../fsgrid.hpp" -/* - Copyright (C) 2016 Finnish Meteorological Institute - Copyright (C) 2016 CSC -IT Center for Science - - This file is part of fsgrid - - fsgrid is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - fsgrid is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; - without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with fsgrid. If not, see . 
-*/ - -template void timeit(std::array globalSize, std::array isPeriodic, int iterations){ - double t1,t2; - FsGridCouplingInformation gridCoupling; - FsGrid testGrid(globalSize, MPI_COMM_WORLD, isPeriodic, gridCoupling); - int rank,size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - testGrid.updateGhostCells(); - - MPI_Barrier(MPI_COMM_WORLD); - t1=MPI_Wtime(); - for(int i = 0; i < iterations; i++) { - testGrid.updateGhostCells(); - } - MPI_Barrier(MPI_COMM_WORLD); - t2=MPI_Wtime(); - if(rank==0) - printf("%g s per update: nprocs %d, grid is %d x %d x %d, stencil %d, element size %ld \n", (t2 - t1)/iterations, size, globalSize[0], globalSize[1], globalSize[2], stencil, sizeof(*testGrid.get(0,0,0))); -} - - -int main(int argc, char** argv) { - - MPI_Init(&argc,&argv); - - int rank,size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - - // Create a 8×8 Testgrid - std::array globalSize{2000,1000,1}; - std::array isPeriodic{false,false,true}; - - const int iterations = 200; - - timeit, 2>(globalSize, isPeriodic, iterations); - timeit, 2>(globalSize, isPeriodic, iterations); - timeit, 2>(globalSize, isPeriodic, iterations); - timeit, 2>(globalSize, isPeriodic, iterations); - timeit, 2>(globalSize, isPeriodic, iterations); - timeit, 2>(globalSize, isPeriodic, iterations); - timeit, 2>(globalSize, isPeriodic, iterations); - timeit, 2>(globalSize, isPeriodic, iterations); - - - MPI_Finalize(); - return 0; -} diff --git a/tests/ddtest.cpp b/tests/ddtest.cpp deleted file mode 100644 index 3240a7a..0000000 --- a/tests/ddtest.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - Copyright (C) 2016 Finnish Meteorological Institute - Copyright (C) 2016 CSC -IT Center for Science - - This file is part of fsgrid - - fsgrid is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - fsgrid is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; - without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with fsgrid. If not, see . 
-*/ - -#include -#include -#include -#include -#include -#include -#include "../fsgrid.hpp" - -int main(int argc, char **argv){ - - std::array sys; - std::array processDomainDecomposition; - - if(argc != 5) { - printf("Usage %s size_x size_y size_z nProcesses\n", argv[0]); - exit(1); - } - - - sys[0] = atof(argv[1]); - sys[1] = atof(argv[2]); - sys[2] = atof(argv[3]); - uint nProcs = atoi(argv[4]); - - FsGridTools::computeDomainDecomposition(sys, nProcs, processDomainDecomposition, 1, true); - printf("DD of %ld %ld %ld for %d processes is %ld %ld %ld \n", - sys[0], sys[1], sys[2], nProcs, - processDomainDecomposition[0], processDomainDecomposition[1], processDomainDecomposition[2]); - - - -} diff --git a/tests/mpi_tests/CMakeLists.txt b/tests/mpi_tests/CMakeLists.txt new file mode 100644 index 0000000..4a59d31 --- /dev/null +++ b/tests/mpi_tests/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.18) + +find_package(MPI REQUIRED) + +set(testname ${PROJECT_NAME}_mpi_tests) + +add_executable(${testname} + main.cpp + grid_tests.cpp +) + +target_include_directories(${testname} PRIVATE + "${PROJECT_SOURCE_DIR}/src" +) + +target_link_libraries( + ${testname} + GTest::gtest_main + MPI::MPI_CXX + OpenMP::OpenMP_CXX +) + +# Run the tests with mpiexec +# Need to add before gtest_add_tests to have effect + +# !!!! +# N.B. using gtest_discover_tests instead of gtest_add_tests +# adds the tests X times, +# where X is the number of processes used for mpi +# In other words "${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 4" +# runs all the discovered tests four times +# !!!! +set_property(TARGET ${testname} + PROPERTY CROSSCOMPILING_EMULATOR + ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPI_TEST_NUM_PROCS} ${MPI_TEST_EXTRA_FLAGS} + ) + +gtest_add_tests(TARGET ${testname}) diff --git a/tests/mpi_tests/grid_tests.cpp b/tests/mpi_tests/grid_tests.cpp new file mode 100644 index 0000000..2cf397b --- /dev/null +++ b/tests/mpi_tests/grid_tests.cpp @@ -0,0 +1,82 @@ +#include "grid.hpp" +#include +#include +#include + +TEST(FsGridTest, localToGlobalRoundtrip1) { + const std::array globalSize{1024, 666, 71}; + const MPI_Comm parentComm = MPI_COMM_WORLD; + const std::array periodic{true, true, false}; + auto numProcs = 0; + MPI_Comm_size(parentComm, &numProcs); + + const auto grid = fsgrid::FsGrid<1>(globalSize, parentComm, numProcs, periodic, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}); + const auto localSize = grid.getLocalSize(); + for (int32_t x = 0; x < localSize[0]; x++) { + for (int32_t y = 0; y < localSize[1]; y++) { + for (int32_t z = 0; z < localSize[2]; z++) { + const auto global = grid.localToGlobal(x, y, z); + const auto local = grid.globalToLocal(global[0], global[1], global[2]); + ASSERT_EQ(local[0], x); + ASSERT_EQ(local[1], y); + ASSERT_EQ(local[2], z); + } + } + } +} + +TEST(FsGridTest, myGlobalIDCorrespondsToMyTask) { + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + const std::array globalSize{6547, 16, 77}; + const MPI_Comm parentComm = MPI_COMM_WORLD; + const std::array periodic{true, false, false}; + auto numProcs = 0; + MPI_Comm_size(parentComm, &numProcs); + + const auto grid = fsgrid::FsGrid<1>(globalSize, parentComm, numProcs, periodic, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}); + const auto localSize = grid.getLocalSize(); + for (int32_t x = 0; x < localSize[0]; x++) { + for (int32_t y = 0; y < localSize[1]; y++) { + for (int32_t z = 0; z < localSize[2]; z++) { + const auto gid = grid.globalIDFromLocalCoordinates(x, y, z); + const auto task = grid.getTaskForGlobalID(gid); + 
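// Each locally owned cell's GlobalID should map back to this rank +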
ASSERT_EQ(task, rank); + ASSERT_EQ(task, rank); + ASSERT_EQ(task, rank); + } + } + } +} + +TEST(FsGridTest, getTaskForGlobalID1) { + const std::array globalSize{11, 5, 1048}; + const MPI_Comm parentComm = MPI_COMM_WORLD; + const std::array periodic{true, true, false}; + constexpr int32_t numGhostCells = 2; + auto numProcs = 0; + MPI_Comm_size(parentComm, &numProcs); + + auto grid = + fsgrid::FsGrid(globalSize, parentComm, numProcs, periodic, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}); + constexpr auto id = 666; + const auto task = grid.getTaskForGlobalID(id); + printf("Task for id %d: %d\n", id, task); + ASSERT_EQ(0, task); +} + +TEST(FsGridTest, getTaskForGlobalID2) { + const std::array globalSize{11, 5, 1048}; + const MPI_Comm parentComm = MPI_COMM_WORLD; + const std::array periodic{true, true, false}; + constexpr int32_t numGhostCells = 2; + auto numProcs = 4; + + auto grid = + fsgrid::FsGrid(globalSize, parentComm, numProcs, periodic, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}); + constexpr auto id = 666; + const auto task = grid.getTaskForGlobalID(id); + printf("Task for id %d: %d\n", id, task); + ASSERT_EQ(0, task); +} diff --git a/tests/mpi_tests/main.cpp b/tests/mpi_tests/main.cpp new file mode 100644 index 0000000..29f8524 --- /dev/null +++ b/tests/mpi_tests/main.cpp @@ -0,0 +1,10 @@ +#include +#include + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + MPI_Init(&argc, &argv); + auto result = RUN_ALL_TESTS(); + MPI_Finalize(); + return result; +} diff --git a/tests/test-dd.sh b/tests/test-dd.sh deleted file mode 100755 index d978eee..0000000 --- a/tests/test-dd.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -vals=("256 256 256", "256 256 128", "256 128 256", "128 256 256") -# vals=("1024 736 736") -nprocs=(32) -# nprocs=(2400) -for val in "${nprocs[@]}" -do - for dims in "${vals[@]}" - do - mpirun -n 1 ./ddtest $dims $val -# mpirun -n 1 ./ddtest 256 256 128 $val -# mpirun -n 1 ./ddtest 256 128 256 $val -# mpirun -n 1 ./ddtest 128 256 256 $val - done - -done - diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt new file mode 100644 index 0000000..a5b211a --- /dev/null +++ b/tests/unit_tests/CMakeLists.txt @@ -0,0 +1,27 @@ +cmake_minimum_required(VERSION 3.18) + +find_package(MPI REQUIRED) + +set(testname ${PROJECT_NAME}_unit_tests) + +add_executable( + ${testname} + coordinates_tests.cpp + data_tests.cpp + fsstencil_tests.cpp + grid_tests.cpp + tools_tests.cpp +) + +target_include_directories(${testname} PRIVATE + "${PROJECT_SOURCE_DIR}/src" + ) + +target_link_libraries( + ${testname} + GTest::gtest_main + MPI::MPI_CXX + OpenMP::OpenMP_CXX +) + +gtest_discover_tests(${testname}) diff --git a/tests/unit_tests/coordinates_tests.cpp b/tests/unit_tests/coordinates_tests.cpp new file mode 100644 index 0000000..267c88a --- /dev/null +++ b/tests/unit_tests/coordinates_tests.cpp @@ -0,0 +1,331 @@ +#include "coordinates.hpp" + +#include + +TEST(CoordinatesTest, singleRankCoordinates) { + constexpr std::array physicalGridSpacing{0.1, 0.1, 0.1}; + constexpr std::array physicalGlobalStart{0.0, 0.0, 0.0}; + constexpr std::array globalSize{1024, 1, 512}; + constexpr std::array periodic{false, false, false}; + constexpr std::array decomposition{0, 0, 0}; + constexpr std::array taskPosition{0, 0, 0}; + constexpr int32_t numRanks = 1; + constexpr int32_t numGhostCells = 1; + + constexpr fsgrid::Coordinates coordinates(physicalGridSpacing, physicalGlobalStart, globalSize, periodic, + decomposition, taskPosition, numRanks, numGhostCells); + + 
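// With a single rank, the entire global domain is local to this task +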
ASSERT_EQ(coordinates.numTasksPerDim[0], 1); + ASSERT_EQ(coordinates.numTasksPerDim[1], 1); + ASSERT_EQ(coordinates.numTasksPerDim[2], 1); + + ASSERT_EQ(coordinates.localStart[0], 0); + ASSERT_EQ(coordinates.localStart[1], 0); + ASSERT_EQ(coordinates.localStart[2], 0); + + ASSERT_EQ(coordinates.localSize[0], coordinates.globalSize[0]); + ASSERT_EQ(coordinates.localSize[1], coordinates.globalSize[1]); + ASSERT_EQ(coordinates.localSize[2], coordinates.globalSize[2]); + + auto i = 0; + for (auto z = 0; z < coordinates.localSize[2]; z++) { + for (auto y = 0; y < coordinates.localSize[1]; y++) { + for (auto x = 0; x < coordinates.localSize[0]; x++) { + const auto global = coordinates.localToGlobal(x, y, z); + ASSERT_EQ(global[0], x); + ASSERT_EQ(global[1], y); + ASSERT_EQ(global[2], z); + + const auto local = coordinates.globalToLocal(global[0], global[1], global[2]); + ASSERT_EQ(local[0], x); + ASSERT_EQ(local[1], y); + ASSERT_EQ(local[2], z); + + const auto physical = coordinates.getPhysicalCoords(x, y, z); + ASSERT_EQ(physical[0], x * physicalGridSpacing[0]); + ASSERT_EQ(physical[1], y * physicalGridSpacing[1]); + ASSERT_EQ(physical[2], z * physicalGridSpacing[2]); + + ASSERT_EQ(coordinates.globalIDFromLocalCoordinates(x, y, z), i++); + } + } + } +} + +TEST(CoordinatesTest, neighbourIndex) { + constexpr std::array physicalGridSpacing{0.1, 0.1, 0.1}; + constexpr std::array physicalGlobalStart{0.0, 0.0, 0.0}; + constexpr std::array globalSize{1024, 20, 512}; + constexpr std::array periodic{false, false, false}; + constexpr std::array decomposition{0, 0, 0}; + constexpr std::array taskPosition{0, 0, 0}; + constexpr int32_t numRanks = 16; + constexpr int32_t numGhostCells = 1; + + constexpr fsgrid::Coordinates coordinates(physicalGridSpacing, physicalGlobalStart, globalSize, periodic, + decomposition, taskPosition, numRanks, numGhostCells); + + constexpr std::array xs{-numGhostCells, 0, coordinates.localSize[0] + 2 * numGhostCells - 1}; + constexpr std::array ys{-numGhostCells, 0, coordinates.localSize[1] + 2 * numGhostCells - 1}; + constexpr std::array zs{-numGhostCells, 0, coordinates.localSize[2] + 2 * numGhostCells - 1}; + + size_t i = 0; + for (auto x : xs) { + for (auto y : ys) { + for (auto z : zs) { + ASSERT_EQ(coordinates.neighbourIndexFromCellCoordinates(x, y, z), i++); + } + } + } +} + +TEST(CoordinatesTest, shiftCellIndices) { + constexpr std::array physicalGridSpacing{0.1, 0.1, 0.1}; + constexpr std::array physicalGlobalStart{0.0, 0.0, 0.0}; + constexpr std::array globalSize{1024, 20, 512}; + constexpr std::array periodic{false, false, false}; + constexpr std::array decomposition{0, 0, 0}; + constexpr std::array taskPosition{0, 0, 0}; + constexpr int32_t numRanks = 16; + constexpr int32_t numGhostCells = 1; + + constexpr fsgrid::Coordinates coordinates(physicalGridSpacing, physicalGlobalStart, globalSize, periodic, + decomposition, taskPosition, numRanks, numGhostCells); + + constexpr std::array xs{-1, 0, coordinates.localSize[0] + 1}; + constexpr std::array ys{-1, 0, coordinates.localSize[1] + 1}; + constexpr std::array zs{-1, 0, coordinates.localSize[2] + 1}; + constexpr std::array, 27> values{ + std::array{ + coordinates.localSize[0] - 1, + coordinates.localSize[1] - 1, + coordinates.localSize[2] - 1, + }, + { + coordinates.localSize[0] - 1, + coordinates.localSize[1] - 1, + 0, + }, + { + coordinates.localSize[0] - 1, + coordinates.localSize[1] - 1, + 1, + }, + { + coordinates.localSize[0] - 1, + 0, + coordinates.localSize[2] - 1, + }, + { + coordinates.localSize[0] - 1, + 0, + 
0, + }, + { + coordinates.localSize[0] - 1, + 0, + 1, + }, + { + coordinates.localSize[0] - 1, + 1, + coordinates.localSize[2] - 1, + }, + { + coordinates.localSize[0] - 1, + 1, + 0, + }, + { + coordinates.localSize[0] - 1, + 1, + 1, + }, + { + 0, + coordinates.localSize[1] - 1, + coordinates.localSize[2] - 1, + }, + { + 0, + coordinates.localSize[1] - 1, + 0, + }, + { + 0, + coordinates.localSize[1] - 1, + 1, + }, + { + 0, + 0, + coordinates.localSize[2] - 1, + }, + { + 0, + 0, + 0, + }, + { + 0, + 0, + 1, + }, + { + 0, + 1, + coordinates.localSize[2] - 1, + }, + { + 0, + 1, + 0, + }, + { + 0, + 1, + 1, + }, + { + 1, + coordinates.localSize[1] - 1, + coordinates.localSize[2] - 1, + }, + { + 1, + coordinates.localSize[1] - 1, + 0, + }, + { + 1, + coordinates.localSize[1] - 1, + 1, + }, + { + 1, + 0, + coordinates.localSize[2] - 1, + }, + { + 1, + 0, + 0, + }, + { + 1, + 0, + 1, + }, + { + 1, + 1, + coordinates.localSize[2] - 1, + }, + { + 1, + 1, + 0, + }, + { + 1, + 1, + 1, + }, + }; + + size_t i = 0; + for (auto x : xs) { + for (auto y : ys) { + for (auto z : zs) { + const auto shifted = coordinates.shiftCellIndices(x, y, z); + ASSERT_EQ(shifted[0], values[i][0]); + ASSERT_EQ(shifted[1], values[i][1]); + ASSERT_EQ(shifted[2], values[i][2]); + i++; + } + } + } +} + +TEST(CoordinatesTest, indicesWithinDomain1) { + constexpr std::array physicalGridSpacing{0.1, 0.1, 0.1}; + constexpr std::array physicalGlobalStart{0.0, 0.0, 0.0}; + constexpr std::array globalSize{1024, 20, 512}; + constexpr std::array periodic{false, false, false}; + constexpr std::array decomposition{0, 0, 0}; + constexpr std::array taskPosition{0, 0, 0}; + constexpr int32_t numRanks = 16; + constexpr int32_t numGhostCells = 1; + + constexpr fsgrid::Coordinates coordinates(physicalGridSpacing, physicalGlobalStart, globalSize, periodic, + decomposition, taskPosition, numRanks, numGhostCells); + + constexpr std::array xs{-numGhostCells, 0, coordinates.localSize[0] + numGhostCells - 1}; + constexpr std::array ys{-numGhostCells, 0, coordinates.localSize[1] + numGhostCells - 1}; + constexpr std::array zs{-numGhostCells, 0, coordinates.localSize[2] + numGhostCells - 1}; + + for (auto x : xs) { + for (auto y : ys) { + for (auto z : zs) { + ASSERT_TRUE(coordinates.cellIndicesAreWithinBounds(x, y, z)); + } + } + } +} + +TEST(CoordinatesTest, indicesWithinPeriodicDomain) { + constexpr std::array physicalGridSpacing{0.1, 0.1, 0.1}; + constexpr std::array physicalGlobalStart{0.0, 0.0, 0.0}; + constexpr std::array globalSize{1024, 1, 512}; + constexpr std::array periodic{false, true, false}; + constexpr std::array decomposition{0, 0, 0}; + constexpr std::array taskPosition{0, 0, 0}; + constexpr int32_t numRanks = 16; + constexpr int32_t numGhostCells = 1; + + constexpr fsgrid::Coordinates coordinates(physicalGridSpacing, physicalGlobalStart, globalSize, periodic, + decomposition, taskPosition, numRanks, numGhostCells); + ASSERT_FALSE(coordinates.cellIndicesAreWithinBounds(0, -numGhostCells - 1, 0)); + ASSERT_TRUE(coordinates.cellIndicesAreWithinBounds(0, -numGhostCells, 0)); + ASSERT_TRUE(coordinates.cellIndicesAreWithinBounds(0, 0, 0)); + ASSERT_TRUE(coordinates.cellIndicesAreWithinBounds(0, numGhostCells, 0)); + ASSERT_FALSE(coordinates.cellIndicesAreWithinBounds(0, numGhostCells + 1, 0)); +} + +TEST(CoordinatesTest, indicesNotWithinDomain) { + constexpr std::array physicalGridSpacing{0.1, 0.1, 0.1}; + constexpr std::array physicalGlobalStart{0.0, 0.0, 0.0}; + constexpr std::array globalSize{1024, 1, 512}; + constexpr std::array 
periodic{false, false, false}; + constexpr std::array decomposition{0, 0, 0}; + constexpr std::array taskPosition{0, 0, 0}; + constexpr int32_t numRanks = 16; + constexpr int32_t numGhostCells = 1; + + constexpr fsgrid::Coordinates coordinates(physicalGridSpacing, physicalGlobalStart, globalSize, periodic, + decomposition, taskPosition, numRanks, numGhostCells); + ASSERT_FALSE(coordinates.cellIndicesAreWithinBounds(0, -numGhostCells - 1, 0)); + ASSERT_FALSE(coordinates.cellIndicesAreWithinBounds(0, -numGhostCells, 0)); + ASSERT_TRUE(coordinates.cellIndicesAreWithinBounds(0, 0, 0)); + ASSERT_FALSE(coordinates.cellIndicesAreWithinBounds(0, numGhostCells, 0)); + ASSERT_FALSE(coordinates.cellIndicesAreWithinBounds(0, numGhostCells + 1, 0)); +} + +TEST(CoordinatesTest, indicesWithinNonPeriodicDomain) { + constexpr std::array physicalGridSpacing{0.1, 0.1, 0.1}; + constexpr std::array physicalGlobalStart{0.0, 0.0, 0.0}; + constexpr std::array globalSize{1024, 2, 512}; + constexpr std::array periodic{false, false, false}; + constexpr std::array decomposition{0, 0, 0}; + constexpr std::array taskPosition{0, 0, 0}; + constexpr int32_t numRanks = 16; + constexpr int32_t numGhostCells = 1; + + constexpr fsgrid::Coordinates coordinates(physicalGridSpacing, physicalGlobalStart, globalSize, periodic, + decomposition, taskPosition, numRanks, numGhostCells); + ASSERT_FALSE(coordinates.cellIndicesAreWithinBounds(0, -numGhostCells - 1, 0)); + ASSERT_TRUE(coordinates.cellIndicesAreWithinBounds(0, -numGhostCells, 0)); + ASSERT_TRUE(coordinates.cellIndicesAreWithinBounds(0, 0, 0)); + ASSERT_TRUE(coordinates.cellIndicesAreWithinBounds(0, coordinates.localSize[1] + numGhostCells - 1, 0)); + ASSERT_FALSE(coordinates.cellIndicesAreWithinBounds(0, coordinates.localSize[1] + numGhostCells, 0)); +} diff --git a/tests/unit_tests/data_tests.cpp b/tests/unit_tests/data_tests.cpp new file mode 100644 index 0000000..685223f --- /dev/null +++ b/tests/unit_tests/data_tests.cpp @@ -0,0 +1,102 @@ +#include "data.hpp" + +#include + +using namespace fsgrid; + +TEST(FsDataTest, size_set_correctly) { + constexpr size_t N = 10; + Data data(N); + ASSERT_EQ(data.size(), N); +} + +TEST(FsDataTest, view_works) { + constexpr size_t N = 10; + Data data(N); + + for (const auto& e : data.view()) { + ASSERT_EQ(e, 0.0f); + } + + for (auto& e : data.view()) { + e = 666.0f; + } + + for (const auto& e : data.view()) { + ASSERT_EQ(e, 666.0f); + } +} + +TEST(FsDataTest, swap_works) { + constexpr size_t N = 10; + Data data(N); + Data data2(2 * N); + + for (const auto& e : data.view()) { + ASSERT_EQ(e, 0.0f); + } + + for (const auto& e : data2.view()) { + ASSERT_EQ(e, 0.0f); + } + + for (auto& e : data.view()) { + e = 666.0f; + } + + using std::swap; + swap(data, data2); + + for (const auto& e : data.view()) { + ASSERT_EQ(e, 0.0f); + } + + for (const auto& e : data2.view()) { + ASSERT_EQ(e, 666.0f); + } + + ASSERT_EQ(data.size(), 2 * N); + ASSERT_EQ(data2.size(), N); +} + +TEST(FsDataTest, constructed_correctly_from_elements) { + constexpr size_t N = 10; + std::vector elements(N, 1.337f); + Data data(std::span{elements}); + ASSERT_EQ(data.size(), N); + for (const auto& e : data.view()) { + ASSERT_EQ(e, 1.337f); + } +} + +TEST(FsDataTest, data_is_correct) { + constexpr size_t N = 10; + Data data(N); + + for (const auto& e : data.view()) { + ASSERT_EQ(e, 0ul); + } + + size_t i = 0; + for (auto& e : data.view()) { + e = i++; + } + + for (size_t j = 0; j < data.size(); j++) { + ASSERT_EQ(*(data.data() + j), j); + } +} + +TEST(FsDataTest, 
indexing_works) { + constexpr size_t N = 10; + Data data(N); + + size_t i = 0; + for (auto& e : data.view()) { + e = i++; + } + + for (size_t j = 0; j < data.size(); j++) { + ASSERT_EQ(data.view()[j], data[j]); + } +} diff --git a/tests/unit_tests/fsstencil_tests.cpp b/tests/unit_tests/fsstencil_tests.cpp new file mode 100644 index 0000000..695d16b --- /dev/null +++ b/tests/unit_tests/fsstencil_tests.cpp @@ -0,0 +1,139 @@ +#include + +#include "stencil.hpp" + +TEST(BitMaskTest, unsetMask) { + constexpr fsgrid::BitMask32 mask(0); + for (uint32_t i = 0; i < 32; i++) { + ASSERT_EQ(mask[i], 0); + } +} + +TEST(BitMaskTest, bit1IsSet) { + constexpr fsgrid::BitMask32 mask(1); + ASSERT_EQ(mask[0], 1); + for (uint32_t i = 1; i < 32; i++) { + ASSERT_EQ(mask[i], 0); + } +} + +TEST(BitMaskTest, bits1and2AreSet) { + constexpr fsgrid::BitMask32 mask(3); + ASSERT_EQ(mask[0], 1); + ASSERT_EQ(mask[1], 1); + for (uint32_t i = 2; i < 32; i++) { + ASSERT_EQ(mask[i], 0); + } +} + +TEST(BitMaskTest, allBitsAreSet) { + constexpr fsgrid::BitMask32 mask(~0u); + for (uint32_t i = 0; i < 32; i++) { + ASSERT_EQ(mask[i], 1); + } +} + +TEST(BitMaskTest, tooLargeIndexGivesZero) { + constexpr fsgrid::BitMask32 mask(~0u); + ASSERT_EQ(mask[32], 0); +} + +TEST(FsStencilTest, cellExistsWhenFallBackBitsAreZeroAndNumGhostCellsIsOne) { + constexpr fsgrid::StencilConstants sc({1, 1, 1}, {0, 0, 0}, 0, 1, 0, 0); + constexpr fsgrid::FsStencil s(0, 0, 0, sc); + + for (int32_t x = -1; x < 2; x++) { + for (int32_t y = -1; y < 2; y++) { + for (int32_t z = -1; z < 2; z++) { + ASSERT_TRUE(s.cellExists(x, y, z)); + } + } + } +} + +TEST(FsStencilTest, cellDoesNotExistWhenFallBackBitsAreZeroAndNumGhostCellsIsZero) { + constexpr fsgrid::StencilConstants sc({1, 1, 1}, {0, 0, 0}, 0, 0, 0, 0); + constexpr fsgrid::FsStencil s(0, 0, 0, sc); + + for (int32_t x = -1; x < 2; x++) { + for (int32_t y = -1; y < 2; y++) { + for (int32_t z = -1; z < 2; z++) { + if (x == 0 && y == 0 && z == 0) { + ASSERT_TRUE(s.cellExists(x, y, z)); + } else { + ASSERT_FALSE(s.cellExists(x, y, z)); + } + } + } + } +} + +TEST(FsStencilTest, cellDoesNotExistsWhenFallBackBitsAreZeroAndNumGhostCellsIsOne) { + constexpr fsgrid::StencilConstants sc({1, 1, 1}, {0, 0, 0}, 0, 1, 0, 0); + constexpr fsgrid::FsStencil s(0, 0, 0, sc); + + for (int32_t x = -2; x < 3; x++) { + for (int32_t y = -2; y < 3; y++) { + for (int32_t z = -2; z < 3; z++) { + if (abs(x) < 2 && abs(y) < 2 && abs(z) < 2) { + ASSERT_TRUE(s.cellExists(x, y, z)); + } else { + ASSERT_FALSE(s.cellExists(x, y, z)); + } + } + } + } +} + +TEST(FsStencilTest, onlyCenterExistsWhenAllFallbackBitsButCenterAreOne) { + constexpr fsgrid::StencilConstants sc({1, 1, 1}, {0, 0, 0}, 0, 0, 0, 0b00000111111111111101111111111111); + constexpr fsgrid::FsStencil s(0, 0, 0, sc); + + for (int32_t x = -1; x < 2; x++) { + for (int32_t y = -1; y < 2; y++) { + for (int32_t z = -1; z < 2; z++) { + if (x == 0 && y == 0 && z == 0) { + ASSERT_TRUE(s.cellExists(x, y, z)); + } else { + ASSERT_FALSE(s.cellExists(x, y, z)); + } + } + } + } +} + +TEST(FsStencilTest, indicesAreCorrect1) { + // 3x3x3 cube with no ghost cells + constexpr fsgrid::StencilConstants sc({3, 3, 3}, {1, 3, 9}, 0, 0, 0, 0); + constexpr fsgrid::FsStencil s(1, 1, 1, sc); + + size_t j = 0; + for (const auto& i : s.indices()) { + ASSERT_EQ(i, j++); + } +} + +TEST(FsStencilTest, indicesAreCorrect2) { + // 3x3x3 cube with 1 ghost cell everywhere, so 5x5x5 cube with ghost cells + constexpr fsgrid::StencilConstants sc({3, 3, 3}, {1, 5, 25}, 0, 1, 0, 0); + constexpr fsgrid::FsStencil s(1, 
1, 1, sc); + + // clang-format off + constexpr std::array indices = { + 0, 1, 2, + 5, 6, 7, + 10, 11, 12, + 25, 26, 27, + 30, 31, 32, + 35, 36, 37, + 50, 51, 52, + 55, 56, 57, + 60, 61, 62, + }; + // clang-format on + + size_t j = 0; + for (const auto& i : s.indices()) { + ASSERT_EQ(i, indices[j++]); + } +} diff --git a/tests/unit_tests/grid_tests.cpp b/tests/unit_tests/grid_tests.cpp new file mode 100644 index 0000000..794d265 --- /dev/null +++ b/tests/unit_tests/grid_tests.cpp @@ -0,0 +1,174 @@ +#include "grid.hpp" +#include + +TEST(FsGridTest, xyzToLinear) { + ASSERT_EQ(0, fsgrid_detail::xyzToLinear(-1, -1, -1)); + ASSERT_EQ(1, fsgrid_detail::xyzToLinear(-1, -1, 0)); + ASSERT_EQ(2, fsgrid_detail::xyzToLinear(-1, -1, 1)); + ASSERT_EQ(3, fsgrid_detail::xyzToLinear(-1, 0, -1)); + ASSERT_EQ(4, fsgrid_detail::xyzToLinear(-1, 0, 0)); + ASSERT_EQ(5, fsgrid_detail::xyzToLinear(-1, 0, 1)); + ASSERT_EQ(6, fsgrid_detail::xyzToLinear(-1, 1, -1)); + ASSERT_EQ(7, fsgrid_detail::xyzToLinear(-1, 1, 0)); + ASSERT_EQ(8, fsgrid_detail::xyzToLinear(-1, 1, 1)); + ASSERT_EQ(9, fsgrid_detail::xyzToLinear(0, -1, -1)); + ASSERT_EQ(10, fsgrid_detail::xyzToLinear(0, -1, 0)); + ASSERT_EQ(11, fsgrid_detail::xyzToLinear(0, -1, 1)); + ASSERT_EQ(12, fsgrid_detail::xyzToLinear(0, 0, -1)); + ASSERT_EQ(13, fsgrid_detail::xyzToLinear(0, 0, 0)); + ASSERT_EQ(14, fsgrid_detail::xyzToLinear(0, 0, 1)); + ASSERT_EQ(15, fsgrid_detail::xyzToLinear(0, 1, -1)); + ASSERT_EQ(16, fsgrid_detail::xyzToLinear(0, 1, 0)); + ASSERT_EQ(17, fsgrid_detail::xyzToLinear(0, 1, 1)); + ASSERT_EQ(18, fsgrid_detail::xyzToLinear(1, -1, -1)); + ASSERT_EQ(19, fsgrid_detail::xyzToLinear(1, -1, 0)); + ASSERT_EQ(20, fsgrid_detail::xyzToLinear(1, -1, 1)); + ASSERT_EQ(21, fsgrid_detail::xyzToLinear(1, 0, -1)); + ASSERT_EQ(22, fsgrid_detail::xyzToLinear(1, 0, 0)); + ASSERT_EQ(23, fsgrid_detail::xyzToLinear(1, 0, 1)); + ASSERT_EQ(24, fsgrid_detail::xyzToLinear(1, 1, -1)); + ASSERT_EQ(25, fsgrid_detail::xyzToLinear(1, 1, 0)); + ASSERT_EQ(26, fsgrid_detail::xyzToLinear(1, 1, 1)); +} + +TEST(FsGridTest, linearToX) { + ASSERT_EQ(-1, fsgrid_detail::linearToX(0)); + ASSERT_EQ(-1, fsgrid_detail::linearToX(1)); + ASSERT_EQ(-1, fsgrid_detail::linearToX(2)); + ASSERT_EQ(-1, fsgrid_detail::linearToX(3)); + ASSERT_EQ(-1, fsgrid_detail::linearToX(4)); + ASSERT_EQ(-1, fsgrid_detail::linearToX(5)); + ASSERT_EQ(-1, fsgrid_detail::linearToX(6)); + ASSERT_EQ(-1, fsgrid_detail::linearToX(7)); + ASSERT_EQ(-1, fsgrid_detail::linearToX(8)); + ASSERT_EQ(0, fsgrid_detail::linearToX(9)); + ASSERT_EQ(0, fsgrid_detail::linearToX(10)); + ASSERT_EQ(0, fsgrid_detail::linearToX(11)); + ASSERT_EQ(0, fsgrid_detail::linearToX(12)); + ASSERT_EQ(0, fsgrid_detail::linearToX(13)); + ASSERT_EQ(0, fsgrid_detail::linearToX(14)); + ASSERT_EQ(0, fsgrid_detail::linearToX(15)); + ASSERT_EQ(0, fsgrid_detail::linearToX(16)); + ASSERT_EQ(0, fsgrid_detail::linearToX(17)); + ASSERT_EQ(1, fsgrid_detail::linearToX(18)); + ASSERT_EQ(1, fsgrid_detail::linearToX(19)); + ASSERT_EQ(1, fsgrid_detail::linearToX(20)); + ASSERT_EQ(1, fsgrid_detail::linearToX(21)); + ASSERT_EQ(1, fsgrid_detail::linearToX(22)); + ASSERT_EQ(1, fsgrid_detail::linearToX(23)); + ASSERT_EQ(1, fsgrid_detail::linearToX(24)); + ASSERT_EQ(1, fsgrid_detail::linearToX(25)); + ASSERT_EQ(1, fsgrid_detail::linearToX(26)); +} + +TEST(FsGridTest, linearToY) { + ASSERT_EQ(-1, fsgrid_detail::linearToY(0)); + ASSERT_EQ(-1, fsgrid_detail::linearToY(1)); + ASSERT_EQ(-1, fsgrid_detail::linearToY(2)); + ASSERT_EQ(0, fsgrid_detail::linearToY(3)); + ASSERT_EQ(0, 
fsgrid_detail::linearToY(4)); + ASSERT_EQ(0, fsgrid_detail::linearToY(5)); + ASSERT_EQ(1, fsgrid_detail::linearToY(6)); + ASSERT_EQ(1, fsgrid_detail::linearToY(7)); + ASSERT_EQ(1, fsgrid_detail::linearToY(8)); + ASSERT_EQ(-1, fsgrid_detail::linearToY(9)); + ASSERT_EQ(-1, fsgrid_detail::linearToY(10)); + ASSERT_EQ(-1, fsgrid_detail::linearToY(11)); + ASSERT_EQ(0, fsgrid_detail::linearToY(12)); + ASSERT_EQ(0, fsgrid_detail::linearToY(13)); + ASSERT_EQ(0, fsgrid_detail::linearToY(14)); + ASSERT_EQ(1, fsgrid_detail::linearToY(15)); + ASSERT_EQ(1, fsgrid_detail::linearToY(16)); + ASSERT_EQ(1, fsgrid_detail::linearToY(17)); + ASSERT_EQ(-1, fsgrid_detail::linearToY(18)); + ASSERT_EQ(-1, fsgrid_detail::linearToY(19)); + ASSERT_EQ(-1, fsgrid_detail::linearToY(20)); + ASSERT_EQ(0, fsgrid_detail::linearToY(21)); + ASSERT_EQ(0, fsgrid_detail::linearToY(22)); + ASSERT_EQ(0, fsgrid_detail::linearToY(23)); + ASSERT_EQ(1, fsgrid_detail::linearToY(24)); + ASSERT_EQ(1, fsgrid_detail::linearToY(25)); + ASSERT_EQ(1, fsgrid_detail::linearToY(26)); +} + +TEST(FsGridTest, linearToZ) { + ASSERT_EQ(-1, fsgrid_detail::linearToZ(0)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(1)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(2)); + ASSERT_EQ(-1, fsgrid_detail::linearToZ(3)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(4)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(5)); + ASSERT_EQ(-1, fsgrid_detail::linearToZ(6)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(7)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(8)); + ASSERT_EQ(-1, fsgrid_detail::linearToZ(9)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(10)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(11)); + ASSERT_EQ(-1, fsgrid_detail::linearToZ(12)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(13)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(14)); + ASSERT_EQ(-1, fsgrid_detail::linearToZ(15)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(16)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(17)); + ASSERT_EQ(-1, fsgrid_detail::linearToZ(18)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(19)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(20)); + ASSERT_EQ(-1, fsgrid_detail::linearToZ(21)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(22)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(23)); + ASSERT_EQ(-1, fsgrid_detail::linearToZ(24)); + ASSERT_EQ(0, fsgrid_detail::linearToZ(25)); + ASSERT_EQ(1, fsgrid_detail::linearToZ(26)); +} + +TEST(FsGridTest, xyzToLinearToxyz) { + for (uint32_t i = 0; i < 27; i++) { + const auto x = fsgrid_detail::linearToX(i); + const auto y = fsgrid_detail::linearToY(i); + const auto z = fsgrid_detail::linearToZ(i); + ASSERT_EQ(i, fsgrid_detail::xyzToLinear(x, y, z)); + } +} + +TEST(FsGridTest, allNeighboursAreSelf) { + constexpr auto rank = 1; + // 13 is never self + constexpr std::array ranks = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }; + + for (auto i = 0u; i < 13u; i++) { + ASSERT_EQ(fsgrid_detail::makeNeigbourBitMask(rank, ranks)[i], 1); + } + + ASSERT_EQ(fsgrid_detail::makeNeigbourBitMask(rank, ranks)[13], 0); + + for (auto i = 14u; i < 27u; i++) { + ASSERT_EQ(fsgrid_detail::makeNeigbourBitMask(rank, ranks)[i], 1); + } +} + +TEST(FsGridTest, noneAreSelf) { + constexpr auto rank = 0; + // 13 is never self + constexpr std::array ranks = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }; + + for (auto i = 0u; i < 27u; i++) { + ASSERT_EQ(fsgrid_detail::makeNeigbourBitMask(rank, ranks)[i], 0); + } +} + +TEST(FsGridTest, firstAndLastAreSelf) { + constexpr auto rank = 0; + constexpr std::array ranks = { + 0, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + }; + + ASSERT_EQ(fsgrid_detail::makeNeigbourBitMask(rank, ranks)[0], 1); + for (auto i = 1u; i < 26u; i++) { + ASSERT_EQ(fsgrid_detail::makeNeigbourBitMask(rank, ranks)[i], 0); + } + ASSERT_EQ(fsgrid_detail::makeNeigbourBitMask(rank, ranks)[26], 1); +} diff --git a/tests/unit_tests/tools_tests.cpp b/tests/unit_tests/tools_tests.cpp new file mode 100644 index 0000000..f3d43dc --- /dev/null +++ b/tests/unit_tests/tools_tests.cpp @@ -0,0 +1,248 @@ +#include "tools.hpp" + +#include +#include +#include + +TEST(FsGridToolsTests, calcLocalStart1) { + constexpr fsgrid::FsSize_t numGlobalCells = 1024u; + constexpr fsgrid::Task_t numTasks = 32u; + + ASSERT_EQ(fsgrid::calcLocalStart(numGlobalCells, numTasks, 0), 0); + ASSERT_EQ(fsgrid::calcLocalStart(numGlobalCells, numTasks, 1), 32); + ASSERT_EQ(fsgrid::calcLocalStart(numGlobalCells, numTasks, 2), 64); + ASSERT_EQ(fsgrid::calcLocalStart(numGlobalCells, numTasks, 3), 96); +} + +TEST(FsGridToolsTests, calcLocalStart2) { + constexpr fsgrid::FsSize_t numGlobalCells = 666u; + constexpr fsgrid::Task_t numTasks = 64u; + + for (int i = 0; i < 26; i++) { + ASSERT_EQ(fsgrid::calcLocalStart(numGlobalCells, numTasks, i), i * 11); + } + for (int i = 26; i < numTasks; i++) { + ASSERT_EQ(fsgrid::calcLocalStart(numGlobalCells, numTasks, i), i * 10 + 26); + } +} + +TEST(FsGridToolsTests, calcLocalSize1) { + constexpr fsgrid::FsSize_t numGlobalCells = 1024u; + constexpr fsgrid::Task_t numTasks = 32u; + + for (int i = 0; i < numTasks; i++) { + ASSERT_EQ(fsgrid::calcLocalSize(numGlobalCells, numTasks, i), 32); + } +} + +TEST(FsGridToolsTests, calcLocalSize2) { + constexpr fsgrid::FsSize_t numGlobalCells = 666u; + constexpr fsgrid::Task_t numTasks = 64u; + + for (int i = 0; i < 26; i++) { + ASSERT_EQ(fsgrid::calcLocalSize(numGlobalCells, numTasks, i), 11); + } + + for (int i = 26; i < numTasks; i++) { + ASSERT_EQ(fsgrid::calcLocalSize(numGlobalCells, numTasks, i), 10); + } +} + +TEST(FsGridToolsTests, globalIDtoCellCoord1) { + constexpr std::array globalSize = {3, 7, 45}; + + for (fsgrid::FsSize_t i = 0; i < globalSize[0] * globalSize[1] * globalSize[2]; i++) { + const std::array result = { + (fsgrid::FsIndex_t)(i % globalSize[0]), + (fsgrid::FsIndex_t)((i / globalSize[0]) % globalSize[1]), + (fsgrid::FsIndex_t)((i / (globalSize[0] * globalSize[1])) % globalSize[2]), + }; + ASSERT_EQ(fsgrid::globalIDtoCellCoord(i, globalSize), result); + } +} + +TEST(FsGridToolsTests, globalIDtoCellCoord_globalSize_would_overflow) { + constexpr uint32_t maxUint = std::numeric_limits::max(); + constexpr std::array globalSize = {maxUint, 1, 1}; + const std::array result = { + -2147483648, + -2147483648, + -2147483648, + }; + ASSERT_EQ(fsgrid::globalIDtoCellCoord(globalSize[0] - 1, globalSize), result); +} + +TEST(FsGridToolsTests, globalIDtoCellCoord_globalSize_is_maximum_int) { + constexpr int32_t maxInt = std::numeric_limits::max(); + constexpr std::array globalSize = {maxInt, maxInt, maxInt}; + + std::array result = {maxInt - 1, 0, 0}; + ASSERT_EQ(fsgrid::globalIDtoCellCoord(globalSize[0] - 1, globalSize), result); + + result = {0, 1, 0}; + ASSERT_EQ(fsgrid::globalIDtoCellCoord(globalSize[0], globalSize), result); + + result = {0, 0, 1}; + ASSERT_EQ(fsgrid::globalIDtoCellCoord((int64_t)globalSize[0] * globalSize[0], globalSize), result); + + result = {maxInt - 1, maxInt - 1, 0}; + ASSERT_EQ(fsgrid::globalIDtoCellCoord((int64_t)globalSize[0] * globalSize[0] - 1, globalSize), result); +} + +struct Decomposition { + int32_t x = 
0u; + int32_t y = 0u; + int32_t z = 0u; +}; + +struct SystemSize { + uint32_t x = 0u; + uint32_t y = 0u; + uint32_t z = 0u; +}; + +Decomposition computeDecomposition(const SystemSize systemSize, const fsgrid::Task_t nProcs) { + const auto dd = fsgrid::computeDomainDecomposition( + { + systemSize.x, + systemSize.y, + systemSize.z, + }, + nProcs, 1); + + return Decomposition{dd[0], dd[1], dd[2]}; +} + +TEST(FsGridToolsTests, dd_size_256_256_256_nprocs_32) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 256, 256}, 32); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 32); +} + +TEST(FsGridToolsTests, dd_size_128_256_256_nprocs_32) { + const auto [x, y, z] = computeDecomposition(SystemSize{128, 256, 256}, 32); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 32); +} + +TEST(FsGridToolsTests, dd_size_256_128_256_nprocs_32) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 128, 256}, 32); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 32); +} + +TEST(FsGridToolsTests, dd_size_256_256_128_nprocs_32) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 256, 128}, 32); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 32); + ASSERT_EQ(z, 1); +} + +TEST(FsGridToolsTests, dd_size_256_256_256_nprocs_1) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 256, 256}, 1); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 1); +} + +TEST(FsGridToolsTests, dd_size_128_256_256_nprocs_1) { + const auto [x, y, z] = computeDecomposition(SystemSize{128, 256, 256}, 1); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 1); +} + +TEST(FsGridToolsTests, dd_size_256_128_256_nprocs_1) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 128, 256}, 1); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 1); +} + +TEST(FsGridToolsTests, dd_size_256_256_128_nprocs_1) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 256, 128}, 1); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 1); +} + +TEST(FsGridToolsTests, dd_size_256_256_256_nprocs_64) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 256, 256}, 64); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 64); +} + +TEST(FsGridToolsTests, dd_size_128_256_256_nprocs_64) { + const auto [x, y, z] = computeDecomposition(SystemSize{128, 256, 256}, 64); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 64); +} + +TEST(FsGridToolsTests, dd_size_256_128_256_nprocs_64) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 128, 256}, 64); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 64); +} + +TEST(FsGridToolsTests, dd_size_256_256_128_nprocs_64) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 256, 128}, 64); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 64); + ASSERT_EQ(z, 1); +} + +TEST(FsGridToolsTests, dd_size_1024_256_512_nprocs_64) { + const auto [x, y, z] = computeDecomposition(SystemSize{1024, 256, 512}, 64); + ASSERT_EQ(x, 64); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 1); +} + +TEST(FsGridToolsTests, dd_size_256_512_128) { + const auto [x, y, z] = computeDecomposition(SystemSize{256, 512, 128}, 64); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 64); + ASSERT_EQ(z, 1); +} + +TEST(FsGridToolsTests, dd_size_64_128_256_nprocs_64) { + const auto [x, y, z] = computeDecomposition(SystemSize{64, 128, 256}, 64); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 64); +} + +TEST(FsGridToolsTests, dd_size_64_256_1024_nprocs_64) { + const auto [x, y, z] = computeDecomposition(SystemSize{64, 256, 1024}, 64); + ASSERT_EQ(x, 1); + ASSERT_EQ(y, 1); + ASSERT_EQ(z, 64); +} + 
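Aside (not part of the patch): the decomposition tests in this file pin the expected split for a given grid size and rank count, and the earlier calcLocalStart/calcLocalSize tests pin how cells are divided along one axis. The short sketch below shows how those helpers could be combined to find one rank's slab along x. It reuses only the call signatures exercised by the tests; the function and variable names are illustrative.

   #include <array>
   #include <cstdint>

   #include "tools.hpp"

   // Sketch only: extent of one rank's slab along x for a 1024 x 256 x 512 grid on 64 ranks.
   std::array<fsgrid::FsIndex_t, 2> slabAlongX(int32_t rank) {
      // The dd_size_1024_256_512_nprocs_64 test expects the decomposition {64, 1, 1} here.
      const auto dd = fsgrid::computeDomainDecomposition({1024u, 256u, 512u}, 64, 1);
      // With all 64 ranks along x, each rank owns 1024 / 64 = 16 consecutive cells in x.
      const auto start = fsgrid::calcLocalStart(1024u, dd[0], rank);
      const auto size = fsgrid::calcLocalSize(1024u, dd[0], rank);
      return {static_cast<fsgrid::FsIndex_t>(start), static_cast<fsgrid::FsIndex_t>(size)};
   }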
+TEST(FsGridToolsTests, dd_size_65_17_100_nprocs_11) {
+   const auto [x, y, z] = computeDecomposition(SystemSize{65, 17, 100}, 11);
+   ASSERT_EQ(x, 1);
+   ASSERT_EQ(y, 1);
+   ASSERT_EQ(z, 11);
+}
+
+TEST(FsGridToolsTests, MPI_err_check_should_throw) {
+   EXPECT_THROW(
+       {
+          try {
+             fsgrid::mpiCheck(MPI_SUCCESS + 1, "Should throw with unsuccessful check");
+          } catch (const std::runtime_error& e) {
+             EXPECT_STREQ("Unrecoverable error encountered in FsGrid, consult cerr for more information", e.what());
+             throw;
+          }
+       },
+       std::runtime_error);
+}
+
+TEST(FsGridToolsTests, MPI_err_check_should_pass) { fsgrid::mpiCheck(MPI_SUCCESS, "This should pass"); }
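Closing note (not part of the patch): the two mpiCheck tests above pin its contract, a success code passes through silently and any other value becomes a std::runtime_error carrying the fixed message asserted in MPI_err_check_should_throw. A minimal usage sketch under that assumption follows; the wrapper name is illustrative.

   #include <mpi.h>

   #include "tools.hpp"

   // Sketch only: MPI_Barrier returns MPI_SUCCESS on success; any other return
   // value makes fsgrid::mpiCheck throw the runtime_error exercised by the tests.
   void checkedBarrier(MPI_Comm comm) {
      fsgrid::mpiCheck(MPI_Barrier(comm), "MPI_Barrier failed in checkedBarrier");
   }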