From 8010ea06fe3718be926ed06203af3ea66b349822 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Tue, 12 Apr 2022 11:39:02 +0100 Subject: [PATCH 1/7] #84 Add warm up iterations to NemoLite2D device accelerated manual implementations --- .../manual_versions/psykal_acc/nemolite2d.f90 | 76 ++++++++++--------- .../manual_versions/psykal_cpp/nemolite2d.f90 | 17 ++++- .../manual_versions/psykal_kokkos/Makefile | 4 +- .../psykal_kokkos/nemolite2d.f90 | 16 +++- .../psykal_kokkos/time_step_views_kokkos.cpp | 7 +- .../psykal_opencl/nemolite2d.f90 | 18 +++-- .../psykal_sycl/nemolite2d.f90 | 23 +++++- 7 files changed, 106 insertions(+), 55 deletions(-) diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 b/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 index e1c1f305..49428770 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 @@ -1,5 +1,5 @@ program gocean2d - use dl_timer + use dl_timer, only: timer_start, timer_stop, timer_init, timer_report, i_def64 use grid_mod use field_mod use initialisation_mod, only: initialisation @@ -7,11 +7,11 @@ program gocean2d use gocean2d_io_mod, only: model_write use gocean_mod, only: model_write_log, gocean_initialise, & gocean_finalise + !use likwid - !> GOcean2d is a Horizontal 2D hydrodynamic ocean model initially developed - !! by Hedong Liu, UK National Oceanography Centre (NOC), which: - !! 1) uses structured grid - !! 2) uses direct data addressing structures + !> A Horizontal 2D hydrodynamic ocean model which + !! 1) using structured grid + !! 2) using direct data addressing structures implicit none @@ -30,9 +30,13 @@ program gocean2d type(r2d_field) :: ua_fld, va_fld ! time stepping index - integer :: istp - integer :: itimer0 + integer :: istp + integer :: itimer0 + ! Scratch space for logging messages + character(len=160) :: log_str + + ! 
Initialise GOcean infrastructure call gocean_initialise() ! Create the model grid. We use a NE offset (i.e. the U, V and F @@ -45,6 +49,7 @@ program gocean2d !! read in model parameters and configure the model grid CALL model_init(model_grid) + !call likwid_markerInit() ! Create fields on this grid @@ -78,16 +83,35 @@ program gocean2d call model_write(model_grid, 0, ht_fld, sshn_t_fld, un_fld, vn_fld) + write(log_str, "('Simulation domain = (',I4,':',I4,',',I4,':',I4,')')") & + model_grid%subdomain%global%xstart, & + model_grid%subdomain%global%xstop, & + model_grid%subdomain%global%ystart, & + model_grid%subdomain%global%ystop + call model_write_log("((A))", TRIM(log_str)) + + ! Start timer for time-stepping section + CALL timer_start(itimer0, label='Warm up', & + num_repeats=INT(1,kind=i_def64) ) + + call step(nit000, & + ua_fld, va_fld, un_fld, vn_fld, & + sshn_t_fld, sshn_u_fld, sshn_v_fld, & + ssha_t_fld, ssha_u_fld, ssha_v_fld, & + hu_fld, hv_fld, ht_fld) + + ! Stop the timer for the time-stepping section + call timer_stop(itimer0) ! Start timer for time-stepping section CALL timer_start(itimer0, label='Time-stepping', & - num_repeats=int((nitend-nit000+1),8) ) + num_repeats=INT(nitend-nit000,kind=i_def64) ) !! time stepping - do istp = nit000, nitend, 1 + do istp = nit000+1, nitend, 1 !call model_write_log("('istp == ',I6)",istp) - call step(model_grid, istp, & + call step(istp, & ua_fld, va_fld, un_fld, vn_fld, & sshn_t_fld, sshn_u_fld, sshn_v_fld, & ssha_t_fld, ssha_u_fld, ssha_v_fld, & @@ -102,14 +126,15 @@ program gocean2d call timer_stop(itimer0) ! Compute and output some checksums for error checking - call model_write_log("('ua checksum = ',E16.8)", & + call model_write_log("('ua checksum = ', E16.8)", & field_checksum(ua_fld)) - call model_write_log("('va checksum = ',E16.8)", & + call model_write_log("('va checksum = ', E16.8)", & field_checksum(va_fld)) !! 
finalise the model run call model_finalise() - + !call likwid_markerClose() + call model_write_log("((A))", 'Simulation finished!!') call gocean_finalise() @@ -118,7 +143,7 @@ end program gocean2d !+++++++++++++++++++++++++++++++++++ -subroutine step(grid, istp, & +subroutine step(istp, & ua, va, un, vn, & sshn, sshn_u, sshn_v, ssha, ssha_u, ssha_v, & hu, hv, ht) @@ -128,7 +153,6 @@ subroutine step(grid, istp, & use time_step_mod, only: invoke_time_step use gocean2d_io_mod, only: model_write implicit none - type(grid_type), intent(in) :: grid !> The current time step integer, intent(in) :: istp type(r2d_field), intent(inout) :: un, vn, sshn, sshn_u, sshn_v @@ -139,27 +163,5 @@ subroutine step(grid, istp, & sshn, sshn_u, sshn_v, & hu, hv, ht, ua, va, un, vn) -! call invoke( & -! continuity(istp, ssha, sshn_t, sshn_u, sshn_v, & -! hu, hv, un, vn), & -! momentum_u(ua, un, vn, & -! ssha_u, sshn_t, sshn_u, sshn_v), & -! momentum_v(va, un, vn, hu, hv, ht, & -! ssha_v, sshn_t, sshn_u, sshn_v), & -! bc_ssh(istp, ssha), & -! bc_solid_u(ua), & -! bc_solid_v(va), & -! bc_flather_u(ua, hu, sshn_u), & -! bc_flather_v(va, hv, sshn_v), & -! copy_field(ua, un), & -! copy_field(va, vn), & -! copy_field(ssha, sshn_t), & -! next_sshu(sshn_u, sshn_t), & -! next_sshv(sshn_v, sshn_t) & -! ) - - -! call model_write(grid, istp, ht, sshn, un, vn) - end subroutine step diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_cpp/nemolite2d.f90 b/benchmarks/nemo/nemolite2d/manual_versions/psykal_cpp/nemolite2d.f90 index 96c9a30f..49428770 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_cpp/nemolite2d.f90 +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_cpp/nemolite2d.f90 @@ -31,7 +31,6 @@ program gocean2d ! time stepping index integer :: istp - real(go_wp) :: rstp integer :: itimer0 ! Scratch space for logging messages @@ -91,12 +90,24 @@ program gocean2d model_grid%subdomain%global%ystop call model_write_log("((A))", TRIM(log_str)) + ! 
Start timer for time-stepping section + CALL timer_start(itimer0, label='Warm up', & + num_repeats=INT(1,kind=i_def64) ) + + call step(nit000, & + ua_fld, va_fld, un_fld, vn_fld, & + sshn_t_fld, sshn_u_fld, sshn_v_fld, & + ssha_t_fld, ssha_u_fld, ssha_v_fld, & + hu_fld, hv_fld, ht_fld) + + ! Stop the timer for the time-stepping section + call timer_stop(itimer0) ! Start timer for time-stepping section CALL timer_start(itimer0, label='Time-stepping', & - num_repeats=INT(nitend-nit000+1,kind=i_def64) ) + num_repeats=INT(nitend-nit000,kind=i_def64) ) !! time stepping - do istp = nit000, nitend, 1 + do istp = nit000+1, nitend, 1 !call model_write_log("('istp == ',I6)",istp) diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile index 9be53b00..dffa828b 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile @@ -35,12 +35,12 @@ KOKKOS_ARCH = Volta70 # Pascal64 CFLAGS := -O3 # Still use the selected compiler but using the Kokkos nvcc_wrapper NVCC_WRAPPER_DEFAULT_COMPILER = $(CXX) -CXX := $(KOKKOS_PATH)/bin/nvcc_wrapper +CXX := $(KOKKOS_PATH)/bin/nvcc_wrapper -allow-unsupported-compiler # The enable lambda option is necessary for the nvcc compiler to recognise # as CUDA kernels the lambda-inlined functions. KOKKOS_CUDA_OPTIONS = "enable_lambda" # If CUDA_LIB is not provided, infer path from the nvcc compiler location. 
-CUDA_LIB ?= $(shell echo $(shell which nvcc) | sed 's/bin\/nvcc/lib64/g') +CUDA_ROOT ?= $(shell echo $(shell which nvcc) | sed 's/bin\/nvcc//g') else $(error "Unrecognised KOKKOS_DEVICES value: $(KOKKOS_DEVICES)") endif diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/nemolite2d.f90 b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/nemolite2d.f90 index c8109789..49428770 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/nemolite2d.f90 +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/nemolite2d.f90 @@ -90,12 +90,24 @@ program gocean2d model_grid%subdomain%global%ystop call model_write_log("((A))", TRIM(log_str)) + ! Start timer for time-stepping section + CALL timer_start(itimer0, label='Warm up', & + num_repeats=INT(1,kind=i_def64) ) + + call step(nit000, & + ua_fld, va_fld, un_fld, vn_fld, & + sshn_t_fld, sshn_u_fld, sshn_v_fld, & + ssha_t_fld, ssha_u_fld, ssha_v_fld, & + hu_fld, hv_fld, ht_fld) + + ! Stop the timer for the time-stepping section + call timer_stop(itimer0) ! Start timer for time-stepping section CALL timer_start(itimer0, label='Time-stepping', & - num_repeats=INT(nitend-nit000+1,kind=i_def64) ) + num_repeats=INT(nitend-nit000,kind=i_def64) ) !! time stepping - do istp = nit000, nitend, 1 + do istp = nit000+1, nitend, 1 !call model_write_log("('istp == ',I6)",istp) diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp index bbdd4651..918ab609 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp @@ -699,8 +699,11 @@ extern "C" void kokkos_read_from_device(double_2dview from, double * to, // Then, we copy the data from the mirror to the original location. 
// Since the mirror data layout is decided by kokkos, we make explicit // copies of each element to its location. - for(int jj=starty; jj < starty+ny; jj++){ - for(int ji=startx; ji < startx+nx; ji++){ + // We need to adjust the provided Fortran bounds to 0-indexing + int starty0 = starty - 1; + int startx0 = startx - 1; + for(int jj=starty0; jj < starty0+ny-1; jj++){ + for(int ji=startx0; ji < startx0+nx-1; ji++){ int idx = (jj * fortran_array_width + ji); to[idx] = mirror(jj, ji); } diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_opencl/nemolite2d.f90 b/benchmarks/nemo/nemolite2d/manual_versions/psykal_opencl/nemolite2d.f90 index a3e04177..5aa092b7 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_opencl/nemolite2d.f90 +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_opencl/nemolite2d.f90 @@ -50,7 +50,6 @@ program gocean2d !! read in model parameters and configure the model grid CALL model_init(model_grid) - !call likwid_markerInit() ! Create fields on this grid @@ -91,15 +90,22 @@ program gocean2d model_grid%subdomain%global%ystop call model_write_log("((A))", TRIM(log_str)) + ! Warming up step + CALL timer_start(itimer0, label='Warm up step', & + num_repeats=INT(1,kind=i_def64) ) + call step(nit000, & + ua_fld, va_fld, un_fld, vn_fld, & + sshn_t_fld, sshn_u_fld, sshn_v_fld, & + ssha_t_fld, ssha_u_fld, ssha_v_fld, & + hu_fld, hv_fld, ht_fld) + call timer_stop(itimer0) + ! Start timer for time-stepping section CALL timer_start(itimer0, label='Time-stepping', & - num_repeats=INT(nitend-nit000+1,kind=i_def64) ) + num_repeats=INT(nitend-nit000,kind=i_def64) ) !! 
time stepping - do istp = nit000, nitend, 1 - - !call model_write_log("('istp == ',I6)",istp) - rstp = real(istp, go_wp) + do istp = nit000+1, nitend, 1 call step(istp, & ua_fld, va_fld, un_fld, vn_fld, & diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_sycl/nemolite2d.f90 b/benchmarks/nemo/nemolite2d/manual_versions/psykal_sycl/nemolite2d.f90 index 604348a2..49428770 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_sycl/nemolite2d.f90 +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_sycl/nemolite2d.f90 @@ -7,6 +7,7 @@ program gocean2d use gocean2d_io_mod, only: model_write use gocean_mod, only: model_write_log, gocean_initialise, & gocean_finalise + !use likwid !> A Horizontal 2D hydrodynamic ocean model which !! 1) using structured grid @@ -30,7 +31,6 @@ program gocean2d ! time stepping index integer :: istp - real(go_wp) :: rstp integer :: itimer0 ! Scratch space for logging messages @@ -43,11 +43,13 @@ program gocean2d ! points immediately to the North and East of a T point all have the ! same i,j index). This is the same offset scheme as used by NEMO. model_grid = grid_type(GO_ARAKAWA_C, & + ! BC_PERIODIC, BC_NON_PERIODIC ?? (/GO_BC_EXTERNAL,GO_BC_EXTERNAL,GO_BC_NONE/), & GO_OFFSET_NE) !! read in model parameters and configure the model grid CALL model_init(model_grid) + !call likwid_markerInit() ! Create fields on this grid @@ -88,12 +90,26 @@ program gocean2d model_grid%subdomain%global%ystop call model_write_log("((A))", TRIM(log_str)) + ! Start timer for time-stepping section + CALL timer_start(itimer0, label='Warm up', & + num_repeats=INT(1,kind=i_def64) ) + + call step(nit000, & + ua_fld, va_fld, un_fld, vn_fld, & + sshn_t_fld, sshn_u_fld, sshn_v_fld, & + ssha_t_fld, ssha_u_fld, ssha_v_fld, & + hu_fld, hv_fld, ht_fld) + + ! Stop the timer for the time-stepping section + call timer_stop(itimer0) ! 
Start timer for time-stepping section CALL timer_start(itimer0, label='Time-stepping', & - num_repeats=INT(nitend-nit000+1,kind=i_def64) ) + num_repeats=INT(nitend-nit000,kind=i_def64) ) !! time stepping - do istp = nit000, nitend, 1 + do istp = nit000+1, nitend, 1 + + !call model_write_log("('istp == ',I6)",istp) call step(istp, & ua_fld, va_fld, un_fld, vn_fld, & @@ -117,6 +133,7 @@ program gocean2d !! finalise the model run call model_finalise() + !call likwid_markerClose() call model_write_log("((A))", 'Simulation finished!!') From 5400ecbaeafe6698abd14c7af00fdf63cd06c8d4 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 14 Apr 2022 01:10:59 -0700 Subject: [PATCH 2/7] Add HIP backend to the NemoLite2D kokkos makefile --- .../nemolite2d/manual_versions/psykal_kokkos/Makefile | 4 ++++ .../nemolite2d/manual_versions/psykal_kokkos/README.md | 10 +++++++--- .../psykal_kokkos/time_step_views_kokkos.cpp | 6 +++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile index dffa828b..3c1d083f 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile @@ -41,6 +41,10 @@ CXX := $(KOKKOS_PATH)/bin/nvcc_wrapper -allow-unsupported-compiler KOKKOS_CUDA_OPTIONS = "enable_lambda" # If CUDA_LIB is not provided, infer path from the nvcc compiler location. 
CUDA_ROOT ?= $(shell echo $(shell which nvcc) | sed 's/bin\/nvcc//g') +else ifeq ($(KOKKOS_DEVICES),HIP) +$(info "Using HIP device") +CXX := hipcc +CFLAGS := -O3 else $(error "Unrecognised KOKKOS_DEVICES value: $(KOKKOS_DEVICES)") endif diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/README.md b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/README.md index cfe191a3..47e36d54 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/README.md +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/README.md @@ -52,15 +52,19 @@ to the Kokkos parallel dispatch. This allows Kokkos to control the data layout, the padding, and the synchonization between host and device (GPU execution) but it requires to keep two copies of the simulation data. This version is available in `time_step_views_kokkos.cpp` and can be built -with an OpenMP or a Cuda backend by setting the KOKKOS_DEVICES environment -variable. Note that the Cuda back-end requires that the `nvcc` compiler is -installed on the system and available in PATH. See below examples of how to +with an OpenMP, Cuda or HIP backend by setting the `KOKKOS_DEVICES` environment +variable. Note that the Cuda back-end requires the `nvcc` compiler and +the HIP back-end requires the `hipcc` compiler. These need to be +installed on the system and the necessary paths must be available in `PATH`, +`CPATH` and `LD_LIBRARY_PATH`. 
See below examples of how to compile the Kokkos View version for different devices: > make nemolite2d_views_kokkos KOKKOS_DEVICES=OpenMP > make nemolite2d_views_kokkos KOKKOS_DEVICES=Cuda + > make nemolite2d_views_kokkos KOKKOS_DEVICES=HIP + ## Running ## Model parameters (size of domain [jpiglo,jpjglo], number of time-steps diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp index 918ab609..3edb242f 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp @@ -11,7 +11,7 @@ #include "timing.h" #endif -#define TILE {64,4} +#define TILE {64,1} // Create 2D View types for the Fields and Grid arrays typedef Kokkos::View double_2dview; @@ -115,7 +115,11 @@ extern "C" void c_invoke_time_step( // The execution space is given as a preprocessor define when compiling // this file. e.g. 
`g++ -DEXEC_SPACE=OpenMP time_step_kokkos.cpp -c` #if defined (EXECUTION_SPACE) + #if EXECUTION_SPACE == HIP + using execution_space = Kokkos::Experimental::EXECUTION_SPACE; + #else using execution_space = Kokkos::EXECUTION_SPACE; + #endif #else using execution_space = Kokkos::DefaultExecutionSpace; #endif From 0f4dc3f992e25d1dc917b3c316533d263b0a7091 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 14 Apr 2022 01:22:08 -0700 Subject: [PATCH 3/7] Update llvm.sh compiler setup script for AMD GPU acceleration --- compiler_setup/llvm.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/compiler_setup/llvm.sh b/compiler_setup/llvm.sh index 33578f14..ce940aa3 100644 --- a/compiler_setup/llvm.sh +++ b/compiler_setup/llvm.sh @@ -2,30 +2,30 @@ # ================================================ # This is an experimental file so other flags may be # needed for accelerated compilation -# Alternative flags have been provided in the comments -# where they have been found to be useful # Fortran compiler F90=flang +# If flang is not available or causes compiler errors uncomment gfortran: +# F90=gfortran # C and C++ compiler CC=clang CXX=clang++ # C and C++ flags -# note that -g is used for debugging information -# as this is an experimental implementation -CFLAGS="-O3 -march=native -g" +CFLAGS="-O3" # Fortran compiler flags -# As above, -g provides debugging information -F90FLAGS="-O3 -march=native -g" +F90FLAGS="-O3" # Flags to use when compiling with OpenMP support OMPFLAGS="-fopenmp" # Flags to use when compiling with OpenMP GPU offloading support -OMPTARGETFLAGS="-fopenmp -fopenmp-targets=nvptx64" -# OMPTARGETFLAGS="–fopenmp-targets=nvptx64-nvidia-cuda" +# For AMD Rocm (march is MI50: gfx906, MI100: gfx908): +# OMPTARGETFLAGS="-target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908" +# For NVIDIA: +OMPTARGETFLAGS="–fopenmp-targets=nvptx64-nvidia-cuda" # Linker flags 
-LDFLAGS="-lomp -lomptarget" +LDFLAGS="-fopenmp" + # Location of various CUDA maths libraries LDFLAGS+=" -L${CUDA_MATH_DIR}/lib64" From 5ccef6aaee922d28aa3dc9d9ec558f77fb9c4608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergi=20Sis=C3=B3?= Date: Thu, 14 Apr 2022 09:31:56 +0100 Subject: [PATCH 4/7] Update NemoLite2D algorithm layer --- .../manual_versions/psykal_acc/nemolite2d.f90 | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 b/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 index 49428770..d1d004fc 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 @@ -7,9 +7,9 @@ program gocean2d use gocean2d_io_mod, only: model_write use gocean_mod, only: model_write_log, gocean_initialise, & gocean_finalise - !use likwid - !> A Horizontal 2D hydrodynamic ocean model which + !> GOcean2d is a Horizontal 2D hydrodynamic ocean model initially developed + !! by Hedong Liu, UK National Oceanography Centre (NOC), which: !! 1) using structured grid !! 2) using direct data addressing structures @@ -30,8 +30,9 @@ program gocean2d type(r2d_field) :: ua_fld, va_fld ! time stepping index - integer :: istp + integer :: istp integer :: itimer0 + integer :: warmup_iterations = 1 ! Scratch space for logging messages character(len=160) :: log_str @@ -43,13 +44,11 @@ program gocean2d ! points immediately to the North and East of a T point all have the ! same i,j index). This is the same offset scheme as used by NEMO. model_grid = grid_type(GO_ARAKAWA_C, & - ! BC_PERIODIC, BC_NON_PERIODIC ?? (/GO_BC_EXTERNAL,GO_BC_EXTERNAL,GO_BC_NONE/), & GO_OFFSET_NE) !! read in model parameters and configure the model grid CALL model_init(model_grid) - !call likwid_markerInit() ! Create fields on this grid @@ -92,24 +91,24 @@ program gocean2d ! 
Start timer for time-stepping section CALL timer_start(itimer0, label='Warm up', & - num_repeats=INT(1,kind=i_def64) ) + num_repeats=INT(warmup_iterations,kind=i_def64) ) - call step(nit000, & - ua_fld, va_fld, un_fld, vn_fld, & - sshn_t_fld, sshn_u_fld, sshn_v_fld, & - ssha_t_fld, ssha_u_fld, ssha_v_fld, & - hu_fld, hv_fld, ht_fld) + do istp = nit000, nit000 + warmup_iterations, 1 + call step(istp, & + ua_fld, va_fld, un_fld, vn_fld, & + sshn_t_fld, sshn_u_fld, sshn_v_fld, & + ssha_t_fld, ssha_u_fld, ssha_v_fld, & + hu_fld, hv_fld, ht_fld) + enddo ! Stop the timer for the time-stepping section call timer_stop(itimer0) ! Start timer for time-stepping section CALL timer_start(itimer0, label='Time-stepping', & - num_repeats=INT(nitend-nit000,kind=i_def64) ) + num_repeats=INT(nitend-(nit000+warmup_iterations),kind=i_def64)) !! time stepping - do istp = nit000+1, nitend, 1 - - !call model_write_log("('istp == ',I6)",istp) + do istp = nit000+warmup_iterations, nitend, 1 call step(istp, & ua_fld, va_fld, un_fld, vn_fld, & @@ -133,7 +132,6 @@ program gocean2d !! finalise the model run call model_finalise() - !call likwid_markerClose() call model_write_log("((A))", 'Simulation finished!!') From e1a7e93cf26a6eace1fad2d715d0544f7a5c882c Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 14 Apr 2022 09:51:00 +0100 Subject: [PATCH 5/7] NemoLite2D add more timing sections in the algorithm layer --- .../manual_versions/psykal_acc/nemolite2d.f90 | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 b/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 index d1d004fc..edc1f9cd 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_acc/nemolite2d.f90 @@ -10,8 +10,8 @@ program gocean2d !> GOcean2d is a Horizontal 2D hydrodynamic ocean model initially developed !! 
by Hedong Liu, UK National Oceanography Centre (NOC), which: - !! 1) using structured grid - !! 2) using direct data addressing structures + !! 1) uses structured grid + !! 2) uses direct data addressing structures implicit none @@ -50,6 +50,11 @@ program gocean2d !! read in model parameters and configure the model grid CALL model_init(model_grid) + ! Start timer for initialisation section (this must be after model_init + ! because dl_timer::timer_init() is called inside it) + CALL timer_start(itimer0, label='Initialise', & + num_repeats=INT(1,kind=i_def64) ) + ! Create fields on this grid ! Sea-surface height now (current time step) @@ -89,7 +94,10 @@ program gocean2d model_grid%subdomain%global%ystop call model_write_log("((A))", TRIM(log_str)) - ! Start timer for time-stepping section + ! Stop the timer for the initialisation section + call timer_stop(itimer0) + + ! Start timer for warm-up section CALL timer_start(itimer0, label='Warm up', & num_repeats=INT(warmup_iterations,kind=i_def64) ) @@ -101,7 +109,7 @@ program gocean2d hu_fld, hv_fld, ht_fld) enddo - ! Stop the timer for the time-stepping section + ! Stop the timer for the warm-up section call timer_stop(itimer0) ! Start timer for time-stepping section CALL timer_start(itimer0, label='Time-stepping', & @@ -124,12 +132,19 @@ program gocean2d ! Stop the timer for the time-stepping section call timer_stop(itimer0) + ! Start timer for checksum section + CALL timer_start(itimer0, label='Checksum reductions', & + num_repeats=INT(1,kind=i_def64) ) + ! Compute and output some checksums for error checking call model_write_log("('ua checksum = ', E16.8)", & field_checksum(ua_fld)) call model_write_log("('va checksum = ', E16.8)", & field_checksum(va_fld)) + ! Stop the timer for the checksum section + call timer_stop(itimer0) + !! 
finalise the model run call model_finalise() From 7a1a65dae22174c726b2f508cd5ce8772438eab7 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 14 Apr 2022 10:10:21 +0100 Subject: [PATCH 6/7] Fix NemoLite2D kokkos preprocessor macros --- .../psykal_kokkos/time_step_views_kokkos.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp index 3edb242f..147a1ce4 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/time_step_views_kokkos.cpp @@ -115,11 +115,9 @@ extern "C" void c_invoke_time_step( // The execution space is given as a preprocessor define when compiling // this file. e.g. `g++ -DEXEC_SPACE=OpenMP time_step_kokkos.cpp -c` #if defined (EXECUTION_SPACE) - #if EXECUTION_SPACE == HIP - using execution_space = Kokkos::Experimental::EXECUTION_SPACE; - #else using execution_space = Kokkos::EXECUTION_SPACE; - #endif + // Replace execution_space with the line below for the HIP backend + // using execution_space = Kokkos::Experimental::EXECUTION_SPACE; #else using execution_space = Kokkos::DefaultExecutionSpace; #endif From 002a6a3afe57787d649146d169cee5a07407bb3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergi=20Sis=C3=B3?= Date: Tue, 21 Jun 2022 12:13:46 +0100 Subject: [PATCH 7/7] Update nemolite2d psykal version to use multiple timing sections --- .../nemo/nemolite2d/psykal/nemolite2d_alg.f90 | 63 ++++++++++++++----- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/benchmarks/nemo/nemolite2d/psykal/nemolite2d_alg.f90 b/benchmarks/nemo/nemolite2d/psykal/nemolite2d_alg.f90 index c37d18cd..b620561b 100644 --- a/benchmarks/nemo/nemolite2d/psykal/nemolite2d_alg.f90 +++ b/benchmarks/nemo/nemolite2d/psykal/nemolite2d_alg.f90 @@ -31,7 +31,10 @@ program gocean2d ! 
time stepping index integer :: istp integer :: itimer0 - integer(i_def64) :: nrepeat + integer :: warmup_iterations = 1 + + ! Scratch space for logging messages + character(len=160) :: log_str call gocean_initialise() @@ -46,6 +49,11 @@ program gocean2d !! read in model parameters and configure the model grid CALL model_init(model_grid) + ! Start timer for initialisation section (this must be after model_init + ! because dl_timer::timer_init() is called inside it) + CALL timer_start(itimer0, label='Initialise', & + num_repeats=INT(1,kind=i_def64) ) + ! Create fields on this grid ! Sea-surface height now (current time step) @@ -78,13 +86,37 @@ program gocean2d call model_write(model_grid, 0, ht_fld, sshn_t_fld, un_fld, vn_fld) + write(log_str, "('Simulation domain = (',I4,':',I4,',',I4,':',I4,')')") & + model_grid%subdomain%global%xstart, & + model_grid%subdomain%global%xstop, & + model_grid%subdomain%global%ystart, & + model_grid%subdomain%global%ystop + call model_write_log("((A))", TRIM(log_str)) + + ! Stop the timer for the initialisation section + call timer_stop(itimer0) + + ! Start timer for warm-up section + CALL timer_start(itimer0, label='Warm up', & + num_repeats=INT(warmup_iterations,kind=i_def64) ) + + do istp = nit000, nit000 + warmup_iterations, 1 + call step(istp, & + ua_fld, va_fld, un_fld, vn_fld, & + sshn_t_fld, sshn_u_fld, sshn_v_fld, & + ssha_t_fld, ssha_u_fld, ssha_v_fld, & + hu_fld, hv_fld, ht_fld) + enddo + + ! Stop the timer for the warm-up section + call timer_stop(itimer0) + ! Start timer for time-stepping section - nrepeat = nitend - nit000 + 1 - call model_write_log("((A))", '=== Start Time-stepping ===') - CALL timer_start(itimer0, label='Time-stepping', num_repeats=nrepeat) + CALL timer_start(itimer0, label='Time-stepping', & + num_repeats=INT(nitend-(nit000+warmup_iterations),kind=i_def64)) !! 
time stepping - do istp = nit000, nitend, 1 + do istp = nit000+warmup_iterations, nitend, 1 call step(istp, & ua_fld, va_fld, un_fld, vn_fld, & @@ -100,23 +132,22 @@ program gocean2d ! Stop the timer for the time-stepping section call timer_stop(itimer0) - call model_write_log("((A))", '=== Time-stepping finished ===') + ! Start timer for checksum section + CALL timer_start(itimer0, label='Checksum reductions', & + num_repeats=INT(1,kind=i_def64) ) ! Compute and output some checksums for error checking - call model_write_log("('ua checksum = ',E16.8)", field_checksum(ua_fld)) - call model_write_log("('va checksum = ',E16.8)", field_checksum(va_fld)) - ! call model_write_log("('ssh_u checksum = ',E16.8)", & - ! field_checksum(sshn_u_fld)) - ! call model_write_log("('ssh_v checksum = ',E16.8)", & - ! field_checksum(sshn_v_fld)) - ! call model_write_log("('ssh_t checksum = ',E16.8)", & - ! field_checksum(sshn_t_fld)) + call model_write_log("('ua checksum = ', E16.8)", & + field_checksum(ua_fld)) + call model_write_log("('va checksum = ', E16.8)", & + field_checksum(va_fld)) + + ! Stop the timer for the checksum section + call timer_stop(itimer0) !! finalise the model run call model_finalise() - call model_write_log("((A))", 'Simulation finished!!') - call gocean_finalise() end program gocean2d