Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 43 additions & 14 deletions benchmarks/nemo/tracer_advection/compute_in_subroutine/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ tra_adv_no_auto_serial: dl_timer
# OpenACC version with timer around outer loop only.
tra_adv_acc: dl_timer
mkdir -p $@
${PSYCLONE} -s ../scripts/kernels_trans.py -opsy \
${PSYCLONE} -s ../scripts/acc_kernels_trans.py -opsy \
$@/tra_adv_compute.f90 ./tra_adv_compute_auto_arrays.F90
cp Makefile_gen $@/Makefile
cp tra_adv_driver.F90 $@/.
Expand All @@ -55,39 +55,68 @@ tra_adv_acc: dl_timer
# OpenACC version with nvtx profiling instrumentation.
tra_adv_acc_prof: dl_timer
mkdir -p $@
${PSYCLONE} --profile invokes -s ../scripts/kernels_trans.py -opsy \
${PSYCLONE} --profile invokes -s ../scripts/acc_kernels_trans.py -opsy \
$@/tra_adv_compute.f90 ./tra_adv_compute_auto_arrays.F90
cp Makefile_gen $@/Makefile
cp tra_adv_driver.F90 $@/.
${MAKE} PROF_LIB_INC="-I${PSYCLONE_NVIDIA_LIB_DIR} -I../${DL_TIMER_DIR}/src" \
LDFLAGS="${LDFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME} ${PSYCLONE_NVIDIA_LIB_DIR}/libnvtx_prof.a" -C $@

# Serial version in SIR compliant form.
tra_adv_no_scalars_serial: dl_timer
mkdir -p $@
cp tra_adv_driver_no_scalars.F90 $@/tra_adv_driver.F90
cp tra_adv_compute_no_scalars.F90 $@/tra_adv_compute.F90
cp Makefile_gen $@/Makefile
${MAKE} PROF_LIB_INC="-I../${DL_TIMER_DIR}/src" \
LDFLAGS="${LDFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@

# OpenACC version with timer around outer loop only.
tra_adv_no_scalars_acc: dl_timer
mkdir -p $@
${PSYCLONE} -s ../scripts/acc_kernels_trans.py -opsy \
$@/tra_adv_compute.f90 ./tra_adv_compute_no_scalars.F90
cp Makefile_gen $@/Makefile
cp tra_adv_driver_no_scalars.F90 $@/tra_adv_driver.F90
${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} ${UMEMFLAGS} -I../${DL_TIMER_DIR}/src" \
LDFLAGS="${LDFLAGS} ${ACCFLAGS} ${UMEMFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" PROF_LIB_INC="-I../${DL_TIMER_DIR}/src" \
-C $@

# Serial Fortran version after transformation to SIR-compliant form.
tra_adv_sir: dl_timer
tra_adv_sir: dl_timer ../scripts/sir_loop_trans.py
mkdir -p $@
${PSYCLONE} -s ../scripts/sir_trans.py -opsy $@/tra_adv_compute.f90 \
./tra_adv_compute_auto_arrays.F90
${PSYCLONE} -s ../scripts/sir_loop_trans.py -opsy $@/tra_adv_compute.f90 \
./tra_adv_compute_no_scalars.F90
cp Makefile_gen $@/Makefile
cp tra_adv_driver.F90 $@/.
cp tra_adv_driver_no_scalars.F90 $@/tra_adv_driver.F90
${MAKE} PROF_LIB_INC="-I../${DL_TIMER_DIR}/src" \
LDFLAGS="${LDFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@

# OpenACC added after transformation to SIR-compliant form.
tra_adv_sir_acc: dl_timer
tra_adv_sir_acc_um: dl_timer ../scripts/sir_loop_kernels_um_trans.py
mkdir -p $@
${PSYCLONE} -s ../scripts/sir_kernels_trans.py -opsy \
$@/tra_adv_compute.f90 ./tra_adv_compute_auto_arrays.F90
${PSYCLONE} -s ../scripts/sir_loop_kernels_trans.py -opsy \
$@/tra_adv_compute.f90 ./tra_adv_compute_no_scalars.F90
cp Makefile_gen $@/Makefile
cp tra_adv_driver.F90 $@/.
${MAKE} PROF_LIB_INC="-I../${DL_TIMER_DIR}/src" \
LDFLAGS="${LDFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@
cp tra_adv_driver_no_scalars.F90 $@/tra_adv_driver.f90
${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} ${UMEMFLAGS} -Mfma -I../${DL_TIMER_DIR}/src" \
LDFLAGS="${LDFLAGS} ${ACCFLAGS} ${UMEMFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" PROF_LIB_INC="-I../${DL_TIMER_DIR}/src" -C $@

tra_adv_sir_acc: dl_timer ../scripts/sir_loop_kernels_trans.py
mkdir -p $@
${PSYCLONE} -s ../scripts/sir_loop_kernels_trans.py -opsy \
$@/tra_adv_compute.f90 ./tra_adv_compute_no_scalars.F90
cp Makefile_gen $@/Makefile
cp tra_adv_driver_no_scalars.F90 $@/tra_adv_driver.f90
${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} -I../${DL_TIMER_DIR}/src" \
LDFLAGS="${LDFLAGS} ${ACCFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" PROF_LIB_INC="-I../${DL_TIMER_DIR}/src" -C $@

tra_adv_sir_acc_prof: dl_timer
mkdir -p $@
${PSYCLONE} --profile invokes -s ../scripts/sir_kernels_trans.py -opsy \
$@/tra_adv_compute.f90 ./tra_adv_compute_auto_arrays.F90
$@/tra_adv_compute.f90 ./tra_adv_compute_no_scalars.F90
cp Makefile_gen $@/Makefile
cp tra_adv_driver.F90 $@/.
cp tra_adv_driver_no_scalars.F90 $@/tra_adv_driver.f90
${MAKE} PROF_LIB_INC="-I${PSYCLONE_NVIDIA_LIB_DIR} -I../${DL_TIMER_DIR}/src" \
LDFLAGS="${LDFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME} ${PSYCLONE_NVIDIA_LIB_DIR}/libnvtx_prof.a" -C $@

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
module tra_adv_compute_mod
implicit none

contains

subroutine tra_adv_compute(zind, tsn, ztfreez, rnfmsk, rnfmsk_z, upsmsk, tmask, zwx, zwy, umask, vmask, mydomain, zslpx, zslpy, pun, pvn, pwn, jpi, jpj, jpk, iter)

REAL*8, ALLOCATABLE, DIMENSION(:, :, :), intent(inout):: zind
REAL*8, ALLOCATABLE, DIMENSION(:, :, :), intent(in) :: tsn, tmask
REAL*8, ALLOCATABLE, DIMENSION(:, :), intent(in) :: ztfreez, rnfmsk, upsmsk
REAL*8, ALLOCATABLE, DIMENSION(:), intent(in) :: rnfmsk_z
REAL*8, ALLOCATABLE, DIMENSION(:, :, :), intent(inout) :: zwx, zwy
REAL*8, ALLOCATABLE, DIMENSION(:, :, :), intent(in) :: umask, vmask

REAL*8, ALLOCATABLE, DIMENSION(:, :, :), intent(inout) :: zslpx, zslpy
REAL*8, ALLOCATABLE, DIMENSION(:, :, :), intent(inout) :: mydomain
REAL*8, ALLOCATABLE, DIMENSION(:, :, :), intent(in) :: pun, pvn

REAL*8, ALLOCATABLE, DIMENSION(:,:,:), intent(in) :: pwn

INTEGER, INTENT(IN) :: jpi, jpj, jpk
INTEGER, INTENT(IN) :: iter

REAL*8 :: zbtr, ztra

REAL*8 :: z0u, zalpha, zu, zdt, zzwx, zzwy, z0v, zv

REAL*8 :: zice

REAL*8 :: z0w, zw

INTEGER :: ji, jj, jk

DO jk = 1, jpk
DO jj = 1, jpj
DO ji = 1, jpi
zice = 0.d0
IF (tsn(ji, jj, jk) <= ztfreez(ji, jj) + 0.1d0) THEN; zice = 1.d0
ELSE; zice = 0.d0
END IF

zind(ji, jj, jk) = MAX( &
rnfmsk(ji, jj)*rnfmsk_z(jk), &
upsmsk(ji, jj), &
zice &
& )*tmask(ji, jj, jk)
zind(ji, jj, jk) = 1 - zind(ji, jj, jk)
END DO
END DO
END DO

zwx(:, :, jpk) = 0.e0; zwy(:, :, jpk) = 0.e0

DO jk = 1, jpk - 1
DO jj = 1, jpj - 1
DO ji = 1, jpi - 1
zwx(ji, jj, jk) = umask(ji, jj, jk)*(mydomain(ji + 1, jj, jk) - mydomain(ji, jj, jk))
zwy(ji, jj, jk) = vmask(ji, jj, jk)*(mydomain(ji, jj + 1, jk) - mydomain(ji, jj, jk))
END DO
END DO
END DO

zslpx(:, :, jpk) = 0.e0; zslpy(:, :, jpk) = 0.e0

DO jk = 1, jpk - 1
DO jj = 2, jpj
DO ji = 2, jpi
zslpx(ji, jj, jk) = (zwx(ji, jj, jk) + zwx(ji - 1, jj, jk)) &
& *(0.25d0 + SIGN(0.25d0, zwx(ji, jj, jk)*zwx(ji - 1, jj, jk)))
zslpy(ji, jj, jk) = (zwy(ji, jj, jk) + zwy(ji, jj - 1, jk)) &
& *(0.25d0 + SIGN(0.25d0, zwy(ji, jj, jk)*zwy(ji, jj - 1, jk)))
END DO
END DO
END DO

DO jk = 1, jpk - 1
DO jj = 2, jpj
DO ji = 2, jpi
zslpx(ji, jj, jk) = SIGN(1.d0, zslpx(ji, jj, jk))*MIN(ABS(zslpx(ji, jj, jk)), &
& 2.d0*ABS(zwx(ji - 1, jj, jk)), &
& 2.d0*ABS(zwx(ji, jj, jk)))
zslpy(ji, jj, jk) = SIGN(1.d0, zslpy(ji, jj, jk))*MIN(ABS(zslpy(ji, jj, jk)), &
& 2.d0*ABS(zwy(ji, jj - 1, jk)), &
& 2.d0*ABS(zwy(ji, jj, jk)))
END DO
END DO
END DO

DO jk = 1, jpk - 1
!zdt = 1
DO jj = 2, jpj - 1
DO ji = 2, jpi - 1
z0u = SIGN(0.5d0, pun(ji, jj, jk))
zalpha = 0.5d0 - z0u
!zu = z0u - 0.5d0*pun(ji, jj, jk)*zdt
zu = z0u - 0.5d0*pun(ji, jj, jk)*1.0

zzwx = mydomain(ji + 1, jj, jk) + zind(ji, jj, jk)*(zu*zslpx(ji + 1, jj, jk))
zzwy = mydomain(ji, jj, jk) + zind(ji, jj, jk)*(zu*zslpx(ji, jj, jk))

zwx(ji, jj, jk) = pun(ji, jj, jk)*(zalpha*zzwx + (1.-zalpha)*zzwy)

z0v = SIGN(0.5d0, pvn(ji, jj, jk))
zalpha = 0.5d0 - z0v
!zv = z0v - 0.5d0*pvn(ji, jj, jk)*zdt
zv = z0v - 0.5d0*pvn(ji, jj, jk)*1.0

zzwx = mydomain(ji, jj + 1, jk) + zind(ji, jj, jk)*(zv*zslpy(ji, jj + 1, jk))
zzwy = mydomain(ji, jj, jk) + zind(ji, jj, jk)*(zv*zslpy(ji, jj, jk))

zwy(ji, jj, jk) = pvn(ji, jj, jk)*(zalpha*zzwx + (1.d0 - zalpha)*zzwy)
END DO
END DO
END DO

DO jk = 1, jpk - 1
DO jj = 2, jpj - 1
DO ji = 2, jpi - 1
zbtr = 1.
ztra = -zbtr*(zwx(ji, jj, jk) - zwx(ji - 1, jj, jk) &
& + zwy(ji, jj, jk) - zwy(ji, jj - 1, jk))
mydomain(ji, jj, jk) = mydomain(ji, jj, jk) + ztra
END DO
END DO
END DO

zwx(:, :, 1) = 0.e0; zwx(:, :, jpk) = 0.e0

DO jk = 2, jpk - 1
zwx(:, :, jk) = tmask(:, :, jk)*(mydomain(:, :, jk - 1) - mydomain(:, :, jk))
END DO

zslpx(:, :, 1) = 0.e0

DO jk = 2, jpk - 1
DO jj = 1, jpj
DO ji = 1, jpi
zslpx(ji, jj, jk) = (zwx(ji, jj, jk) + zwx(ji, jj, jk + 1)) &
& *(0.25d0 + SIGN(0.25d0, zwx(ji, jj, jk)*zwx(ji, jj, jk + 1)))
END DO
END DO
END DO

DO jk = 2, jpk - 1
DO jj = 1, jpj
DO ji = 1, jpi
zslpx(ji, jj, jk) = SIGN(1.d0, zslpx(ji, jj, jk))*MIN(ABS(zslpx(ji, jj, jk)), &
& 2.d0*ABS(zwx(ji, jj, jk + 1)), &
& 2.d0*ABS(zwx(ji, jj, jk)))
END DO
END DO
END DO

zwx(:, :, 1) = pwn(:, :, 1)*mydomain(:, :, 1)

!zdt = 1
!zbtr = 1.
!DO jk = 1, jpk-1
! DO jj = 2, jpj-1
! DO ji = 2, jpi-1
! z0w = SIGN( 0.5d0, pwn(ji,jj,jk+1) )
! zalpha = 0.5d0 + z0w
! zw = z0w - 0.5d0 * pwn(ji,jj,jk+1) * zdt * zbtr
!
! zzwx = mydomain(ji,jj,jk+1) + zind(ji,jj,jk) * (zw * zslpx(ji,jj,jk+1))
! zzwy = mydomain(ji,jj,jk ) + zind(ji,jj,jk) * (zw * zslpx(ji,jj,jk ))
!
! zwx(ji,jj,jk+1) = pwn(ji,jj,jk+1) * ( zalpha * zzwx + (1.-zalpha) * zzwy )
! END DO
! END DO
!END DO
DO jk = 2, jpk
DO jj = 2, jpj - 1
DO ji = 2, jpi - 1
z0w = SIGN(0.5d0, pwn(ji, jj, jk))
zalpha = 0.5d0 + z0w
!zw = z0w - 0.5d0*pwn(ji, jj, jk)*zdt*zbtr
zw = z0w - 0.5d0*pwn(ji, jj, jk)*1.0*1.0

zzwx = mydomain(ji, jj, jk) + zind(ji, jj, jk - 1)*(zw*zslpx(ji, jj, jk))
zzwy = mydomain(ji, jj, jk - 1) + zind(ji, jj, jk - 1)*(zw*zslpx(ji, jj, jk - 1))

zwx(ji, jj, jk) = pwn(ji, jj, jk)*(zalpha*zzwx + (1.-zalpha)*zzwy)
END DO
END DO
END DO

!zbtr = 1.
DO jk = 1, jpk - 1
DO jj = 2, jpj - 1
DO ji = 2, jpi - 1
!ztra = -zbtr*(zwx(ji, jj, jk) - zwx(ji, jj, jk + 1))
ztra = -1.0*(zwx(ji, jj, jk) - zwx(ji, jj, jk + 1))
mydomain(ji, jj, jk) = ztra
END DO
END DO
END DO

end subroutine tra_adv_compute

end module tra_adv_compute_mod
Loading