Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions examples/nemo/scripts/omp_gpu_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,6 @@
]

OFFLOADING_ISSUES = [
# Produces different output results
"zdftke.f90",
# The following issues only affect BENCH (because ice is enabled?)
# Runtime Error: Illegal address during kernel execution
"trcrad.f90",
Expand Down Expand Up @@ -177,9 +175,11 @@ def trans(psyir):
continue
if not NEMOV4 and psyir.name in NEMOV5_EXCLUSIONS:
continue
# ICE routines do not perform well on GPU, so we skip them
# ICE and ICB routines do not perform well on GPU, so we skip them
if psyir.name.startswith("ice"):
continue
if psyir.name.startswith("icb"):
continue
# Skip initialisation and diagnostic subroutines
if (subroutine.name.endswith('_alloc') or
subroutine.name.endswith('_init') or
Expand All @@ -189,6 +189,12 @@ def trans(psyir):
subroutine.name == 'dom_zgr' or
subroutine.name == 'dom_ngb'):
continue
if subroutine.name == "solfrac_mod.f90":
# Add these solfrac module parameters to the subroutine's own symbol
# table, as NVIDIA does not permit offloaded kernels to access module
# parameters
symtab = subroutine.symbol_table
symtab.add(symtab.lookup("pp_wgt"))
symtab.add(symtab.lookup("pp_len"))

normalise_loops(
subroutine,
Expand Down Expand Up @@ -229,7 +235,6 @@ def trans(psyir):
region_directive_trans=omp_target_trans,
loop_directive_trans=omp_gpu_loop_trans,
collapse=True,
privatise_arrays=False,
asynchronous_parallelism=enable_async,
uniform_intrinsics_only=REPRODUCIBLE,
enable_reductions=not REPRODUCIBLE
Expand All @@ -242,7 +247,6 @@ def trans(psyir):
loop_directive_trans=omp_gpu_loop_trans,
collapse=True,
asynchronous_parallelism=enable_async,
privatise_arrays=True,
uniform_intrinsics_only=REPRODUCIBLE,
enable_reductions=not REPRODUCIBLE
)
Expand All @@ -258,7 +262,6 @@ def trans(psyir):
subroutine,
loop_directive_trans=omp_cpu_loop_trans,
asynchronous_parallelism=enable_async,
privatise_arrays=True,
)

# Iterate again and add profiling hooks when needed
Expand Down
5 changes: 4 additions & 1 deletion examples/nemo/scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,10 @@ def increase_rank_and_reorder_nemov5_loops(routine: Routine):
# Map of routines and arrays
selection = {
"dyn_zdf": ['zwd', 'zwi', 'zws'],
"tra_zdf_imp": ['zwd', 'zwi', 'zws', 'zwt']
"tra_zdf_imp": ['zwd', 'zwi', 'zws', 'zwt'],
"tke_tke": ['zice_fra', 'zd_lw', 'zd_up', 'zdiag', 'zwlc2', 'zpelc',
'imlc', 'zhlc', 'zus3'],
"tke_avn": ['zmxlm', 'zmxld']
}

if routine.name not in selection:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@ def validate(
# Assignments to the variable are fine, because the value
# will just be repeated to each index of the new rank
continue
if (isinstance(check.parent, IntrinsicCall) and
check.parent.is_inquiry and
"dim" in check.parent.argument_names):
# Inquiry intrinsic calls with a given 'dim' are still ok
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great comment, thanks :-)

# because the specific dims still match (the new dimension
# is the outer one) and the per-rank information is still
# the same
continue
# Everything else is currently forbidden
non_supported_outside_loop_symbols.add(check.symbol)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,16 @@ def test_irla_apply(fortran_reader, fortran_writer):

def test_irla_apply_accesses_outside_loop(
fortran_reader, fortran_writer, tmpdir):
''' Check that the accesses outside the loop also populate the whole
array; this will imply duplicated computations for each value '''
''' Check that simple accesses outside the loop, such as array
assignments where we can populate the whole array, or uses within
an inquiry intrinsic with a 'dim' which will return information
about the same rank, are valid. '''
psyir = fortran_reader.psyir_from_source("""
program test
integer, parameter :: N=10, M=10
integer :: i, j
real, dimension(N) :: ztmp
integer :: size_of_ztmp_dim
! Implicit loops
ztmp = 1
! Range loop
Expand All @@ -243,6 +246,7 @@ def test_irla_apply_accesses_outside_loop(
end do
ztmp(:) = 5
end do
size_of_ztmp_dim = SIZE(ztmp, dim=1)
end program
""")
trans = IncreaseRankLoopArraysTrans()
Expand Down
Loading