diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py
index 709c0f5e5d..d56f7dc10a 100755
--- a/examples/nemo/scripts/omp_gpu_trans.py
+++ b/examples/nemo/scripts/omp_gpu_trans.py
@@ -96,8 +96,6 @@
 ]
 
 OFFLOADING_ISSUES = [
-    # Produces different output results
-    "zdftke.f90",
     # The following issues only affect BENCH (because ice is enabled?)
     # Runtime Error: Illegal address during kernel execution
     "trcrad.f90",
@@ -177,9 +175,11 @@ def trans(psyir):
                 continue
             if not NEMOV4 and psyir.name in NEMOV5_EXCLUSIONS:
                 continue
-            # ICE routines do not perform well on GPU, so we skip them
+            # ICE and ICB routines do not perform well on GPU, so we skip them
             if psyir.name.startswith("ice"):
                 continue
+            if psyir.name.startswith("icb"):
+                continue
             # Skip initialisation and diagnostic subroutines
             if (subroutine.name.endswith('_alloc') or
                     subroutine.name.endswith('_init') or
@@ -189,6 +189,12 @@ def trans(psyir):
                     subroutine.name == 'dom_zgr' or
                     subroutine.name == 'dom_ngb'):
                 continue
+            if psyir.name == "solfrac_mod.f90":
+                # Bring these solfrac parameters to the subroutine as nvidia
+                # does not permit offloaded kernels to access module parameters
+                symtab = subroutine.symbol_table
+                symtab.add(symtab.lookup("pp_wgt"))
+                symtab.add(symtab.lookup("pp_len"))
 
             normalise_loops(
                     subroutine,
@@ -229,7 +235,6 @@ def trans(psyir):
                         region_directive_trans=omp_target_trans,
                         loop_directive_trans=omp_gpu_loop_trans,
                         collapse=True,
-                        privatise_arrays=False,
                         asynchronous_parallelism=enable_async,
                         uniform_intrinsics_only=REPRODUCIBLE,
                         enable_reductions=not REPRODUCIBLE
@@ -242,7 +247,6 @@ def trans(psyir):
                         loop_directive_trans=omp_gpu_loop_trans,
                         collapse=True,
                         asynchronous_parallelism=enable_async,
-                        privatise_arrays=True,
                         uniform_intrinsics_only=REPRODUCIBLE,
                         enable_reductions=not REPRODUCIBLE
                 )
@@ -258,7 +262,6 @@ def trans(psyir):
                         subroutine,
                         loop_directive_trans=omp_cpu_loop_trans,
                         asynchronous_parallelism=enable_async,
-                        privatise_arrays=True,
                 )
 
     # Iterate again and add profiling hooks when needed
diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py
index a7c703c66b..8d833d5600 100755
--- a/examples/nemo/scripts/utils.py
+++ b/examples/nemo/scripts/utils.py
@@ -289,7 +289,10 @@ def increase_rank_and_reorder_nemov5_loops(routine: Routine):
     # Map of routines and arrays
     selection = {
         "dyn_zdf": ['zwd', 'zwi', 'zws'],
-        "tra_zdf_imp": ['zwd', 'zwi', 'zws', 'zwt']
+        "tra_zdf_imp": ['zwd', 'zwi', 'zws', 'zwt'],
+        "tke_tke": ['zice_fra', 'zd_lw', 'zd_up', 'zdiag', 'zwlc2', 'zpelc',
+                    'imlc', 'zhlc', 'zus3'],
+        "tke_avn": ['zmxlm', 'zmxld']
     }
 
     if routine.name not in selection:
diff --git a/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py b/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py
index 8135228131..05e7b36ac2 100644
--- a/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py
+++ b/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py
@@ -196,6 +196,14 @@ def validate(
                     # Assignments to the variable are fine, because the value
                     # we will just be repeated to each index of the new rank
                     continue
+                if (isinstance(check.parent, IntrinsicCall) and
+                        check.parent.is_inquiry and
+                        "dim" in check.parent.argument_names):
+                    # Inquiry intrinsic calls with a given 'dim' are still ok
+                    # because the specific dims still match (the new dimension
+                    # is the outer one) and the per-rank information is still
+                    # the same
+                    continue
                 # Everything else is currently forbidden
                 non_supported_outside_loop_symbols.add(check.symbol)
 
diff --git a/src/psyclone/tests/psyir/transformations/increase_rank_loop_arrays_test.py b/src/psyclone/tests/psyir/transformations/increase_rank_loop_arrays_test.py
index c2f9bd28b1..a2f1ebd0cd 100644
--- a/src/psyclone/tests/psyir/transformations/increase_rank_loop_arrays_test.py
+++ b/src/psyclone/tests/psyir/transformations/increase_rank_loop_arrays_test.py
@@ -221,13 +221,16 @@ def test_irla_apply(fortran_reader, fortran_writer):
 
 def test_irla_apply_accesses_outside_loop(
         fortran_reader, fortran_writer, tmpdir):
-    ''' Check that the accesses outside the loop are also populate the whole
-    array, this will imply duplicated computations for each value '''
+    ''' Check that simple accesses outside the loop, such as array
+    assignments where we can populate the whole array, or uses within
+    an inquiry intrinsic with a 'dim' which will return information
+    about the same rank, are valid. '''
     psyir = fortran_reader.psyir_from_source("""
     program test
         integer, parameter :: N=10, M=10
         integer :: i, j
         real, dimension(N) :: ztmp
+        integer :: size_of_ztmp_dim
         ! Implicit loops
         ztmp = 1
         ! Range loop
@@ -243,6 +246,7 @@ def test_irla_apply_accesses_outside_loop(
             end do
             ztmp(:) = 5
         end do
+        size_of_ztmp_dim = SIZE(ztmp, dim=1)
     end program
     """)
     trans = IncreaseRankLoopArraysTrans()