From 0a9ad2fdb04b111074b57ef45ad2fbe89587dc99 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Wed, 14 Jan 2026 13:28:09 +0000 Subject: [PATCH 1/3] #3281 Add tmp rank increase for tke_tke --- examples/nemo/scripts/omp_gpu_trans.py | 5 ----- examples/nemo/scripts/utils.py | 4 +++- .../psyir/transformations/increase_rank_loop_arrays_trans.py | 5 +++++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py index ff5f99cd45..8392dc5274 100755 --- a/examples/nemo/scripts/omp_gpu_trans.py +++ b/examples/nemo/scripts/omp_gpu_trans.py @@ -93,8 +93,6 @@ ] OFFLOADING_ISSUES = [ - # Produces different output results - "zdftke.f90", # The following issues only affect BENCH (because ice is enabled?) # Runtime Error: Illegal address during kernel execution "trcrad.f90", @@ -229,7 +227,6 @@ def trans(psyir): region_directive_trans=omp_target_trans, loop_directive_trans=omp_gpu_loop_trans, collapse=True, - privatise_arrays=False, asynchronous_parallelism=enable_async, uniform_intrinsics_only=REPRODUCIBLE, enable_reductions=not REPRODUCIBLE @@ -242,7 +239,6 @@ def trans(psyir): loop_directive_trans=omp_gpu_loop_trans, collapse=True, asynchronous_parallelism=enable_async, - privatise_arrays=True, uniform_intrinsics_only=REPRODUCIBLE, enable_reductions=not REPRODUCIBLE ) @@ -258,7 +254,6 @@ def trans(psyir): subroutine, loop_directive_trans=omp_cpu_loop_trans, asynchronous_parallelism=enable_async, - privatise_arrays=True, ) # Iterate again and add profiling hooks when needed diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index 522fdfb556..8c8125fba0 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -286,7 +286,9 @@ def increase_rank_and_reorder_nemov5_loops(routine: Routine): # Map of routines and arrays selection = { "dyn_zdf": ['zwd', 'zwi', 'zws'], - "tra_zdf_imp": ['zwd', 'zwi', 'zws', 'zwt'] + "tra_zdf_imp": ['zwd', 'zwi', 'zws', 
'zwt'], + "tke_tke": ['zice_fra', 'zd_lw', 'zd_up', 'zdiag', 'zwlc2', 'zpelc', + 'imlc', 'zhlc', 'zus3'] } if routine.name not in selection: diff --git a/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py b/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py index 1e616f9bff..3567cffd90 100644 --- a/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py +++ b/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py @@ -196,6 +196,11 @@ def validate( # Assignments to the variable are fine, because the value # we will just be repeated to each index of the new rank continue + if (isinstance(check.parent, IntrinsicCall) and + "dim" in check.parent.argument_names): + # Intrinsics calls to a given 'dim' are still ok because + # they still get the same rank + continue # Everything else is currently forbidden non_supported_outside_loop_symbols.add(check.symbol) From 8e39f2010aa46f150a5515c142be847166df50c4 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 29 Jan 2026 15:36:42 +0000 Subject: [PATCH 2/3] Add increase rank in tke_avn and fix solfrac parameters issue --- examples/nemo/scripts/omp_gpu_trans.py | 11 ++++++++++- examples/nemo/scripts/utils.py | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py index 972e066d0e..cecf351e64 100755 --- a/examples/nemo/scripts/omp_gpu_trans.py +++ b/examples/nemo/scripts/omp_gpu_trans.py @@ -80,6 +80,7 @@ NEMOV5_EXCLUSIONS = [ # get_cssrcsurf produces signal SIGFPE, Arithmetic exception "sbcclo.f90", + "fldread.f90", ] NEMOV4_EXCLUSIONS = [ @@ -175,9 +176,11 @@ def trans(psyir): continue if not NEMOV4 and psyir.name in NEMOV5_EXCLUSIONS: continue - # ICE routines do not perform well on GPU, so we skip them + # ICE and ICB routines do not perform well on GPU, so we skip them if psyir.name.startswith("ice"): continue + if psyir.name.startswith("icb"): + continue # Skip 
initialisation and diagnostic subroutines if (subroutine.name.endswith('_alloc') or subroutine.name.endswith('_init') or @@ -187,6 +190,12 @@ def trans(psyir): subroutine.name == 'dom_zgr' or subroutine.name == 'dom_ngb'): continue + if psyir.name == "solfrac_mod.f90": + # Bring these solfrac parameters to the subroutine as nvidia + # does not permit offloaded kernels to access module parameters + symtab = subroutine.symbol_table + symtab.add(symtab.lookup("pp_wgt")) + symtab.add(symtab.lookup("pp_len")) normalise_loops( subroutine, diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index f32045fe7c..8d833d5600 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -291,7 +291,8 @@ def increase_rank_and_reorder_nemov5_loops(routine: Routine): "dyn_zdf": ['zwd', 'zwi', 'zws'], "tra_zdf_imp": ['zwd', 'zwi', 'zws', 'zwt'], "tke_tke": ['zice_fra', 'zd_lw', 'zd_up', 'zdiag', 'zwlc2', 'zpelc', - 'imlc', 'zhlc', 'zus3'] + 'imlc', 'zhlc', 'zus3'], + "tke_avn": ['zmxlm', 'zmxld'] } if routine.name not in selection: From 497c903d42bc37a4f85e72c800e809791ec322a7 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 29 Jan 2026 16:00:50 +0000 Subject: [PATCH 3/3] #3281 Add test for new increase_rank condition --- examples/nemo/scripts/omp_gpu_trans.py | 1 - .../transformations/increase_rank_loop_arrays_trans.py | 7 +++++-- .../transformations/increase_rank_loop_arrays_test.py | 8 ++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py index cecf351e64..d56f7dc10a 100755 --- a/examples/nemo/scripts/omp_gpu_trans.py +++ b/examples/nemo/scripts/omp_gpu_trans.py @@ -80,7 +80,6 @@ NEMOV5_EXCLUSIONS = [ # get_cssrcsurf produces signal SIGFPE, Arithmetic exception "sbcclo.f90", - "fldread.f90", ] NEMOV4_EXCLUSIONS = [ diff --git a/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py
b/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py index 5452da5abe..05e7b36ac2 100644 --- a/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py +++ b/src/psyclone/psyir/transformations/increase_rank_loop_arrays_trans.py @@ -197,9 +197,12 @@ def validate( # we will just be repeated to each index of the new rank continue if (isinstance(check.parent, IntrinsicCall) and + check.parent.is_inquiry and "dim" in check.parent.argument_names): - # Intrinsics calls to a given 'dim' are still ok because - # they still get the same rank + # Inquiry intrinsics calls to a given 'dim' are still ok + # because the specific dims still match (the new dimension + # is the outer one) and the per-rank information is still + # the same continue # Everything else is currently forbidden non_supported_outside_loop_symbols.add(check.symbol) diff --git a/src/psyclone/tests/psyir/transformations/increase_rank_loop_arrays_test.py b/src/psyclone/tests/psyir/transformations/increase_rank_loop_arrays_test.py index c2f9bd28b1..a2f1ebd0cd 100644 --- a/src/psyclone/tests/psyir/transformations/increase_rank_loop_arrays_test.py +++ b/src/psyclone/tests/psyir/transformations/increase_rank_loop_arrays_test.py @@ -221,13 +221,16 @@ def test_irla_apply(fortran_reader, fortran_writer): def test_irla_apply_accesses_outside_loop( fortran_reader, fortran_writer, tmpdir): - ''' Check that the accesses outside the loop are also populate the whole - array, this will imply duplicated computations for each value ''' + ''' Check that simple accesses outside the loop, such as array + assignments where we can populate the whole array, or uses within + an inquiry intrinsic with a 'dim' which will return information + about the same rank, are valid. ''' psyir = fortran_reader.psyir_from_source(""" program test integer, parameter :: N=10, M=10 integer :: i, j real, dimension(N) :: ztmp + integer :: size_of_ztmp_dim ! Implicit loops ztmp = 1 ! 
Range loop @@ -243,6 +246,7 @@ def test_irla_apply_accesses_outside_loop( end do ztmp(:) = 5 end do + size_of_ztmp_dim = SIZE(ztmp, dim=1) end program """) trans = IncreaseRankLoopArraysTrans()