diff --git a/.github/workflows/makefile-test.yml b/.github/workflows/makefile-test.yml index 245a0a78..c6185ab8 100644 --- a/.github/workflows/makefile-test.yml +++ b/.github/workflows/makefile-test.yml @@ -47,12 +47,12 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 - run: python -m pip install --upgrade pip - run: cd shared/PSyclone && pip install . - name: Install dependencies diff --git a/benchmarks/nemo/nemolite2d/kernels/fortran/boundary_conditions_mod.f90 b/benchmarks/nemo/nemolite2d/kernels/fortran/boundary_conditions_mod.f90 index ca47f880..ca10aa6a 100644 --- a/benchmarks/nemo/nemolite2d/kernels/fortran/boundary_conditions_mod.f90 +++ b/benchmarks/nemo/nemolite2d/kernels/fortran/boundary_conditions_mod.f90 @@ -5,7 +5,6 @@ module boundary_conditions_mod GO_STENCIL use kernel_mod, only: kernel_type, GO_POINTWISE, GO_DOFS, & GO_ALL_PTS, GO_INTERNAL_PTS - use physical_params_mod use grid_mod use field_mod implicit none diff --git a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile index 9be53b00..65725e62 100644 --- a/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile +++ b/benchmarks/nemo/nemolite2d/manual_versions/psykal_kokkos/Makefile @@ -16,6 +16,9 @@ KOKKOS_PATH ?= $(SHARED_DIR)/kokkos KOKKOS_DEBUG ?= no # Careful, 10x performance penalty in kernels. CXXFLAGS = $(CFLAGS) # Use same CFLAGS to compile Kokkos library. 
+# The Kokkos Makefile is deprecated, but we can still use it with: +KOKKOS_USE_DEPRECATED_MAKEFILES=1 + # If no KOKKOS_DEVICES is specified, by default use the OpenMP KOKKOS_DEVICES ?= OpenMP @@ -90,7 +93,7 @@ clean: ${MAKE} -C ${INF_DIR} clean rm -f *.o *.mod *.MOD *~ *.dat rm -f gnu_opt_report.txt *.optrpt - rm -rf KokkosCore_* Makefile.kokkos.f90 + rm -rf KokkosCore_* Makefile.kokkos.f90 desul Desul_Config.tmp allclean: clean rm -f *.exe fparser.log *.a diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/acc_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/acc_transform.py index 7725c55f..3d1273b7 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/acc_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/acc_transform.py @@ -3,28 +3,39 @@ from psyclone.domain.common.transformations import KernelModuleInlineTrans from psyclone.psyGen import TransInfo -from psyclone.psyir.nodes import Loop +from psyclone.psyir.nodes import Container, Loop, Routine +from psyclone.transformations import ( + ACCEnterDataTrans, ACCLoopTrans, ACCParallelTrans, ACCRoutineTrans, + KernelImportsToArguments) -def trans(psy): - ''' Take the supplied psy object, apply OpenACC transformations - to the schedule of invoke_0 and return the new psy object ''' +def trans(psyir: Container) -> None: + ''' Take the supplied psyir object, apply OpenACC transformations + to the schedule of invoke_0. 
''' tinfo = TransInfo() parallel_trans = tinfo.get_trans_name('ACCParallelTrans') loop_trans = tinfo.get_trans_name('ACCLoopTrans') - enter_data_trans = tinfo.get_trans_name('ACCEnterDataTrans') - routine_trans = tinfo.get_trans_name('ACCRoutineTrans') - glo2arg_trans = tinfo.get_trans_name('KernelImportsToArguments') - inline_trans = KernelModuleInlineTrans() + enter_data_trans = ACCEnterDataTrans() + routine_trans = ACCRoutineTrans() + glo2arg_trans = KernelImportsToArguments() + mod_inline_trans = KernelModuleInlineTrans() - invoke = psy.invokes.get('invoke_0') - schedule = invoke.schedule + schedule = psyir.walk(Routine)[0] # Apply the OpenACC Loop transformation to *every* loop # in the schedule for child in schedule.children: if isinstance(child, Loop): - loop_trans.apply(child, {"collapse": 2}) + opts = {"collapse": 2} + if child.kernels()[0].name == "bc_flather_v_code": + # We need to ignore dependencies on 'va' because PSyclone + # spots that there is a dependence in the bc_flather_v kernel. + # However, we know that practically this isn't a problem + # because of the way the domain (mask) is configured. + opts["ignore_dependencies_for"] = ["va%data"] + if child.kernels()[0].name == "bc_flather_u_code": + opts["ignore_dependencies_for"] = ["ua%data"] + loop_trans.apply(child, options=opts) # Put all of the loops in a single parallel region parallel_trans.apply(schedule) @@ -37,6 +48,4 @@ def trans(psy): for kern in schedule.coded_kernels(): glo2arg_trans.apply(kern) routine_trans.apply(kern) - inline_trans.apply(kern) - - return psy + mod_inline_trans.apply(kern) diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/ocl_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/ocl_transform.py index 8235d3a0..6221871d 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/ocl_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/ocl_transform.py @@ -3,10 +3,13 @@ that PSyclone will generate an OpenCL PSy layer. 
''' import os -from psyclone.psyGen import TransInfo -from psyclone.domain.gocean.transformations import \ - GOMoveIterationBoundariesInsideKernelTrans, GOOpenCLTrans + +from psyclone.domain.gocean.transformations import ( + GOMoveIterationBoundariesInsideKernelTrans, GOOpenCLTrans) from psyclone.configuration import Config +from psyclone.psyir.nodes import Routine +from psyclone.transformations import ( + KernelImportsToArguments) # Global variables to configure the PSyclone OpenCL generation: @@ -33,13 +36,12 @@ def trans(psy): ''' Transform the schedule for OpenCL generation ''' # Import transformations - tinfo = TransInfo() - globaltrans = tinfo.get_trans_name('KernelImportsToArguments') + globaltrans = KernelImportsToArguments() move_boundaries_trans = GOMoveIterationBoundariesInsideKernelTrans() cltrans = GOOpenCLTrans() - # Get the invoke routine - schedule = psy.invokes.get('invoke_0').schedule + # Get the routine + schedule = psy.walk(Routine)[0] # Map the kernels by their name to different OpenCL queues. The multiple # command queues can be executed concurrently while each command queue diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_task_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_task_transform.py index 17be3e07..b10e07ff 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_task_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_task_transform.py @@ -2,20 +2,21 @@ function via the -s option. 
It applies OpenMP tasking to every loop and inlines all kernels in the schedule.''' -from psyclone.psyir.nodes import Loop +from psyclone.psyir.nodes import Container, Loop, Routine from psyclone.configuration import Config -from psyclone.transformations import OMPParallelTrans, OMPSingleTrans, \ - OMPTaskloopTrans, KernelModuleInlineTrans -from psyclone.psyir.transformations import OMPTaskwaitTrans -from psyclone.psyir.nodes import OMPTaskloopDirective, OMPTaskwaitDirective, \ - OMPDirective, OMPParallelDirective +from psyclone.domain.common.transformations import KernelModuleInlineTrans +from psyclone.transformations import ( + OMPParallelTrans, OMPSingleTrans) +from psyclone.psyir.transformations import OMPTaskloopTrans, OMPTaskwaitTrans +from psyclone.psyir.nodes import (OMPTaskloopDirective, OMPTaskwaitDirective, + OMPDirective, OMPParallelDirective) -def trans(psy): +def trans(psyir: Container) -> None: '''Transformation entry point''' config = Config.get() - schedule = psy.invokes.get('invoke_0').schedule + schedule = psyir.walk(Routine)[0] loop_trans = OMPTaskloopTrans(grainsize=32, nogroup=True) wait_trans = OMPTaskwaitTrans() @@ -28,7 +29,16 @@ def trans(psy): for child in schedule.children: if isinstance(child, Loop): - loop_trans.apply(child) + # We need to ignore dependencies on '{u,v}a' because PSyclone + # spots that there is a dependence in the bc_flather_{u,v} kernel. + # However, we know that practically this isn't a problem + # because of the way the domain (mask) is configured. 
+ options = {} + if child.kernels()[0].name == "bc_flather_v_code": + options["ignore_dependencies_for"] = ["va%data"] + if child.kernels()[0].name == "bc_flather_u_code": + options["ignore_dependencies_for"] = ["ua%data"] + loop_trans.apply(child, options=options) single_trans = OMPSingleTrans() parallel_trans = OMPParallelTrans() diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_transform.py index 573cef38..ff085752 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_transform.py @@ -5,11 +5,14 @@ from psyclone.configuration import Config from psyclone.domain.common.transformations import KernelModuleInlineTrans from psyclone.psyGen import TransInfo -from psyclone.psyir.nodes import Loop +from psyclone.psyir.nodes import Container, Loop, Routine -def trans(psy): - ''' Transformation entry point ''' +def trans(psyir: Container) -> None: + ''' + Transformation entry point. + + ''' config = Config.get() tinfo = TransInfo() parallel_loop_trans = tinfo.get_trans_name('GOceanOMPParallelLoopTrans') @@ -17,7 +20,7 @@ def trans(psy): parallel_trans = tinfo.get_trans_name('OMPParallelTrans') module_inline_trans = KernelModuleInlineTrans() - schedule = psy.invokes.get('invoke_0').schedule + schedule = psyir.walk(Routine)[0] # Inline all kernels in this Schedule for kernel in schedule.kernels(): @@ -26,15 +29,23 @@ def trans(psy): # Apply the OpenMPLoop transformation to every child in the schedule or # OpenMPParallelLoop to every Loop if it has distributed memory. for child in schedule.children: + # We need to ignore dependencies on '{u,v}a' because PSyclone correctly + # spots that there is a dependence in the bc_flather_{u,v} kernel. + # However, we know that practically this isn't a problem + # because these boundary-condition kernels only update values + # outside the domain. 
+ options = {} + if child.kernels()[0].name == "bc_flather_v_code": + options["ignore_dependencies_for"] = ["va%data"] + if child.kernels()[0].name == "bc_flather_u_code": + options["ignore_dependencies_for"] = ["ua%data"] if config.distributed_memory: if isinstance(child, Loop): - parallel_loop_trans.apply(child) + parallel_loop_trans.apply(child, options=options) else: - loop_trans.apply(child) + loop_trans.apply(child, options=options) if not config.distributed_memory: # If it is not distributed memory, enclose all of these loops # within a single OpenMP PARALLEL region parallel_trans.apply(schedule.children) - - return psy diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/serial_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/serial_transform.py index 1456cc2d..544e93ae 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/serial_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/serial_transform.py @@ -2,14 +2,17 @@ via the -s option. This script module-inline all kernels in the PSy-layer.''' from psyclone.domain.common.transformations import KernelModuleInlineTrans +from psyclone.psyir.nodes import Node, Routine -def trans(psy): - ''' Transformation script entry function ''' +def trans(psy: Node): + '''Entry point for PSyIR transformation. This script module-inlines + every user-supplied kernel that is called. 
+ ''' itrans = KernelModuleInlineTrans() - schedule = psy.invokes.get('invoke_0').schedule + schedule = psy.walk(Routine)[0] # Module-Inline all coded kernels in this Schedule for kernel in schedule.coded_kernels(): diff --git a/benchmarks/nemo/tracer_advection/compute_in_subroutine/Makefile b/benchmarks/nemo/tracer_advection/compute_in_subroutine/Makefile index 90a55d02..593b6207 100644 --- a/benchmarks/nemo/tracer_advection/compute_in_subroutine/Makefile +++ b/benchmarks/nemo/tracer_advection/compute_in_subroutine/Makefile @@ -22,7 +22,7 @@ DL_TIMER_NAME = libdl_timer_omp.a # Shorthand for invoking PSyclone with line-length limiting applied # to the output Fortran. -PSYCLONE = psyclone -api nemo -l output +PSYCLONE = psyclone -l output # Serial version. tra_adv_serial: dl_timer @@ -45,7 +45,7 @@ tra_adv_no_auto_serial: dl_timer # OpenACC version using Unified Memory with timer around outer loop only. tra_adv_acc: dl_timer mkdir -p $@ - ${PSYCLONE} -s ../scripts/acc_kernels_unified_memory_trans.py -opsy \ + ${PSYCLONE} -s ../scripts/acc_kernels_unified_memory_trans.py -o \ $@/tra_adv_compute.f90 ./tra_adv_compute_auto_arrays.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. @@ -58,7 +58,7 @@ ifndef PSYCLONE_NVIDIA_LIB_DIR $(error The tra_adv_acc_prof must have the PSYCLONE_NVIDIA_LIB_DIR defined) endif mkdir -p $@ - ${PSYCLONE} --profile invokes -s ../scripts/acc_kernels_unified_memory_trans.py -opsy \ + ${PSYCLONE} --profile routines -s ../scripts/acc_kernels_unified_memory_trans.py -o \ $@/tra_adv_compute.f90 ./tra_adv_compute_auto_arrays.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. @@ -68,7 +68,7 @@ endif # Serial Fortran version after transformation to SIR-compliant form. tra_adv_sir: dl_timer mkdir -p $@ - ${PSYCLONE} -s ../scripts/sir_trans.py -opsy $@/tra_adv_compute.f90 \ + ${PSYCLONE} -s ../scripts/sir_trans.py -o $@/tra_adv_compute.f90 \ ./tra_adv_compute_auto_arrays.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. 
@@ -78,7 +78,7 @@ tra_adv_sir: dl_timer # OpenACC added after transformation to SIR-compliant form. tra_adv_sir_acc: dl_timer mkdir -p $@ - ${PSYCLONE} -s ../scripts/sir_kernels_trans.py -opsy \ + ${PSYCLONE} -s ../scripts/sir_kernels_trans.py -o \ $@/tra_adv_compute.f90 ./tra_adv_compute_auto_arrays.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. @@ -90,7 +90,7 @@ ifndef PSYCLONE_NVIDIA_LIB_DIR $(error The tra_adv_sir_acc_prof must have the PSYCLONE_NVIDIA_LIB_DIR defined) endif mkdir -p $@ - ${PSYCLONE} --profile invokes -s ../scripts/sir_kernels_trans.py -opsy \ + ${PSYCLONE} --profile routines -s ../scripts/sir_kernels_trans.py -o \ $@/tra_adv_compute.f90 ./tra_adv_compute_auto_arrays.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. @@ -110,3 +110,4 @@ allclean: clean rm -rf tra_adv_acc_prof rm -rf tra_adv_sir rm -rf tra_adv_sir_acc + rm -rf tra_adv_no_auto_serial diff --git a/benchmarks/nemo/tracer_advection/multi_kernel/Makefile b/benchmarks/nemo/tracer_advection/multi_kernel/Makefile index 83b3d966..ab8b09bc 100644 --- a/benchmarks/nemo/tracer_advection/multi_kernel/Makefile +++ b/benchmarks/nemo/tracer_advection/multi_kernel/Makefile @@ -22,7 +22,7 @@ DL_TIMER_DIR = ../../../../shared/dl_timer DL_TIMER_NAME = libdl_timer_omp.a # Shorthand for invoking PSyclone. -PSYCLONE = psyclone -api nemo -l output +PSYCLONE = psyclone -l output # Serial version. tra_adv_serial: dl_timer @@ -36,7 +36,7 @@ tra_adv_serial: dl_timer # OpenACC version with timer around outer loop only. tra_adv_acc: dl_timer mkdir -p $@ - ${PSYCLONE} -s ../scripts/acc_kernels_unified_memory_trans.py -opsy \ + ${PSYCLONE} -s ../scripts/acc_kernels_unified_memory_trans.py -o \ $@/tra_adv_compute.f90 ./tra_adv_compute.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. 
@@ -49,7 +49,7 @@ ifndef PSYCLONE_NVIDIA_LIB_DIR $(error The tra_adv_acc_prof must have the PSYCLONE_NVIDIA_LIB_DIR defined) endif mkdir -p $@ - ${PSYCLONE} --profile invokes -s ../scripts/kernels_trans.py -opsy \ + ${PSYCLONE} --profile routines -s ../scripts/acc_kernels_unified_memory_trans.py -o \ $@/tra_adv_compute.f90 ./tra_adv_compute.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. @@ -59,7 +59,7 @@ endif # Serial Fortran version after transformation to SIR-compliant form. tra_adv_sir: dl_timer mkdir -p $@ - ${PSYCLONE} -s ../scripts/sir_trans.py -opsy $@/tra_adv_compute.f90 \ + ${PSYCLONE} -s ../scripts/sir_trans.py -o $@/tra_adv_compute.f90 \ ./tra_adv_compute.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. @@ -69,7 +69,7 @@ tra_adv_sir: dl_timer # OpenACC added after transformation to SIR-compliant form. tra_adv_sir_acc: dl_timer mkdir -p $@ - ${PSYCLONE} -s ../scripts/sir_kernels_trans.py -opsy \ + ${PSYCLONE} -s ../scripts/sir_kernels_trans.py -o \ $@/tra_adv_compute.f90 ./tra_adv_compute.F90 cp Makefile_gen $@/Makefile cp tra_adv_driver.F90 $@/. diff --git a/benchmarks/nemo/tracer_advection/original/Makefile b/benchmarks/nemo/tracer_advection/original/Makefile index 16d37045..d535c660 100644 --- a/benchmarks/nemo/tracer_advection/original/Makefile +++ b/benchmarks/nemo/tracer_advection/original/Makefile @@ -49,7 +49,7 @@ DL_TIMER_DIR = ../../../../shared/dl_timer DL_TIMER_NAME = libdl_timer_omp.a # Shorthand for invoking PSyclone. 
-PSYCLONE = psyclone -api nemo -l output ${PSYCLONE_PROFILE} +PSYCLONE = psyclone -l output ${PSYCLONE_PROFILE} # Add necessary flags for Nvidia nvtx instrumentation ifeq ($(ENABLE_NVIDIA_PROFILE),yes) @@ -71,14 +71,14 @@ tra_adv_serial: dl_timer ./tra_adv.F90 tra_adv_omp_cpu_levels: dl_timer ./tra_adv.F90 mkdir -p $@ - ${PSYCLONE} -s ../scripts/omp_cpu_levels_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/omp_cpu_levels_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} FORT_FLAGS="${F90FLAGS} ${OMPFLAGS} -I../${DL_TIMER_DIR}/src" \ LDFLAGS="${LDFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@ tra_adv_omp_cpu: dl_timer ./tra_adv.F90 mkdir -p $@ - ${PSYCLONE} -s ../scripts/omp_cpu_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/omp_cpu_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} FORT_FLAGS="${F90FLAGS} ${OMPFLAGS} -I../${DL_TIMER_DIR}/src" \ LDFLAGS="${LDFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@ @@ -87,42 +87,42 @@ tra_adv_omp_cpu: dl_timer ./tra_adv.F90 tra_adv_acc_kernels_unified_memory: dl_timer ./tra_adv.F90 mkdir -p $@ - ${PSYCLONE} -s ../scripts/acc_kernels_unified_memory_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/acc_kernels_unified_memory_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} ${UMEMFLAGS} -I../${DL_TIMER_DIR}/src" \ LDFLAGS="${LDFLAGS} ${ACCFLAGS} ${UMEMFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@ tra_adv_acc_kernels_explicit_data_movement: dl_timer ./tra_adv.F90 mkdir -p $@ - ${PSYCLONE} -s ../scripts/acc_kernels_explicit_data_movement_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/acc_kernels_explicit_data_movement_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} -I../${DL_TIMER_DIR}/src" \ LDFLAGS="${LDFLAGS} ${ACCFLAGS} 
../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@ tra_adv_acc_loops_unified_memory: dl_timer ./tra_adv.F90 mkdir -p $@ - ${PSYCLONE} -s ../scripts/acc_loops_unified_memory_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/acc_loops_unified_memory_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} ${UMEMFLAGS} -I../${DL_TIMER_DIR}/src" \ LDFLAGS="${LDFLAGS} ${ACCFLAGS} ${UMEMFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@ tra_adv_acc_loops_explicit_data_movement: dl_timer ./tra_adv.F90 mkdir -p $@ - ${PSYCLONE} -s ../scripts/acc_loops_explicit_data_movement_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/acc_loops_explicit_data_movement_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} -I../${DL_TIMER_DIR}/src" \ LDFLAGS="${LDFLAGS} ${ACCFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@ tra_adv_acc_mixed_unified_memory: dl_timer ./tra_adv.F90 mkdir -p $@ - ${PSYCLONE} -s ../scripts/acc_mixed_unified_memory_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/acc_mixed_unified_memory_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} ${UMEMFLAGS} -I../${DL_TIMER_DIR}/src" \ LDFLAGS="${LDFLAGS} ${ACCFLAGS} ${UMEMFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@ tra_adv_acc_mixed_explicit_data_movement: dl_timer ./tra_adv.F90 mkdir -p $@ - ${PSYCLONE} -s ../scripts/acc_mixed_explicit_data_movement_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/acc_mixed_explicit_data_movement_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} FORT_FLAGS="${F90FLAGS} ${ACCFLAGS} -I../${DL_TIMER_DIR}/src" \ LDFLAGS="${LDFLAGS} ${ACCFLAGS} ../${DL_TIMER_DIR}/${DL_TIMER_NAME}" -C $@ @@ -132,7 +132,7 @@ ifndef UMEMFLAGS $(error The OMP offload target requires OpenMP unified memory but the UMEMFLAGS 
environment variable is not set) endif mkdir -p $@ - ${PSYCLONE} -s ../scripts/omp_gpu_trans.py -opsy $@/tra_adv.f90 ./tra_adv.F90 + ${PSYCLONE} -s ../scripts/omp_gpu_trans.py -o $@/tra_adv.f90 ./tra_adv.F90 cp Makefile_gen $@/Makefile ${MAKE} PROF_LIB_INC="-I../${DL_TIMER_DIR}/src" \ FORT_FLAGS="${F90FLAGS} ${OMPTARGETFLAGS} ${UMEMFLAGS} -I../${DL_TIMER_DIR}/src" \ diff --git a/benchmarks/nemo/tracer_advection/scripts/acc_kernels_explicit_data_movement_trans.py b/benchmarks/nemo/tracer_advection/scripts/acc_kernels_explicit_data_movement_trans.py index 6260568f..a8375404 100644 --- a/benchmarks/nemo/tracer_advection/scripts/acc_kernels_explicit_data_movement_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/acc_kernels_explicit_data_movement_trans.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2018-2022, Science and Technology Facilities Council. +# Copyright (c) 2018-2025, Science and Technology Facilities Council. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -39,7 +39,7 @@ Once you have psyclone installed, this may be used by doing: - $ psyclone -api nemo -s + $ psyclone -s The transformation script attempts to insert Kernels directives at the highest possible location(s) in the schedule tree (i.e. to enclose as @@ -47,30 +47,26 @@ ''' -from psyclone.psyir.nodes import Directive +from psyclone.psyir.nodes import Directive, Routine, Node from psyclone.psyir.transformations import ACCUpdateTrans from psyclone.transformations import ACCEnterDataTrans from utils import add_kernels -def trans(psy): +def trans(psyir: Node) -> None: '''A PSyclone-script compliant transformation function. Applies - OpenACC 'kernels' and 'data movement' directives to NEMO code. + OpenACC 'kernels' and 'data movement' directives to generic code. - :param psy: The PSy layer object to apply transformations to. 
- :type psy: :py:class:`psyclone.psyGen.PSy` - ''' - - print("Invokes found:") - print("\n".join([str(name) for name in psy.invokes.names])) + :param psyir: The PSyIR to apply transformations to. - for invoke in psy.invokes.invoke_list: + ''' + for sched in psyir.walk(Routine): - if not invoke.schedule: - print(f"Invoke {invoke.name} has no Schedule! Skipping...") + if not sched.children: + print(f"Routine {sched.name} is empty! Skipping...") continue - add_kernels(invoke.schedule.children) - if invoke.schedule.walk(Directive): - ACCEnterDataTrans().apply(invoke.schedule) - ACCUpdateTrans().apply(invoke.schedule) + add_kernels(sched.children) + if sched.walk(Directive): + ACCEnterDataTrans().apply(sched) + ACCUpdateTrans().apply(sched) diff --git a/benchmarks/nemo/tracer_advection/scripts/acc_kernels_unified_memory_trans.py b/benchmarks/nemo/tracer_advection/scripts/acc_kernels_unified_memory_trans.py index fd9ad97b..904ffef9 100644 --- a/benchmarks/nemo/tracer_advection/scripts/acc_kernels_unified_memory_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/acc_kernels_unified_memory_trans.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2018-2022, Science and Technology Facilities Council. +# Copyright (c) 2018-2025, Science and Technology Facilities Council. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -34,12 +34,12 @@ # Authors: R. W. Ford, A. R. Porter and S. Siso, STFC Daresbury Lab '''A transformation script that seeks to apply OpenACC KERNELS directives to -NEMO style code. In order to use it you must first install PSyclone. See +generic Fortran code. In order to use it you must first install PSyclone. See README.md in the top-level directory. 
Once you have psyclone installed, this may be used by doing: - $ psyclone -api nemo -s + $ psyclone -s The transformation script attempts to insert Kernels directives at the highest possible location(s) in the schedule tree (i.e. to enclose as @@ -47,24 +47,25 @@ ''' +from psyclone.psyir.nodes import Node, Routine + from utils import add_kernels -def trans(psy): +def trans(psyir: Node) -> None: '''A PSyclone-script compliant transformation function. Applies - OpenACC 'kernels' to NEMO code. + OpenACC 'kernels' to existing code. - :param psy: The PSy layer object to apply transformations to. - :type psy: :py:class:`psyclone.psyGen.PSy` - ''' + :param psyir: The PSyIR to apply transformations to. - print("Invokes found:") - print("\n".join([str(name) for name in psy.invokes.names])) + ''' + print("Routines found:") - for invoke in psy.invokes.invoke_list: + for routine in psyir.walk(Routine): + print(routine.name) - if not invoke.schedule: - print(f"Invoke {invoke.name} has no Schedule! Skipping...") + if not routine.children: + print(f"Routine {routine.name} is empty! Skipping...") continue - add_kernels(invoke.schedule.children) + add_kernels(routine.children) diff --git a/benchmarks/nemo/tracer_advection/scripts/acc_loops_explicit_data_movement_trans.py b/benchmarks/nemo/tracer_advection/scripts/acc_loops_explicit_data_movement_trans.py index 4c5d1ebc..ac42bce2 100644 --- a/benchmarks/nemo/tracer_advection/scripts/acc_loops_explicit_data_movement_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/acc_loops_explicit_data_movement_trans.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2022-2023, Science and Technology Facilities Council. +# Copyright (c) 2022-2025, Science and Technology Facilities Council. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -37,47 +37,41 @@ to the outermost loop that is parallelisable, including implicit loops. This script also adds OpenACC explicit data movement directives.''' -from psyclone.psyir.nodes import Directive -from psyclone.psyGen import TransInfo +from psyclone.psyir.nodes import Directive, Node, Routine from psyclone.psyir.transformations import ACCUpdateTrans -from psyclone.transformations import ACCEnterDataTrans +from psyclone.transformations import ( + ACCEnterDataTrans, ACCLoopTrans, ACCParallelTrans) from utils import insert_explicit_loop_parallelism, normalise_loops -def trans(psy): +def trans(psyir: Node) -> None: ''' Add OpenACC Parallel Loop directive to all loops, including implicit ones, to target GPU parallelism and explicit data movement directives. - :param psy: the PSy object which this script will transform. - :type psy: :py:class:`psyclone.psyGen.PSy` - - :returns: the transformed PSy object. - :rtype: :py:class:`psyclone.psyGen.PSy` + :param psyir: the PSyIR which this script will transform. 
''' - acc_parallel_trans = TransInfo().get_trans_name('ACCParallelTrans') - acc_loop_trans = TransInfo().get_trans_name('ACCLoopTrans') + acc_parallel_trans = ACCParallelTrans() + acc_loop_trans = ACCLoopTrans() - print("Invokes found:") - for invoke in psy.invokes.invoke_list: - print(invoke.name) + print("Routines found:") + for routine in psyir.walk(Routine): + print(routine.name) # Convert array and range notation to loops and hoist expressions normalise_loops( - invoke.schedule, - unwrap_array_ranges=True, + routine, + scalarise_loops=True, hoist_expressions=True, ) insert_explicit_loop_parallelism( - invoke.schedule, + routine, region_directive_trans=acc_parallel_trans, loop_directive_trans=acc_loop_trans, collapse=True ) - if invoke.schedule.walk(Directive): - ACCEnterDataTrans().apply(invoke.schedule) - ACCUpdateTrans().apply(invoke.schedule) - - return psy + if routine.walk(Directive): + ACCEnterDataTrans().apply(routine) + ACCUpdateTrans().apply(routine) diff --git a/benchmarks/nemo/tracer_advection/scripts/acc_loops_unified_memory_trans.py b/benchmarks/nemo/tracer_advection/scripts/acc_loops_unified_memory_trans.py index 1557dd54..efef8816 100644 --- a/benchmarks/nemo/tracer_advection/scripts/acc_loops_unified_memory_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/acc_loops_unified_memory_trans.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2022-2023, Science and Technology Facilities Council. +# Copyright (c) 2022-2025, Science and Technology Facilities Council. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -36,38 +36,35 @@ ''' PSyclone transformation script to insert OpenACC Parallel Loop directives to the outermost loop that is parallelisable, including implicit loops.''' -from psyclone.psyGen import TransInfo +from psyclone.psyir.nodes import Node, Routine +from psyclone.transformations import ACCParallelTrans, ACCLoopTrans + from utils import insert_explicit_loop_parallelism, normalise_loops -def trans(psy): +def trans(psyir: Node) -> None: ''' Add OpenACC Parallel Loop directive to all loops, including implicit ones to target GPU parallelism. - :param psy: the PSy object which this script will transform. - :type psy: :py:class:`psyclone.psyGen.PSy` - :returns: the transformed PSy object. - :rtype: :py:class:`psyclone.psyGen.PSy` + :param psyir: the PSyIR which this script will transform. ''' - acc_parallel_trans = TransInfo().get_trans_name('ACCParallelTrans') - acc_loop_trans = TransInfo().get_trans_name('ACCLoopTrans') + acc_parallel_trans = ACCParallelTrans() + acc_loop_trans = ACCLoopTrans() - print("Invokes found:") - for invoke in psy.invokes.invoke_list: - print(invoke.name) + print("Routines found:") + for routine in psyir.walk(Routine): + print(routine.name) normalise_loops( - invoke.schedule, - unwrap_array_ranges=True, + routine, hoist_expressions=True, ) insert_explicit_loop_parallelism( - invoke.schedule, + routine, region_directive_trans=acc_parallel_trans, loop_directive_trans=acc_loop_trans, collapse=True ) - return psy diff --git a/benchmarks/nemo/tracer_advection/scripts/acc_mixed_explicit_data_movement_trans.py b/benchmarks/nemo/tracer_advection/scripts/acc_mixed_explicit_data_movement_trans.py index db5dee0b..b8f63f50 100644 --- a/benchmarks/nemo/tracer_advection/scripts/acc_mixed_explicit_data_movement_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/acc_mixed_explicit_data_movement_trans.py @@ -1,7 +1,7 @@ # 
----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2018-2023, Science and Technology Facilities Council. +# Copyright (c) 2018-2025, Science and Technology Facilities Council. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -39,7 +39,7 @@ Once you have psyclone installed, this may be used by doing: - $ psyclone -api nemo -s ./acc_mixed_explicit_data_movement_trans.py + $ psyclone -s ./acc_mixed_explicit_data_movement_trans.py This should produce a lot of output, ending with generated Fortran. Note that the Fortran source files provided to PSyclone must have already been @@ -47,50 +47,48 @@ ''' -from psyclone.psyir.nodes import Directive +from psyclone.psyir.nodes import Directive, Node, Routine from psyclone.psyir.transformations import ACCUpdateTrans from psyclone.transformations import ACCEnterDataTrans, ACCLoopTrans from utils import add_kernels, normalise_loops, \ insert_explicit_loop_parallelism -def trans(psy): +def trans(psyir: Node) -> None: '''A PSyclone-script compliant transformation function. Applies OpenACC 'kernels', 'loop' and explicit 'data' directives to NEMO code. - :param psy: The PSy layer object to apply transformations to. - :type psy: :py:class:`psyclone.psyGen.PSy` - ''' + :param psyir: The PSyIR to apply transformations to. - print("Invokes found:") - print("\n".join([str(name) for name in psy.invokes.names])) + ''' + print("Routines found:") + print("\n".join([rt.name for rt in psyir.walk(Routine)])) - for invoke in psy.invokes.invoke_list: + for routine in psyir.walk(Routine): - sched = invoke.schedule - if not sched: - print("Invoke {invoke.name} has no Schedule! Skipping...") + if not routine.children: + print("Routine {routine.name} is empty! 
Skipping...") continue # Convert array and range syntax to explicit loops normalise_loops( - invoke.schedule, - unwrap_array_ranges=True, + routine, + scalarise_loops=True, hoist_expressions=True, ) # Add OpenACC Loop directives insert_explicit_loop_parallelism( - invoke.schedule, + routine, region_directive_trans=None, loop_directive_trans=ACCLoopTrans(), collapse=True ) # Add OpenACC Kernel directives - add_kernels(sched.children) + add_kernels(routine.children) # Add OpenACC data directives - if invoke.schedule.walk(Directive): - ACCEnterDataTrans().apply(invoke.schedule) - ACCUpdateTrans().apply(invoke.schedule) + if routine.walk(Directive): + ACCEnterDataTrans().apply(routine) + ACCUpdateTrans().apply(routine) diff --git a/benchmarks/nemo/tracer_advection/scripts/acc_mixed_unified_memory_trans.py b/benchmarks/nemo/tracer_advection/scripts/acc_mixed_unified_memory_trans.py index 3cc7b6c0..c3cd28cb 100644 --- a/benchmarks/nemo/tracer_advection/scripts/acc_mixed_unified_memory_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/acc_mixed_unified_memory_trans.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2018-2023, Science and Technology Facilities Council. +# Copyright (c) 2018-2025, Science and Technology Facilities Council. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -47,43 +47,42 @@ ''' +from psyclone.psyir.nodes import Node, Routine +from psyclone.transformations import ACCLoopTrans from utils import add_kernels, normalise_loops, \ insert_explicit_loop_parallelism -def trans(psy): +def trans(psyir: Node) -> None: '''A PSyclone-script compliant transformation function. Applies OpenACC 'kernels' and 'loop' directives to NEMO code. - :param psy: The PSy layer object to apply transformations to. - :type psy: :py:class:`psyclone.psyGen.PSy` - ''' + :param psyir: The PSyIR to apply transformations to. 
- print("Invokes found:") - print("\n".join([str(name) for name in psy.invokes.names])) + ''' + print("Routines found:") + print("\n".join([rt.name for rt in psyir.walk(Routine)])) - for invoke in psy.invokes.invoke_list: + for routine in psyir.walk(Routine): - sched = invoke.schedule - if not sched: - print("Invoke {invoke.name} has no Schedule! Skipping...") + if not routine.children: + print(f"Routine {routine.name} is empty! Skipping...") continue # Convert array and range syntax to explicit loops normalise_loops( - invoke.schedule, - unwrap_array_ranges=True, + routine, + scalarise_loops=True, hoist_expressions=True, ) # Add OpenACC Loop directives insert_explicit_loop_parallelism( - invoke.schedule, + routine, region_directive_trans=None, loop_directive_trans=ACCLoopTrans(), collapse=True ) # Add OpenACC Kernel directives - add_kernels(sched.children) + add_kernels(routine) diff --git a/benchmarks/nemo/tracer_advection/scripts/omp_cpu_levels_trans.py b/benchmarks/nemo/tracer_advection/scripts/omp_cpu_levels_trans.py index bc6b7ef9..c2e9d9fb 100644 --- a/benchmarks/nemo/tracer_advection/scripts/omp_cpu_levels_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/omp_cpu_levels_trans.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2018-2023, Science and Technology Facilities Council +# Copyright (c) 2018-2025, Science and Technology Facilities Council # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -33,40 +33,43 @@ # ----------------------------------------------------------------------------- # Authors: R. W. Ford, A. R. Porter and S. Siso, STFC Daresbury Lab -'''A simple transformation script for the introduction of OpenMP with PSyclone. +'''A very simple transformation script for the introduction of OpenMP + to certain loops using PSyclone. 
- >>> psyclone -api "nemo" -s ./omp_cpu_levels_trans.py tra_adv.F90 + >>> psyclone -s ./omp_cpu_levels_trans.py tra_adv.F90 This should produce a lot of output, ending with generated Fortran. ''' -from psyclone.psyGen import TransInfo -from psyclone.nemo import NemoKern +from psyclone.psyir.nodes import Loop, Node, Routine +from psyclone.transformations import OMPParallelLoopTrans, TransformationError -def trans(psy): +# Set up some loop_type inference rules in order to reference useful domain +# loop constructs by name +Loop.set_loop_type_inference_rules({ + "lon": {"variable": "ji"}, + "lat": {"variable": "jj"}, + "levels": {"variable": "jk"} +}) + + +def trans(psyir: Node) -> None: ''' Transform a specific Schedule by making all loops over levels OpenMP parallel. - :param psy: the object holding all information on the PSy layer \ - to be modified. - :type psy: :py:class:`psyclone.psyGen.PSy` - - :returns: the transformed PSy object - :rtype: :py:class:`psyclone.psyGen.PSy` + :param psyir: the PSyIR to be modified. 
''' # Get the transformation we will apply - ompt = TransInfo().get_trans_name('OMPParallelLoopTrans') - for invoke in psy.invokes.invoke_list: - # Get the Schedule of the target routine - sched = invoke.schedule + ompt = OMPParallelLoopTrans() + for sched in psyir.walk(Routine): # Apply the OMP transformation to each loop over levels containing # a kernel for loop in sched.loops(): - kernels = loop.walk(NemoKern) - if kernels and loop.loop_type == "levels": - ompt.apply(loop) - - # Return the modified psy object - return psy + if loop.loop_type == "levels": + try: + ompt.apply(loop) + except TransformationError as err: + loop.append_preceding_comment( + f"Loop cannot be parallelised because: {err}") diff --git a/benchmarks/nemo/tracer_advection/scripts/omp_cpu_trans.py b/benchmarks/nemo/tracer_advection/scripts/omp_cpu_trans.py index c77813e7..1860eead 100644 --- a/benchmarks/nemo/tracer_advection/scripts/omp_cpu_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/omp_cpu_trans.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2022, Science and Technology Facilities Council. +# Copyright (c) 2022-2025, Science and Technology Facilities Council. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,38 +36,34 @@ ''' PSyclone transformation script to insert OpenMP Parallel Loop directives to the outermost loop that is parallelisable, including implicit loops.''' -from psyclone.psyGen import TransInfo +from psyclone.psyir.nodes import Node, Routine +from psyclone.transformations import OMPParallelTrans, OMPLoopTrans from utils import insert_explicit_loop_parallelism, normalise_loops -def trans(psy): +def trans(psyir: Node) -> None: ''' Add OpenMP Parallel Loop directive to all loops, including implicit ones to target CPU parallelism. - :param psy: the PSy object which this script will transform. 
- :type psy: :py:class:`psyclone.psyGen.PSy` - :returns: the transformed PSy object. - :rtype: :py:class:`psyclone.psyGen.PSy` + :param psyir: the PSyIR which this script will transform. ''' - omp_parallel_trans = TransInfo().get_trans_name('OMPParallelTrans') - omp_loop_trans = TransInfo().get_trans_name('OMPLoopTrans') + omp_parallel_trans = OMPParallelTrans() + omp_loop_trans = OMPLoopTrans() - print("Invokes found:") - for invoke in psy.invokes.invoke_list: - print(invoke.name) + print("Routines found:") + for routine in psyir.walk(Routine): + print(routine.name) normalise_loops( - invoke.schedule, - unwrap_array_ranges=True, + routine, + convert_array_notation=True, hoist_expressions=False, ) insert_explicit_loop_parallelism( - invoke.schedule, + routine, region_directive_trans=omp_parallel_trans, loop_directive_trans=omp_loop_trans, collapse=False ) - - return psy diff --git a/benchmarks/nemo/tracer_advection/scripts/omp_gpu_trans.py b/benchmarks/nemo/tracer_advection/scripts/omp_gpu_trans.py index be2f4927..6612a9f5 100644 --- a/benchmarks/nemo/tracer_advection/scripts/omp_gpu_trans.py +++ b/benchmarks/nemo/tracer_advection/scripts/omp_gpu_trans.py @@ -36,19 +36,17 @@ ''' PSyclone transformation script to insert OpenMP Target Loop directives to the outermost loop that is parallelisable, including implicit loops. ''' +from psyclone.psyir.nodes import Node, Routine +from psyclone.psyir.transformations import OMPTargetTrans, OMPLoopTrans from utils import insert_explicit_loop_parallelism, normalise_loops -def trans(psy): +def trans(psyir: Node) -> None: ''' Add OpenMP Target and Loop directives to all loops, including the implicit ones, to parallelise the code and execute it in an acceleration device. - :param psy: the PSy object which this script will transform. - :type psy: :py:class:`psyclone.psyGen.PSy` - :returns: the transformed PSy object. - :rtype: :py:class:`psyclone.psyGen.PSy` + :param psyir: the PSyIR which this script will transform. 
''' omp_target_trans = OMPTargetTrans() @@ -56,21 +54,17 @@ def trans(psy): omp_loop_trans.omp_directive = "teamsdistributeparalleldo" omp_loop_trans.omp_schedule = "none" - print("Invokes found:") - for invoke in psy.invokes.invoke_list: - print(invoke.name) + print("Routines found:") + for routine in psyir.walk(Routine): + print(routine.name) normalise_loops( - invoke.schedule, - unwrap_array_ranges=True, - hoist_expressions=True, + routine, ) insert_explicit_loop_parallelism( - invoke.schedule, + routine, region_directive_trans=omp_target_trans, loop_directive_trans=omp_loop_trans, collapse=True ) - - return psy diff --git a/benchmarks/nemo/tracer_advection/scripts/problemsize.sh b/benchmarks/nemo/tracer_advection/scripts/problemsize.sh index 09441c3e..f7b7d985 100755 --- a/benchmarks/nemo/tracer_advection/scripts/problemsize.sh +++ b/benchmarks/nemo/tracer_advection/scripts/problemsize.sh @@ -2,6 +2,8 @@ # Bash script to execute the tracer-advection benchmark with increasing # domain sizes. +# By default the process is pinned to core 0. Please edit the taskset +# command below if you wish to change this. if [ "$#" -lt 1 ] || [ ! -x "$1" ]; then echo "Wrong arguments. Usage: ../../problemsize.sh ./executable" @@ -24,7 +26,8 @@ for power in $(seq 4 9); do export JPI=${size} export JPJ=${size} - time=$(taskset -c 2 $@ | awk '{if ($1 == "Time-stepping") {print $5} }') + # Execute - use taskset to pin the process to a core. + time=$(taskset -c 0 $@ | awk '{if ($1 == "Time-stepping") {print $5} }') echo $size $time done diff --git a/benchmarks/nemo/tracer_advection/scripts/utils.py b/benchmarks/nemo/tracer_advection/scripts/utils.py index 8c9f530a..db02f2be 100644 --- a/benchmarks/nemo/tracer_advection/scripts/utils.py +++ b/benchmarks/nemo/tracer_advection/scripts/utils.py @@ -35,16 +35,29 @@ ''' Utilities file to parallelise Nemo code. 
''' -from psyclone.domain.nemo.transformations import NemoAllArrayRange2LoopTrans +import os +from typing import List, Union + from psyclone.errors import InternalError -from psyclone.psyir.nodes import Loop, Assignment, Directive, CodeBlock, Call -from psyclone.psyir.transformations import HoistLoopBoundExprTrans, HoistTrans -from psyclone.transformations import TransformationError, ACCKernelsTrans +from psyclone.psyir.nodes import ( + Assignment, Directive, CodeBlock, Call, IfBlock, IntrinsicCall, Loop, Node, + Reference, Return, Routine, Schedule, StructureReference) +from psyclone.psyir.symbols import DataSymbol +from psyclone.psyir.transformations import ( + ACCKernelsTrans, ArrayAssignment2LoopsTrans, HoistLocalArraysTrans, + HoistLoopBoundExprTrans, + HoistTrans, Maxval2LoopTrans, OMPMinimiseSyncTrans, + Reference2ArrayRangeTrans, ScalarisationTrans) +from psyclone.transformations import TransformationError def normalise_loops( schedule, - unwrap_array_ranges: bool = True, + hoist_local_arrays: bool = True, + convert_array_notation: bool = True, + loopify_array_intrinsics: bool = True, + convert_range_loops: bool = True, + scalarise_loops: bool = False, hoist_expressions: bool = True, ): ''' Normalise all loops in the given schedule so that they are in an @@ -52,20 +65,77 @@ def normalise_loops( them. :param schedule: the PSyIR Schedule to transform. - :param unwrap_array_ranges: whether to convert ranges to explicit loops. - :param hoist_expressions: whether to hoist bounds and loop invariant \ + :type schedule: :py:class:`psyclone.psyir.nodes.node` + :param bool hoist_local_arrays: whether to hoist local arrays. + :param bool convert_array_notation: whether to convert array notation + to explicit loops. + :param bool loopify_array_intrinsics: whether to convert intrinsics that + operate on arrays to explicit loops (currently only maxval). + :param bool convert_range_loops: whether to convert ranges to explicit + loops. 
+ :param scalarise_loops: whether to attempt to convert arrays to scalars + where possible, default is False. + :param hoist_expressions: whether to hoist bounds and loop invariant statements out of the loop nest. ''' - if unwrap_array_ranges: + if hoist_local_arrays: + # Apply the HoistLocalArraysTrans when possible, it cannot be applied + # to files with statement functions because it will attempt to put the + # allocate above it, which is not valid Fortran. + try: + HoistLocalArraysTrans().apply(schedule) + except TransformationError: + pass + + if convert_array_notation: + # Make sure all array dimensions are explicit + for reference in schedule.walk(Reference): + part_of_the_call = reference.ancestor(Call) + if part_of_the_call: + if not part_of_the_call.is_elemental: + continue + if isinstance(reference.symbol, DataSymbol): + try: + Reference2ArrayRangeTrans().apply(reference) + except TransformationError: + pass + + if loopify_array_intrinsics: + for intr in schedule.walk(IntrinsicCall): + if intr.intrinsic.name == "MAXVAL": + try: + Maxval2LoopTrans().apply(intr) + except TransformationError as err: + print(err.value) + + if convert_range_loops: # Convert all array implicit loops to explicit loops - explicit_loops = NemoAllArrayRange2LoopTrans() + explicit_loops = ArrayAssignment2LoopsTrans() for assignment in schedule.walk(Assignment): - explicit_loops.apply(assignment) + if assignment.walk(StructureReference): + continue # TODO #2951 Fix issues with structure_refs + try: + explicit_loops.apply(assignment) + except TransformationError: + pass + + if scalarise_loops: + # Apply scalarisation to every loop. Execute this in reverse order + # as sometimes we can scalarise earlier loops if following loops + # have already been scalarised. 
+ loops = schedule.walk(Loop) + loops.reverse() + scalartrans = ScalarisationTrans() + for loop in loops: + scalartrans.apply(loop) if hoist_expressions: # First hoist all possible expressions for loop in schedule.walk(Loop): - HoistLoopBoundExprTrans().apply(loop) + try: + HoistLoopBoundExprTrans().apply(loop) + except TransformationError: + pass # Hoist all possible assignments (in reverse order so the inner loop # constants are hoisted all the way out if possible) @@ -81,47 +151,76 @@ def insert_explicit_loop_parallelism( schedule, region_directive_trans=None, loop_directive_trans=None, - collapse: bool = True + collapse: bool = True, + privatise_arrays: bool = False, + asynchronous_parallelism: bool = False, + uniform_intrinsics_only: bool = False, + enable_reductions: bool = False, ): ''' For each loop in the schedule that doesn't already have a Directive as an ancestor, attempt to insert the given region and loop directives. - :param region_directive_trans: PSyclone transformation to insert the \ + :param schedule: the PSyIR Schedule to transform. + :type schedule: :py:class:`psyclone.psyir.nodes.Node` + :param region_directive_trans: PSyclone transformation that inserts the region directive. - :param loop_directive_trans: PSyclone transformation to use to insert the \ + :type region_directive_trans: \ + :py:class:`psyclone.transformation.Transformation` + :param loop_directive_trans: PSyclone transformation that inserts the + loop parallelisation directive. + :type loop_directive_trans: \ + :py:class:`psyclone.transformation.Transformation` + :param collapse: whether to attempt to insert the collapse clause to as many nested loops as possible. + :param privatise_arrays: whether to attempt to privatise arrays that cause + write-write race conditions. + :param asynchronous_parallelism: whether to attempt to add asynchronicity + to the parallel sections. 
+ :param uniform_intrinsics_only: if True it prevents offloading loops + with non-reproducible device intrinsics. + :param enable_reductions: whether to enable generation of reduction + clauses automatically. + ''' + nemo_v4 = os.environ.get('NEMOV4', False) # Add the parallel directives in each loop for loop in schedule.walk(Loop): if loop.ancestor(Directive): continue # Skip if an outer loop is already parallelised + opts = {"collapse": collapse, "privatise_arrays": privatise_arrays, + "verbose": True, "nowait": asynchronous_parallelism, + "enable_reductions": enable_reductions} + + if uniform_intrinsics_only: + opts["device_string"] = "nvfortran-uniform" + try: - loop_directive_trans.apply(loop) - # Only add the region directive if the loop was successfully - # parallelised. - if region_directive_trans is not None: - region_directive_trans.apply(loop.parent.parent) - except TransformationError as err: - # This loop can not be transformed, proceed to next loop - print("Loop not parallelised because:", str(err)) - continue + # First check that the region_directive is feasible for this region + if region_directive_trans: + # TODO psyclone/#3066 - validate *should* accept a single Node + # but currently has a bug and doesn't so we have to make a + # list and pass that. + region_directive_trans.validate([loop], options=opts) - if collapse: - # Count the number of perfectly nested loops - num_nested_loops = 0 - next_loop = loop - while isinstance(next_loop, Loop): - num_nested_loops += 1 - if len(next_loop.loop_body.children) > 1: - break - next_loop = next_loop.loop_body.children[0] + # If it is, apply the parallelisation directive + loop_directive_trans.apply(loop, options=opts) - if num_nested_loops > 1: - loop.parent.parent.collapse = num_nested_loops + # And if successful, the region directive on top. 
+ if region_directive_trans: + region_directive_trans.apply(loop.parent.parent, options=opts) + except TransformationError: + # This loop cannot be transformed, proceed to next loop. + # The parallelisation restrictions will be explained with a comment + # associated with the loop in the generated output. + continue + + # If we are adding asynchronous parallelism then we now try to minimise + # the number of barriers. + if asynchronous_parallelism: + minsync_trans = OMPMinimiseSyncTrans() + minsync_trans.apply(schedule) def valid_kernel(node): @@ -136,20 +235,23 @@ def valid_kernel(node): :rtype: bool ''' - excluded_node_types = (CodeBlock, Call) - return node.walk(excluded_node_types) == [] + try: + ACCKernelsTrans().validate(node, {"disable_loop_check": True}) + except TransformationError: + return False + + return True -def add_kernels(children, default_present=True): +def add_kernels(children: list[Node], default_present: bool = True): ''' Walks through the PSyIR inserting OpenACC KERNELS directives at as high a level as possible. - :param children: list of sibling Nodes in PSyIR that are candidates for \ + :param children: list of sibling Nodes in PSyIR that are candidates for inclusion in an ACC KERNELS region. - :type children: list of :py:class:`psyclone.psyir.nodes.Node` - :param bool default_present: whether or not to supply the \ - DEFAULT(PRESENT) clause to ACC KERNELS directives. + :param default_present: whether or not to supply the + DEFAULT(PRESENT) clause to ACC KERNELS directives. ''' if not children: @@ -168,16 +270,15 @@ def add_kernels(children, default_present=True): try_kernels_trans(node_list, default_present) -def try_kernels_trans(nodes, default_present): +def try_kernels_trans(nodes: list[Node], default_present: bool): ''' Attempt to enclose the supplied list of nodes within a kernels region. If the transformation fails then the error message is reported but execution continues. :param nodes: list of Nodes to enclose within a Kernels region. 
- :type nodes: list of :py:class:`psyclone.psyir.nodes.Node` - :param bool default_present: whether or not to supply the \ - DEFAULT(PRESENT) clause to ACC KERNELS directives. + :param default_present: whether or not to supply the + DEFAULT(PRESENT) clause to ACC KERNELS directives. ''' if not nodes: diff --git a/benchmarks/shallow/SEQ/runme_loop_fuse.py b/benchmarks/shallow/SEQ/runme_loop_fuse.py deleted file mode 100644 index 0265c266..00000000 --- a/benchmarks/shallow/SEQ/runme_loop_fuse.py +++ /dev/null @@ -1,25 +0,0 @@ -from parse import parse,ParseError -from psyGen import PSyFactory,GenerationError -#from algGen import Alg -api="gocean" -filename="shallow_gocean.f90" -ast,invokeInfo=parse(filename,api=api,invoke_name="invoke") -psy=PSyFactory(api).create(invokeInfo) -print psy.gen -#alg=Alg(ast,psy) - -print psy.invokes.names -schedule=psy.invokes.get('invoke_0').schedule -schedule.view() - -from psyGen import TransInfo -t=TransInfo() -print t.list -#lf=t.get_trans_name('DoubleLoopFuse') -lf=t.get_trans_name('LoopFuse') - -newschedule,memento=lf.apply(schedule.children[0],schedule.children[1]) -#newschedule,memento=lf.apply(schedule.children[0].children[0].children[0],schedule.children[1].children[0].children[0]) -newschedule.view() -#psy.invokes.get('invoke_0')._schedule=newschedule -#print psy.gen diff --git a/compiler_setup/intel.sh b/compiler_setup/intel.sh index dd3a70dd..ce334af0 100644 --- a/compiler_setup/intel.sh +++ b/compiler_setup/intel.sh @@ -41,10 +41,8 @@ OMPFLAGS="-qopenmp" LDFLAGS= #LDFLAGS+= -fast -# The archiver used to generate the API library. We must -# use Intel's xiar if doing IPO as otherwise the library -# doesn't contain the necessary symbols. -AR=xiar +# The archiver used to generate the API library. 
+AR=ar ARFLAGS=cru export F90 diff --git a/compiler_setup/nvidia.sh b/compiler_setup/nvidia.sh index c9abd3b2..61c4db93 100644 --- a/compiler_setup/nvidia.sh +++ b/compiler_setup/nvidia.sh @@ -26,7 +26,7 @@ OMPFLAGS="-mp" # Flag to use when compiling with OpenMP GPU offloading support OMPTARGETFLAGS="-mp=gpu -gpu=ccnative" # Flag to use to specify use of 'managed memory' (unified memory) -UMEMFLAGS="-gpu=managed" +UMEMFLAGS="-gpu=mem:managed" # Flags to use when compiling with OpenACC support ACCFLAGS="-acc=gpu -gpu=ccnative" diff --git a/compiler_setup/nvidia_acc.sh b/compiler_setup/nvidia_acc.sh index 415e73a8..61c4f289 100644 --- a/compiler_setup/nvidia_acc.sh +++ b/compiler_setup/nvidia_acc.sh @@ -11,30 +11,14 @@ CFLAGS="-g" F90FLAGS="-O3 -Minfo=all" # Debugging options #F90FLAGS"+=" -fcheck=all -fbacktrace -ffpe-trap=invalid -g -O0" -# -Mcuda is for CUDA Fortran -# nordc - do not link to routines compiled for device (ensure -# kernel code is in-lined in loops) -# cc = compute capability -# Registers are shared by threads in an SMP. The more registers a kernel -# uses, the fewer threads it can support. This parameter can be tuned and -# should be a multiple of 8. -# -Mcuda is required to build CUDA Fortran -# For Quadro K600 -#F90FLAGS+=" -acc -ta=tesla:cc30,nordc -Mcuda=cc30,nordc" -# For Tesla K20c -#F90FLAGS+=" -acc -ta=tesla,cc35,maxregcount:80,nordc -Mcuda=cc35,maxregcount:80,nordc" -# V100 with managed memory -F90FLAGS+=" -acc=gpu -gpu=cc70,managed" +# managed memory +F90FLAGS+=" -acc=gpu -gpu=mem:managed" # Linker flags -# For Quadro K600 -#LDFLAGS+=" -acc -ta=tesla,cc30 -Mcuda=cc30,nordc" -# For Tesla K20c -#LDFLAGS="-acc -ta=nvidia,cc35 -Mcuda=cc35,nordc" -# V100 with managed memory -LDFLAGS="-acc=gpu -gpu=cc70,managed" -# Location of various CUDA maths libraries. libnvToolsExt is required when +# managed memory +LDFLAGS="-acc=gpu -gpu=mem:managed" +# Location of various CUDA maths libraries. nvtx3interop is required when # using nvtx for profiling. 
-LDFLAGS+=" -Mcuda -L${CUDA_MATH_DIR}/lib64 -lnvToolsExt" +LDFLAGS+=" -cuda -L${CUDA_MATH_DIR}/lib64 -lnvtx3interop" # Flags to use when compiling with OpenMP support OMPFLAGS="-mp" # Command to use to create archive of object files diff --git a/compiler_setup/spack_nvidia.sh b/compiler_setup/spack_nvidia.sh new file mode 100644 index 00000000..a3ac0450 --- /dev/null +++ b/compiler_setup/spack_nvidia.sh @@ -0,0 +1,13 @@ +# Build settings for the Nvidia compiler +# ================================================ +# Fortran compiler + +# ============================== +export F90=$FC + +export LDFLAGS="-cuda -L${CUDA_HOME}/lib64 -lnvtx3interop" +export OMPTARGETFLAGS="-mp=gpu -gpu=ccnative" +export OMPFLAGS="-mp" +export UMEMFLAGS="-gpu=mem:managed" +export ACCFLAGS="-acc=gpu -gpu=ccnative" + diff --git a/shared/FortCL b/shared/FortCL index d516ed01..401148e4 160000 --- a/shared/FortCL +++ b/shared/FortCL @@ -1 +1 @@ -Subproject commit d516ed01ea23565bfc4f531a795d2c7a2a57fe50 +Subproject commit 401148e4b6d6efdd4d0157123b118ed07d831446 diff --git a/shared/PSyclone b/shared/PSyclone index 106543da..63d4c225 160000 --- a/shared/PSyclone +++ b/shared/PSyclone @@ -1 +1 @@ -Subproject commit 106543dafe26fe114de192f27311637a85a28a81 +Subproject commit 63d4c22552fb6cd5fafbd4185ef373a1d9e3713c diff --git a/shared/dl_esm_inf b/shared/dl_esm_inf index ad209e9d..358402ec 160000 --- a/shared/dl_esm_inf +++ b/shared/dl_esm_inf @@ -1 +1 @@ -Subproject commit ad209e9d252995bd83127de4c481232ca14ed655 +Subproject commit 358402ecc4d88e93a62a3ca13dc9d20d2eb27f90 diff --git a/shared/kokkos b/shared/kokkos index ae5fc649..552f2375 160000 --- a/shared/kokkos +++ b/shared/kokkos @@ -1 +1 @@ -Subproject commit ae5fc649ef4b62b48a01123759ed066bff227b43 +Subproject commit 552f2375de06361f8a5662abc0859ae233b5d8f8