diff --git a/examples/nemo/eg7/openmp_cpu_nowait_trans.py b/examples/nemo/eg7/openmp_cpu_nowait_trans.py index 016f56fdd8..aefe2311fb 100755 --- a/examples/nemo/eg7/openmp_cpu_nowait_trans.py +++ b/examples/nemo/eg7/openmp_cpu_nowait_trans.py @@ -42,67 +42,21 @@ OMPLoopTrans, OMPMinimiseSyncTrans, TransformationError, - OMPParallelTrans + MaximalOMPParallelRegionTrans ) from psyclone.psyir.nodes import ( Assignment, Directive, - IfBlock, Loop, - OMPBarrierDirective, - OMPDoDirective, Routine, ) -def add_parallel_region_to_contiguous_directives(schedule): - '''Adds OMPParallelDirective nodes around areas of the schedule with - contiguous OpenMP directives. - - :param schedule: The Schedule to add OpenMPParallelDirectives to. - :type schedule: :py:class:`psyclone.nodes.Schedule` - ''' - par_trans = OMPParallelTrans() - start = -1 - end = -1 - sets = [] - # Loop through the children, if its an OpenMP directive add it - # to the current set - for child in schedule: - if isinstance(child, (OMPDoDirective, OMPBarrierDirective)): - if start < 0: - start = child.position - end = child.position + 1 - else: - # If we have a non OMPDodirective/OMPBarrierDirective then add - # an OMPParallelDirective if needed. - if start >= 0: - sets.append((start, end)) - start = -1 - end = -1 - # Recurse appropriately to sub schedules: - if isinstance(child, Loop): - add_parallel_region_to_contiguous_directives(child.loop_body) - elif isinstance(child, IfBlock): - add_parallel_region_to_contiguous_directives(child.if_body) - if child.else_body: - add_parallel_region_to_contiguous_directives( - child.else_body - ) - # If we get to the end and need to enclose some nodes in a parallel - # directive we do it now - if start >= 0: - sets.append((start, end)) - - for subset in sets[::-1]: - par_trans.apply(schedule[subset[0]:subset[1]]) - - def trans(psyir): ''' Adds OpenMP Loop directives with nowait to Nemo loops over levels. 
- This is followed by applying OpenMP parallel directives as required, - before removing barriers where possible. - + This is followed by applying OpenMP parallel directives as required + with the MaximalOMPParallelRegionTrans, before removing barriers where + possible. :param psyir: the PSyIR of the provided file. :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer` @@ -130,5 +84,5 @@ def trans(psyir): # Apply the largest possible parallel regions and remove any barriers that # can be removed. for routine in psyir.walk(Routine): - add_parallel_region_to_contiguous_directives(routine) + MaximalOMPParallelRegionTrans().apply(routine) minsync_trans.apply(routine) diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index a7c703c66b..1a1008a1ca 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -40,14 +40,14 @@ from psyclone.domain.common.transformations import KernelModuleInlineTrans from psyclone.psyir.nodes import ( - Assignment, Loop, Directive, Node, Reference, CodeBlock, Call, Return, - IfBlock, Routine, Schedule, IntrinsicCall, StructureReference) + Assignment, Loop, Directive, Node, Reference, CodeBlock, Call, + Routine, Schedule, IntrinsicCall, StructureReference) from psyclone.psyir.symbols import DataSymbol from psyclone.psyir.transformations import ( ArrayAssignment2LoopsTrans, HoistLoopBoundExprTrans, HoistLocalArraysTrans, HoistTrans, InlineTrans, Maxval2LoopTrans, ProfileTrans, OMPMinimiseSyncTrans, Reference2ArrayRangeTrans, - ScalarisationTrans, IncreaseRankLoopArraysTrans) + ScalarisationTrans, IncreaseRankLoopArraysTrans, MaximalRegionTrans) from psyclone.transformations import TransformationError # USE statements to chase to gather additional symbol information. @@ -474,6 +474,11 @@ def add_profiling(children: Union[List[Node], Schedule]): attempt to add profiling regions. 
class MaximalProfilingTrans(MaximalRegionTrans):
    '''Applies profiling to the largest possible regions by handing each
    region that is found to ProfileTrans.

    ``apply`` is an instance method inherited from MaximalRegionTrans, so
    this class must be instantiated before it is used, i.e.
    ``MaximalProfilingTrans().apply(children)``.
    '''
    # Transformation that is applied to every region that is found.
    _transformation = ProfileTrans
    # Statement types that may appear in a profiled region. This has to be
    # a tuple (not a list) because MaximalRegionTrans passes it directly
    # to isinstance(), which raises a TypeError when given a list.
    _allowed_nodes = (Assignment, Call, CodeBlock)
- :type nodes: list of :py:class:`psyclone.psyir.nodes.Node` - - ''' - if nodes: - # Check whether we should be adding profiling inside this routine - routine_name = nodes[0].ancestor(Routine).name.lower() - if any(ignore in routine_name for ignore in PROFILING_IGNORE): - return - if len(nodes) == 1: - if isinstance(nodes[0], CodeBlock) and \ - len(nodes[0].get_ast_nodes) == 1: - # Don't create profiling regions for CodeBlocks consisting - # of a single statement - return - if isinstance(nodes[0], IfBlock) and \ - "was_single_stmt" in nodes[0].annotations and \ - isinstance(nodes[0].if_body[0], CodeBlock): - # We also don't put single statements consisting of - # 'IF(condition) CALL blah()' inside profiling regions - return - try: - ProfileTrans().apply(nodes) - except TransformationError: - pass + MaximalProfilingTrans.apply(children) diff --git a/src/psyclone/psyir/transformations/__init__.py b/src/psyclone/psyir/transformations/__init__.py index 9f186ee31e..a9f81d7961 100644 --- a/src/psyclone/psyir/transformations/__init__.py +++ b/src/psyclone/psyir/transformations/__init__.py @@ -94,6 +94,8 @@ from psyclone.psyir.transformations.loop_trans import LoopTrans from psyclone.psyir.transformations.value_range_check_trans import ( ValueRangeCheckTrans) +from psyclone.psyir.transformations.maximal_region_trans import ( + MaximalRegionTrans) from psyclone.psyir.transformations.omp_critical_trans import ( OMPCriticalTrans) from psyclone.psyir.transformations.omp_loop_trans import OMPLoopTrans @@ -126,6 +128,9 @@ OMPTaskloopTrans from psyclone.psyir.transformations.omp_declare_target_trans import \ OMPDeclareTargetTrans +from psyclone.psyir.transformations.maximal_omp_parallel_region_trans import ( + MaximalOMPParallelRegionTrans +) from psyclone.psyir.transformations.omp_parallel_trans import ( OMPParallelTrans, ) @@ -179,6 +184,8 @@ "ParallelRegionTrans", "OMPTaskloopTrans", "OMPDeclareTargetTrans", + "MaximalRegionTrans", "OMPCriticalTrans", + 
"MaximalOMPParallelRegionTrans", "OMPParallelTrans", ] diff --git a/src/psyclone/psyir/transformations/maximal_omp_parallel_region_trans.py b/src/psyclone/psyir/transformations/maximal_omp_parallel_region_trans.py new file mode 100644 index 0000000000..d16f2b8ed6 --- /dev/null +++ b/src/psyclone/psyir/transformations/maximal_omp_parallel_region_trans.py @@ -0,0 +1,93 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2017-2025, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
@transformation_documentation_wrapper
class MaximalOMPParallelRegionTrans(MaximalRegionTrans):
    '''Encloses the largest possible sections of the input in OpenMP
    parallel directives.

    Currently, this never places an OpenMP parallel region around
    Assignments that are outside of another OpenMP directive. See #3157
    and the discussion on #3205 for more detail.
    '''
    # The transformation used to create each parallel region.
    _transformation = OMPParallelTrans
    # A candidate region is only transformed if it contains at least one
    # of these directives (a region holding nothing but synchronisation
    # directives is ignored).
    _required_nodes = (
        OMPSerialDirective,
        OMPTaskloopDirective,
        OMPDoDirective,
        OMPLoopDirective,
        OMPTaskDirective,
        DynamicOMPTaskDirective,
    )
    # Statement nodes permitted inside a region: the two synchronisation
    # directives followed by every required directive type.
    _allowed_nodes = (
        OMPTaskwaitDirective,
        OMPBarrierDirective,
        *_required_nodes,
    )

    def apply(self, nodes: Union[Node, Schedule, list[Node]], **kwargs):
        '''Applies the transformation to the nodes provided. All of the
        work is delegated to the parent class.

        :param nodes: can be a single node, a schedule or a list of nodes.
        '''
        super().apply(nodes, **kwargs)
@transformation_documentation_wrapper
class MaximalRegionTrans(RegionTrans, metaclass=abc.ABCMeta):
    '''Abstract transformation containing the functionality to apply
    another transformation to the largest allowed regions of the provided
    code segment.

    Subclasses control the behaviour through three class attributes:

    * ``_transformation``: the transformation class that is instantiated
      and applied to each region that is found.
    * ``_allowed_nodes``: the Node classes that are allowed as statements
      in a transformed region. Loops, WhileLoops and IfBlocks that are not
      listed are still accepted if everything in their bodies is accepted.
    * ``_required_nodes``: the Node classes of which at least one must be
      present in a region for it to be transformed (e.g. a region of only
      barriers doesn't need to be transformed). Defaults to any Node.

    '''
    # The type of transformation to be applied to each region.
    _transformation = None
    # Tuple of statement nodes allowed inside the _transformation.
    _allowed_nodes = ()
    # Tuple of nodes that there must be at least one of inside a region.
    # The trailing comma matters: "(Node)" is just Node, not a tuple.
    _required_nodes = (Node,)

    @staticmethod
    def _as_type_tuple(node_types) -> tuple:
        '''Normalises a collection of node classes so that it can be used
        with isinstance() and Node.walk(). Subclasses may supply a single
        class, a list or a tuple.

        :param node_types: a class or an iterable of classes.

        :returns: the supplied classes as a tuple.
        '''
        if isinstance(node_types, type):
            return (node_types,)
        return tuple(node_types)

    def _can_be_in_region(self, node: Node) -> bool:
        '''Returns whether the provided node can be included in a region.
        Loops and if statements are recursed into to check whether all of
        their children can be.

        :param node: the candidate Node to be placed into a transformed
                     region.

        :returns: whether it is safe to add the node to a transformed
                  region.
        '''
        if isinstance(node, self._as_type_tuple(self._allowed_nodes)):
            return True

        if isinstance(node, (Loop, WhileLoop)):
            # A loop is acceptable only if its whole body is.
            return all(self._can_be_in_region(child)
                       for child in node.loop_body)

        if isinstance(node, IfBlock):
            # Both branches (when present) must be entirely acceptable.
            if not all(self._can_be_in_region(child)
                       for child in node.if_body):
                return False
            if node.else_body:
                return all(self._can_be_in_region(child)
                           for child in node.else_body)
            return True

        # All other node types we default to False.
        return False

    def _has_required_node(self, block: list[Node]) -> bool:
        '''
        :param block: a candidate region (list of sibling nodes).

        :returns: whether any node in the block is, or contains, one of
                  the _required_nodes.
        '''
        required = self._as_type_tuple(self._required_nodes)
        return any(node.walk(required, stop_type=required)
                   for node in block)

    def _compute_transformable_sections(
            self, node_list: list[Node]
    ) -> list[list[Node]]:
        '''
        Computes the sections of the input node_list to apply the
        transformation to.

        :param node_list: The node_list passed into this Transformation.

        :returns: The list of node_lists to apply this class'
                  _transformation class to.
        '''
        all_blocks = []
        current_block = []
        for child in node_list:
            if self._can_be_in_region(child):
                # Extend the region that is currently being built.
                current_block.append(child)
                continue

            # This child ends the current region. Keep the region only
            # if it contains something that needs transforming.
            if current_block and self._has_required_node(current_block):
                all_blocks.append(current_block)
            current_block = []

            # The child itself cannot be enclosed but there may be
            # regions inside its bodies.
            if isinstance(child, IfBlock):
                all_blocks.extend(
                    self._compute_transformable_sections(child.if_body))
                if child.else_body:
                    all_blocks.extend(
                        self._compute_transformable_sections(
                            child.else_body))
            if isinstance(child, (Loop, WhileLoop)):
                all_blocks.extend(
                    self._compute_transformable_sections(child.loop_body))

        # Any nodes left over at the end of the node_list form a final
        # candidate region.
        if current_block and self._has_required_node(current_block):
            all_blocks.append(current_block)

        return all_blocks

    def _handle_invalid_block(self, err: TransformationError,
                              block: list[Node],
                              all_blocks: list[list[Node]]):
        '''
        Function to handle what happens when a discovered block fails
        validation for the relevant transformation. Children classes
        are free to implement their own version of this routine. The
        default implementation removes the block from the list of blocks
        and continues.

        :param err: The TransformationError raised by the transformation's
                    validate function.
        :param block: The block that failed validation.
        :param all_blocks: The list of all of the blocks found during
                           application of this transformation.
        '''
        # Remove by identity. list.remove() compares with "==" and could
        # therefore discard a different block whose nodes happen to
        # compare equal to those of the failing block.
        for index, candidate in enumerate(all_blocks):
            if candidate is block:
                del all_blocks[index]
                break

    def validate(self, nodes: Union[Node, Schedule, list[Node]], **kwargs):
        '''Validates whether this transformation can be applied to the
        nodes provided.

        :param nodes: can be a single node, a schedule or a list of nodes.

        :raises TransformationError: if the nodes provided don't all have
            the same parent or aren't consecutive.
        '''
        self.validate_options(**kwargs)
        node_list = self.get_node_list(nodes)

        # Nothing to check (e.g. an empty Schedule was supplied). Without
        # this guard the indexing below raises an IndexError.
        if not node_list:
            return

        node_parent = node_list[0].parent
        prev_position = node_list[0].position
        for child in node_list[1:]:
            if child.parent is not node_parent:
                raise TransformationError(
                    f"Error in {self.name} transformation: supplied nodes "
                    f"are not children of the same parent.")
            if prev_position+1 != child.position:
                raise TransformationError(
                    f"Children are not consecutive children of one parent: "
                    f"child '{child.debug_string().rstrip()}' has position "
                    f"{child.position}, but previous child had position "
                    f"{prev_position}.")
            prev_position = child.position

    def apply(self, nodes: Union[Node, Schedule, list[Node]], **kwargs):
        '''Applies the transformation to the nodes provided. The options
        in kwargs are validated by this class but are not forwarded to
        the _transformation.

        :param nodes: can be a single node, a schedule or a list of nodes.
        '''
        node_list = self.get_node_list(nodes)

        # Call validate.
        self.validate(nodes, **kwargs)

        region_trans = self._transformation()

        all_blocks = self._compute_transformable_sections(node_list)

        # Check that the transformation can be applied to all of the found
        # blocks. Iterate over a copy as the handler may modify the list.
        for block in all_blocks[:]:
            try:
                region_trans.validate(block)
            except TransformationError as err:
                # Perform class specific behaviour for a block that fails
                # transformation validation.
                self._handle_invalid_block(err, block, all_blocks)

        # Apply the transformation to all of the remaining blocks.
        for block in all_blocks:
            region_trans.apply(block)
def _eliminate_uncontained_barriers(self, routine: Routine) -> None:
    '''
    Detaches every OMPBarrierDirective in the routine that has no
    OMPParallelDirective ancestor.

    :param routine: the routine to remove uncontained barriers from.
    '''
    uncontained = (
        barrier for barrier in routine.walk(OMPBarrierDirective)
        if barrier.ancestor(OMPParallelDirective) is None
    )
    for barrier in uncontained:
        barrier.detach()
for parallel in node.walk(OMPParallelDirective): _eliminate_final_parallel_barrier(parallel) + # Finally eliminate any barriers leftover outside of parallel + # regions, as these are now superfluous + self._eliminate_uncontained_barriers(node) # Eliminate OMPTaskwaitDirectives for the gpu_directives if len(gpu_directives) > 0: self._eliminate_adjacent_barriers(node, OMPTaskwaitDirective) diff --git a/src/psyclone/tests/psyir/transformations/maximal_ompparallel_region_trans_test.py b/src/psyclone/tests/psyir/transformations/maximal_ompparallel_region_trans_test.py new file mode 100644 index 0000000000..a8ad9d88c8 --- /dev/null +++ b/src/psyclone/tests/psyir/transformations/maximal_ompparallel_region_trans_test.py @@ -0,0 +1,54 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2026, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
def test_maximal_ompparallel_region_trans_apply(fortran_reader):
    ''' Test the apply method of the ompparallel region transformation:
    a routine containing only an assignment (and therefore no OpenMP
    directives) must not gain a parallel region.'''
    code = """subroutine x
    integer :: i
    i = 1
    end subroutine x"""
    psyir = fortran_reader.psyir_from_source(code)
    routine = psyir.children[0]
    trans = MaximalOMPParallelRegionTrans()
    trans.apply(routine.children[:])
    assert not psyir.walk(OMPParallelDirective)
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Authors A. B. G. 
@pytest.mark.parametrize(
    "statement,expected",
    [
        ("i = 1", True),
        ("call a_function()", False),
        ("do i = 1, 100\nj = j + 1\nend do", True),
        ("do i = 1, 100\ncall a_function()\nend do", False),
        ("if (.true.) then\nj=3\nend if", True),
        ("if(.true.) then\nj=3\nelse\nj=3\nend if", True),
        ("if(.true.) then\ncall a_function()\nelse\nj=3\nendif", False),
        ("if(.true.) then\nj=3\nelse\ncall a_function()\nendif", False),
    ]
)
def test_can_be_in_region(fortran_reader, statement, expected):
    '''Test the _can_be_in_region function of MaximalRegionTrans (via
    the MaxParTrans subclass, which only allows Assignments).'''
    code = f"""
    subroutine test
        use some_module
        integer :: i, j
        {statement}
    end subroutine test
    """
    routine = fortran_reader.psyir_from_source(code).walk(Routine)[0]
    first_statement = routine.children[0]
    assert MaxParTrans()._can_be_in_region(first_statement) == expected
def test_apply(fortran_reader):
    '''Test the apply function of MaximalRegionTrans (via the MaxParTrans
    subclass).'''
    code = """
    subroutine test
        use some_module
        integer :: i, j
        i = 1
        j = 1
        call a_function()
        if(.true.) then
            i = 1
        end if
        j = 1
    end subroutine test
    """
    psyir = fortran_reader.psyir_from_source(code)
    routine = psyir.walk(Routine)[0]
    mtrans = MaxParTrans()
    mtrans.apply(routine)
    # The call splits the routine in two, so we expect two
    # OMPParallelDirectives: one containing i = 1 and j = 1, and another
    # containing the IfBlock and the second j = 1.
    regions = routine.walk(OMPParallelDirective)
    assert len(regions) == 2

    first_body = regions[0].dir_body.children
    assert len(first_body) == 2
    assert first_body[0].debug_string() == "i = 1\n"
    assert first_body[1].debug_string() == "j = 1\n"

    second_body = regions[1].dir_body.children
    assert isinstance(second_body[0], IfBlock)
    assert second_body[1].debug_string() == "j = 1\n"

    # A list of consecutive assignments ends up in a single region.
    code = """subroutine x
    integer :: i, j, k, l

    i = 1
    j = 2
    k = 3
    l = 4
    end subroutine x"""
    psyir = fortran_reader.psyir_from_source(code)
    assigns = psyir.walk(Assignment)
    mtrans.apply(assigns)
    regions = psyir.walk(OMPParallelDirective)
    assert len(regions) == 1
    pdir = regions[0]
    # All the assignments should be in the parallel directive.
    for assign in assigns:
        assert assign.parent.parent is pdir

    # Loops, if blocks and while loops containing only allowed nodes are
    # enclosed together with their neighbours.
    code = """subroutine x
    integer :: i, j, k, l

    i = 1
    do j = 2, 3
      k = 1
    end do
    if (j == 2) then
      k = 4
    end if
    do while(j < 3)
      j = j + 1
    end do
    i = 4
    end subroutine x
    """
    psyir = fortran_reader.psyir_from_source(code)
    nodes = psyir.walk(Routine)[0].children[:]
    mtrans.apply(nodes)
    regions = psyir.walk(OMPParallelDirective)
    assert len(regions) == 1
    pdir = regions[0]
    # All of the blocks here should be in the same ParallelDirective
    for node in nodes:
        assert node.parent.parent is pdir

    # A call between two assignments gives two separate regions.
    code = """subroutine x
    use some_mod
    integer :: i

    i = 1
    call something()
    i = 2
    end subroutine x"""
    psyir = fortran_reader.psyir_from_source(code)
    nodes = psyir.walk(Routine)[0].children[:]
    mtrans.apply(nodes)
    pdirs = psyir.walk(OMPParallelDirective)
    assert len(pdirs) == 2
    # The two assignments are in different ParallelDirectives and the
    # call is not inside either of them.
    assert nodes[0].parent.parent is pdirs[0]
    assert not nodes[1].ancestor(OMPParallelDirective)
    assert nodes[2].parent.parent is pdirs[1]

    code = """subroutine x
    use some_mod
    integer :: i, j

    if(i == 1) then
      call something()
      i = 2
    else
      i = 3
    end if

    do i = 1,5
      call something()
      j = 2
    end do

    do while(j == 3)
      call something()
      j = j + 2
    end do
    end subroutine x"""
    # Each of the nodes should contain OMPParallels inside them and there
    # should be no top level OMPParallelDirective
    psyir = fortran_reader.psyir_from_source(code)
    nodes = psyir.walk(Routine)[0].children[:]
    mtrans.apply(nodes)
    assert len(psyir.walk(OMPParallelDirective)) == 4
    ifblock = nodes[0]
    assert len(ifblock.walk(OMPParallelDirective)) == 2
    assert len(ifblock.if_body.children) == 2
    assert isinstance(ifblock.if_body.children[1], OMPParallelDirective)
    assert isinstance(ifblock.else_body.children[0], OMPParallelDirective)

    class Faketrans(Transformation):
        '''Dummy transformation whose validation only succeeds the first
        time that it is called.'''
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self._validate_count = 0

        def validate(self, node, **kwargs):
            if self._validate_count < 1:
                self._validate_count = self._validate_count + 1
                return
            raise TransformationError("")

        def apply(self, node, **kwargs):
            OMPParallelTrans().apply(node, **kwargs)

    class OneParTrans(MaximalRegionTrans):
        '''Dummy MaximalRegionTrans that uses our FakeTrans'''
        _transformation = Faketrans
        _allowed_nodes = (Assignment, )
        _required_nodes = (Assignment, )

    code = """subroutine x
    use some_mod
    integer :: i, j

    if(i == 1) then
      call something()
      i = 2
    else
      i = 3
    end if

    do i = 1,5
      call something()
      j = 2
    end do

    do while(j == 3)
      call something()
      j = j + 2
    end do
    end subroutine x"""
    # Four candidate regions are found (as above) but every block that
    # fails validation must be dropped rather than transformed.
    psyir = fortran_reader.psyir_from_source(code)
    nodes = psyir.walk(Routine)[0].children[:]
    mtrans = OneParTrans()
    mtrans.apply(nodes)
    # Validate fails on all but the first try so we only get one resulting
    # OMPParallelDirective
    assert len(psyir.walk(OMPParallelDirective)) == 1
def test_omp_eliminate_uncontained_barriers(fortran_reader):
    '''
    Test the _eliminate_uncontained_barriers routine of the
    OMPMinimiseSyncTrans.'''
    code = """subroutine test

    end subroutine
    """
    routine = fortran_reader.psyir_from_source(code).walk(Routine)[0]
    # Create two barriers and enclose only the second one in a parallel
    # region.
    for _ in range(2):
        routine.addchild(OMPBarrierDirective())
    OMPParallelTrans().apply(routine.children[1])
    assert len(routine.walk(OMPBarrierDirective)) == 2
    # Only the barrier outside of the parallel region should be removed.
    OMPMinimiseSyncTrans()._eliminate_uncontained_barriers(routine)
    assert len(routine.walk(OMPBarrierDirective)) == 1