diff --git a/psytran/__init__.py b/psytran/__init__.py index 1baf510..3466583 100644 --- a/psytran/__init__.py +++ b/psytran/__init__.py @@ -12,3 +12,5 @@ from psytran.directives import * # noqa from psytran.family import * # noqa from psytran.loop import * # noqa +from psytran.transmute_cpu import * # noqa +from psytran.transmute_rules import * # noqa diff --git a/psytran/family.py b/psytran/family.py index 1f4db34..251c4c4 100644 --- a/psytran/family.py +++ b/psytran/family.py @@ -2,13 +2,22 @@ # # This file is part of PSyTran and is released under the BSD 3-Clause license. # See LICENSE in the root of the repository for full licensing details. - r""" This module provides functions for determining the ancestors and descendents of :py:class:`Node`\s, as well as for querying their existence and nature. """ -from psyclone.psyir.nodes import Loop, Node +from psyclone.psyir.nodes import ( + Loop, Node, Assignment, Schedule, + ArrayOfStructuresReference, + IfBlock, + OMPDoDirective, + OMPParallelDirective, + OMPParallelDoDirective) +from psyclone.psyGen import Transformation +from psyclone.transformations import TransformationError +from psytran.prop_trans import PropTrans + __all__ = [ "get_descendents", @@ -16,6 +25,18 @@ "get_children", "has_descendent", "has_ancestor", + "child_valid_trans", + "check_omp_ancestry", + "get_last_child_shed", + "span_check_loop", + "span_parallel", + "string_match_ref_var", + "strip_index", + "get_reference_tags", + "try_transformation", + "try_validation", + "update_ignore_list", + "work_out_collapse_depth" ] @@ -161,3 +182,533 @@ def has_ancestor(node, node_type=Loop, inclusive=False, name=None): if name: return any(ancestor.variable.name == name for ancestor in ancestors) return bool(ancestors) + + +def child_valid_trans(check_current_node): + ''' + We want to see if the loop could be parallelised with a + pardo transformation. Given we are going all the way down + the tree in the wider loop, we have to do this manually here. + We check the current loop to see if it can be pardo. + Checks are standard checks (ignoring OMP ancestry + as this is done at the top level). + + ''' + valid_loop = False + + # Setup + options = {} + trans = PropTrans() + + loop_child_list = get_descendents(check_current_node, Loop) + + # If there is a list of loops + if loop_child_list: + # If the current node is a loop, we want to check it to, first + if isinstance(check_current_node, Loop): + loop_child_list.insert(0, check_current_node) + + for loop in loop_child_list: + # If additional rules grow for checking what is a valid loop + # they may need to go here too on a case by case basis. + # Checking for the array of structs it not needed, as we can still + # parallelise something there. + options = work_out_collapse_depth(loop, options) + + # Call the validation instead of try_transformation + error = try_validation(loop, trans.omp_transform_par_do(), options) + + # IF there are no errors, continue, otherwise exit + if len(error) == 0 or error == "": + valid_loop = True + else: + valid_loop = False + break + + # Return number of children spanning over also + # Used in in rule for no of loops spanning over + if valid_loop: + ret_child_loop_qty = len(loop_child_list) + else: + ret_child_loop_qty = 0 + + return valid_loop, ret_child_loop_qty + + +def check_omp_ancestry( + node_target: Node, + transformation: Transformation, +): + ''' + Check the OMP ancestry, both for spanning parallel or + parallel do. Returns True, which will stop future transformations + from occurring. + If the transformation to apply is a simple OMP do then we instead + expect the presence of a parallel region, and therefore will + return false. + Due to psyir tree returning sibling loop ancestors during psyclone, + also check the path to each OMP parallel. + If there is a path to both, that is incorrect. + See the list of exceptions below. + ''' + + # Store whether there is a current Loop and a + # OMP Parallel, Parallel Do or Do ancestor + omp_ancestor_par = node_target.ancestor(OMPParallelDirective) + omp_ancestor_par_do = node_target.ancestor(OMPParallelDoDirective) + omp_ancestor_do = node_target.ancestor(OMPDoDirective) + + # There seems to be an issue in PSyclone where the presence of an OMP + # parallel section. + # Under the same parent node is causing a little confusion. + # The paths allow us to check index references for closeness + # Given a certain circumstance we can iron out the confusion + # Needs to be wrapped in a try else it fails + try: + path_to_omp_par = node_target.path_from(omp_ancestor_par) + except ValueError: + path_to_omp_par = False + try: + path_to_omp_par_do = node_target.path_from(omp_ancestor_par_do) + except ValueError: + path_to_omp_par_do = False + + # DEFAULT result + # The default presence of OMP given the below ancestry + # True : There is an OpenMP Parallel section above the node, + # This can be either a Parallel or Parallel Do section above + # The default intended use is if this is True, do not do an + # OpenMP transformation. + # False : There is no OpenMP Parallel sections detected above. + if ((omp_ancestor_par and path_to_omp_par) + or (omp_ancestor_par_do and path_to_omp_par_do)): + print("Parallel OMP region ancestor found") + omp_ancestry_presence = True + else: + omp_ancestry_presence = False + + # When adding a OpenMP DO transformation, it's a little more nuanced. + # We will in principle reverse the result. + # True : There is an OpenMP Parallel Do section above the node. + # False : There is a Parallel section above the Node, and it is safe. + if transformation.omp_directive == "do": + # A OpenMP do transformation needs a parallel section above, + # it checks and corrects. + # An occurrence where adjacent parallel do regions are being + # picked up and the parallel ancestor is misreporting. Therefore, + # if there a parallel ancestor and path, but there is not a path to a + # parallel do reference node. Checking the paths mitigates this + # occurrence. To be reported to STFC as a bug in psyclone. + if omp_ancestor_par and path_to_omp_par and not path_to_omp_par_do: + print("Adjacent node detected in Ancestry") + print("Parallel OMP region ignored as transformation is OMP do") + omp_ancestry_presence = False + + # If there is no path to both an parallel or parallel do Node, + # but it has reported an parallel ancestor + if not omp_ancestor_par and not path_to_omp_par: + print("No Parallel region present, cannot try do") + omp_ancestry_presence = True + + # This stops the nesting of OMP do under parallel sections + # If there is one already present, it will effectively understand the + # above a parallel and a do, which we don't want to try parallelism in + # this occurrence. + if omp_ancestor_do: + omp_ancestry_presence = True + + return omp_ancestry_presence + + +def get_last_child_shed(loop_node): + ''' + Get the last child loop schedule of the provided node. + Then we can do some checks on it + ''' + + loop_list = [] + + # Set to false if a Schedule cannot be found + ret_shed = None + + # Work through the schedule of the node + # The first one will be the schedule of this loop + loop_shed_list = loop_node.walk(Schedule) + loop_list = loop_shed_list[0].walk(Loop) + + # If there is a list of loops for this schedule + if loop_list: + # Get the last element and walk it's schedule + last_element = len(loop_list)-1 + shed_list = loop_list[last_element].walk(Schedule) + if shed_list: + # Return the first schedule for this loop. + ret_shed = shed_list[0] + + return ret_shed + + +def span_check_loop(child_list, start_index_loop, loop_max_qty): + ''' + Run through child_list from the provided start index. + Check each node. + If it is an If block or a Loop node, check loops, self + and children. These loops are checked against their own set + of rules and a provided limit from the override. + ''' + + trans = PropTrans() + + # Setup for loop + last_good_index = 0 + loop_child_qty = 0 + + # Assume we are unable to span a region if nothing is found + parallel_possible = False + + # Work through the child list of nodes of the given ancestor + # Starting at the current node, ending at the last child node + # The goal of the loop is to find the most appropriate end node for + # a parallel region. It will check all of the loops present in the + # current proposed parallel region can be transformed. + # As soon as one cannot be, it ends the loop, leaving the end index + # as the previous step through the loop. + for index in range(start_index_loop, len(child_list)-1): + check_current_node = child_list[index] + + # reset each loop + try_parallel = False + + assignment_cont = False + + # If its an assignment we want to continue to the next index to check + # but not try a parallel region. This way they can be included in the + # spanning region, but will not be the start or end nodes. + # May need to be adjusted in the future + if isinstance(check_current_node, Assignment): + assignment_cont = True + + # If the node is an if block or loop, + # check all of the loops could be OpenMP + # parallel do normally, as a safety check for the region as a whole. + if isinstance(check_current_node, (IfBlock, Loop)): + ret_child_qty = 0 + try_parallel, ret_child_qty = child_valid_trans(check_current_node) + # Add more checks here and keep setting try_parallel or similar + if try_parallel: + loop_child_qty = loop_child_qty + ret_child_qty + + if loop_child_qty > loop_max_qty: + break + + # if the node is a loop, if or assignment, we want to continue + # we've checked as to whether to loop children are good to parallelise + # if it's an assignment node, we want to continue the loop, + # but go no further with the checks. + if try_parallel or assignment_cont: + # Leave these if checks in place, try_parallel and else, + # index > start_index_loop, they work well + # If the node loop children are good, and it's not the first node + if try_parallel: + if index > start_index_loop: + check_span_nodes = [] + # Surely we should be checking the current index? + for index_inner in range(start_index_loop, index+1): + check_span_nodes.append(child_list[index_inner]) + # Try the transformation + error = try_validation( + check_span_nodes, + trans.omp_parallel(), {}) + # If there is an error, we cannot do this one and + # should break + if len(error) == 0 or error == "": + parallel_possible = True + last_good_index = index + else: + print(error) + break + else: + break + + return parallel_possible, last_good_index + + +def span_parallel(loop_node, loop_max_qty): + ''' + Transformation used is omp_parallel. Span a parallel section. + Get the ancestor node of the provided node, then grab it's children. + This is a list of all nodes, including the provided node. + This provided node will be the first node checked. + ''' + + trans = PropTrans() + + # Find the ancestor schedule. + # Given all of this stems from the first loop + # There is an occurrence where this needs to stem + # from the first if above a loop + # so far no adverse effects of doing so + if_loop_ancestor = loop_node.ancestor(IfBlock) + if if_loop_ancestor: + shared_ancestor = if_loop_ancestor.ancestor(Schedule) + check_node = if_loop_ancestor + # Otherwise get an reference to the ancestor schedule + # Even the top loop, who's ancestor is a subroutine + # Or similar, will + else: + shared_ancestor = loop_node.ancestor(Schedule) + check_node = loop_node + + # Get the child list of the ancestors schedule + # This will be used to have a list of potential nodes + # to span over ready. + child_list = shared_ancestor.children + + # Work through the list, until we meet the node which is + # the current origin in the schedule of the ancestor. + # This will be the first index node which a potential + # parallel region is spanned from. + start_index_loop = 0 + for index, node in enumerate(child_list): + if check_node == node: + start_index_loop = index + # We only want the start, we can exit the loop + break + + # Check each node in the child list + # Check all loop nodes under each node (including node if is a loop) + parallel_possible, last_good_index = span_check_loop(child_list, + start_index_loop, + loop_max_qty) + + # The final attempt to see if parallel region is possible + # Given these indexes have been validated, it should be + if parallel_possible: + span_nodes = [] + for index_inner in range(start_index_loop, last_good_index+1): + span_nodes.append(child_list[index_inner]) + if len(span_nodes) > 1: + # Try the transformation + error = try_transformation(span_nodes, trans.omp_parallel(), {}) + # Confirm success and advise which nodes + if len(error) == 0 or error == "": + print("Spanning Parallel over:") + print(span_nodes) + + +def string_match_ref_var(loop_node): + ''' + We need to check the metadata of whether references of a + loop node match to certain properties. + Return True, if a Array of types (or ArrayOfStructures) + is found. + ArrayOfStructures is a good reference to find, it notes + that there is an array of objects in the children that is accessed + in the loop body, likely better being parallelised differently. + Therefore if we find this match, we want to skip it. + ''' + + # get the last child schedule, only do work if shed exists + last_child_shed = get_last_child_shed(loop_node) + + struct_ref_exists = False + + if last_child_shed: + reference_tags = [] + for struct_ref in last_child_shed.walk(ArrayOfStructuresReference): + struct_ref_exists = True + reference_tags.extend(get_reference_tags(struct_ref)) + + # Only do the work if ArrayOfStructuresReference exists + if struct_ref_exists: + variable_tags = get_reference_tags(loop_node) + # The first is the current indexer of the loop + # If the loop index is in the indexes found related to the + # ArrayOfStructures reference, then we've found a match + if variable_tags[0] in reference_tags: + return True + + return False + + +def strip_index(line, str_tag): + ''' + Run through the provided string line from a schedule, and + strip the line at the given tag down to just an index. + There does not seem to be an alterative method in PSYIR + to return just this property. In order to check a + struct (type) that is accessed by a list, this is method + required. + ''' + + breakout_string = line.partition(str_tag) + # We know it's the start of the array, given the str_tag + indexer_list = breakout_string[2].split(",") + # formatting + indexer_ref = indexer_list[0] + # A list of characters which will be removed from strings. + for char in ["[", "]", "'"]: + indexer_ref = indexer_ref.replace(char, "") + + # return the index without any clutter + return indexer_ref + + +def get_reference_tags(node): + ''' + Extracts tags found in schedules for different node types + ''' + # Create our empty list to store reference tags + reference_tags = [] + + # get the reference the hard way as we cannot seem to + # access it directly + information = str(node) + array_info = information.splitlines() + + if isinstance(node, ArrayOfStructuresReference): + # Work through each line of the struct_ref turned into a str + for line in array_info: + # If there is a Reference, this is what we are going to + # manipulate to gather out the index reference + if ("Reference[name:" in line and + "ArrayOfStructures" not in line): + reference_tags.append(strip_index( + line, "Reference[name:")) + + elif isinstance(node, Loop): + for line in array_info: + if "Loop[variable:" in line: + reference_tags.append(strip_index( + line, "Loop[variable:")) + + else: + raise TypeError(f"Node of type {type(node)} not currently supported " + f"for extracting tags") + + return reference_tags + + +def try_transformation( + node_target: Node, + transformation: Transformation, + options: dict = None +): + ''' + Try the provided transformation provided. + Otherwise raise an error which is returned. + The try transformation is present is all transformations for OMP, + so it has been made generic and called by most below. + + :arg node_target: The Node to transform - Can instead be provided + as a list of nodes to apply. + :type node_target: :py:class:`Node` + :arg transformation: The transformation to apply + :type transformation: :py:class:`Transformation` + :kwarg options: a dictionary of clause options. + :type options: :py:class:`dict` + ''' + + if options is None: + options = {} + + error_message = "" + + try: + print("Trying") + transformation.apply(node_target, options=options) + + except (TransformationError, IndexError) as err: + error_message = str(err) + print(f"Could not transform " + f"because:\n{error_message}") + + return error_message + + +def try_validation( + node_target: Node, + transformation: Transformation, + options: dict = None +): + ''' + Try the provided transformation provided. + Instead with a validate as opposed to apply + Otherwise raise an error which is returned. + The try transformation is present is all transformations for OMP, + so it has been made generic and called by most below. + + :arg node_target: The Node to transform - Can instead be provided + as a list of nodes to apply. + :type node_target: :py:class:`Node` + :arg transformation: The transformation to apply + :type transformation: :py:class:`Transformation` + :kwarg options: a dictionary of clause options. + :type options: :py:class:`dict` + ''' + + if options is None: + options = {} + + error_message = "" + + try: + transformation.validate(node_target, options=options) + + except (TransformationError, IndexError) as err: + error_message = str(err) + print(f"Could not transform " + f"because:\n{error_message}") + + return error_message + + +def update_ignore_list( + loop_node, + current_options, + override_class + ): + ''' + Pass in a loop_node and array of loop_tag_overrides objects. + Check each object and if the tag matches the loop, check the + know list, and append new options. + ''' + + # Setup References + loop_tag = str(loop_node.loop_type) + current_ignore_list = [] + + overrides = override_class.get_tag_overrides() + if overrides: + for override_obj in overrides: + if loop_tag == override_obj.get_loop_tag(): + override_options = override_obj.options() + if "ignore_dependencies_for" in override_options: + current_ignore_list.append( + override_options["ignore_dependencies_for"]) + + current_options["ignore_dependencies_for"] = current_ignore_list + + return current_options + + +def work_out_collapse_depth(loop_node, options): + ''' + Generate a value for how many collapses to specifically + do given the number of children. + ''' + + # The default number of loops is 1 given self + n_collapse = 1 + # Are there any loop children, will return array of child + # nodes. + child_loop_list = get_descendents(loop_node, Loop) + if child_loop_list: + # Add the length of the node array, or the number of + # to the n_collapse value to return + n_collapse = n_collapse + len(child_loop_list) + + if n_collapse > 1: + options["collapse"] = n_collapse + + return options diff --git a/psytran/prop_trans.py b/psytran/prop_trans.py new file mode 100644 index 0000000..e69e48e --- /dev/null +++ b/psytran/prop_trans.py @@ -0,0 +1,55 @@ +############################################################################## +# Copyright (c) 2025, Met Office, on behalf of HMSO and Queen's Printer +# For further details please refer to the file LICENCE.original which you +# should have received as part of this distribution. +############################################################################## + +''' +Holds class with getter wrappers for global transformations which are +pre-configured. Intended to reduce duplication when defining transformations. +''' + + +from psyclone.transformations import (OMPLoopTrans, OMPParallelTrans) + + +class PropTrans: + ''' + Class to store pre-configured transformations with getters + ''' + def __init__(self): + ''' + Initialise class with default properties + ''' + + # Setup transformations and their properties + # OMP parallel do transformation + self._omp_transform_par_do = OMPLoopTrans( + omp_schedule="static", + omp_directive="paralleldo") + self._omp_parallel = OMPParallelTrans() + self._omp_transform_do = OMPLoopTrans( + omp_schedule="static", + omp_directive="do") + + # Getters + def omp_transform_par_do(self): + + ''' + Get pre configured omp_transform_par_do + ''' + return self._omp_transform_par_do + + def omp_parallel(self): + + ''' + Get pre configured omp_parallel + ''' + return self._omp_parallel + + def omp_transform_do(self): + + ''' + Get pre configured omp_transform_do + ''' + return self._omp_transform_do diff --git a/psytran/transmute_cpu.py b/psytran/transmute_cpu.py new file mode 100644 index 0000000..a4b2962 --- /dev/null +++ b/psytran/transmute_cpu.py @@ -0,0 +1,98 @@ +# (C) Crown Copyright 2025, Met Office. All rights reserved. +# +# This file is part of PSyTran and is released under the BSD 3-Clause license. +# See LICENSE in the root of the repository for full licensing details. + +''' +Top level function(s) intended to be callable by +PSyclone Transmute global and override functions. +''' + +from __future__ import print_function +from psyclone.psyir.nodes import Loop +from psytran.family import ( + update_ignore_list, + check_omp_ancestry, + span_parallel, + try_transformation) +from psytran.transmute_rules import ( + OverridesClass, + validate_rules) +from psytran.prop_trans import PropTrans + +__all__ = [ + "try_loop_omp_pardo" +] + + +def try_loop_omp_pardo(loop_node, + override_class + ): + ''' + Called inside a loop running through the files schedule, and processing + each loop one at a time, which occurs here. + + First it checks if a list of ignore dependencies objects has been provided + + Then it spans some parallel regions + + Then it adds in either parallel do or do clause to the loop node + + OpenMP ancestry for the loop is checked where relevant. + + :arg loop_node: The Loop node to transform + :type loop_node: :py:class:`Loop` + :arg override_class: Class containing a list of override classes to check + against for an ignore_dependencies_for list, etc. + Also contains master override settings. + :type OverridesClass: :py:class:`OverridesClass` + ''' + + if not isinstance(loop_node, Loop): + raise TypeError(f"Expected a loop node \ + '{type(Loop)}'.") + + if not isinstance(override_class, OverridesClass): + raise TypeError(f"Expected a tag_override object, not \ + '{type(override_class)}'.") + + # options dict setup + options = {} + + trans = PropTrans() + + # If there is an loop_tag_overrides_list, work through the objects + # and update the options ignore_dependencies_for with tags where the + # loop tags match + options = update_ignore_list(loop_node, options, override_class) + + # Check if the ancestry for a omp_transform_par_do and a + # omp_transform_do transformation is correct for the given node + # We expect there to be no parallel ancestry for either transformation + # when we are attempting to span a parallel region. + if (not check_omp_ancestry(loop_node, trans.omp_transform_par_do()) or + check_omp_ancestry(loop_node, trans.omp_transform_do())): + span_parallel(loop_node, override_class.get_loop_max_qty()) + + # Given whether the loop is now currently in a parallel section + # change the OMP transformation to the correct option. + # Either parallel do, if there is no parallel region above + # or do + # check_omp_ancestry for the omp_transform_do will return false if there + # is a parallel section for a omp_transform_do transformation + # default transformation will be parallel do + if not check_omp_ancestry(loop_node, trans.omp_transform_do()): + transformation = trans.omp_transform_do() + else: + transformation = trans.omp_transform_par_do() + + # Check the ability to transform given OMP ancestry + if not check_omp_ancestry(loop_node, transformation): + + loop_continue, options = validate_rules(loop_node, options) + + # Given the rule checks above, if they are all clear + if loop_continue: + # Try the transformation - either a OMP parallel do or do + error = try_transformation(loop_node, transformation, options) + print(error) diff --git a/psytran/transmute_rules.py b/psytran/transmute_rules.py new file mode 100644 index 0000000..d330a8c --- /dev/null +++ b/psytran/transmute_rules.py @@ -0,0 +1,119 @@ +# (C) Crown Copyright 2023, Met Office. All rights reserved. +# +# This file is part of PSyTran and is released under the BSD 3-Clause license. +# See LICENSE in the root of the repository for full licensing details. +''' +File to cohabitate all rules relating to overrides. +Contains override classes to be used to change some settings in +transmute functions. Override scripts will set their own versions of this +object. +Also contains functions which are called to work through a series of known +rules to check the validity of applying a loop. +''' + +from psytran.family import ( + string_match_ref_var, + work_out_collapse_depth) + + +class TagOverride: + ''' + Class to store combined metadata of an ignore list associated with a + loop tag. + This data will be provided by a global.py or file override which calls + global. These will need to be found by user and manually added to this + object in global.py or file override for the transmute method. + ''' + def __init__( + self, + loop_tag, + options=None + ): + ''' + Initialise TagOverride with a loop tag and an options list + ''' + # Validation checks into class + if options is None: + options = {} + if not isinstance(options, dict): + raise TypeError(f"Expected a options dict, not \ + '{type(options)}'.") + + self._loop_tag = loop_tag + self._options = options + + # Getters + def get_loop_tag(self): + ''' + Return the loop tag of the class, name of the loop index. + Name tag has been set by Loop.set_loop_type_inference_rules + in the global script. + ''' + return self._loop_tag + + def options(self): + ''' + Return the options list of the class + ''' + return self._options + + +class OverridesClass: + ''' + Class to act as a full override for the global script. + This will adjust settings used functions later on. + This will contain a list of specific overrides for given loop tags. + ''' + def __init__(self, + loop_max_qty=None, + tag_overrides=None + ): + + if tag_overrides is None: + self._tag_overrides = [] + else: + for override in tag_overrides: + if not isinstance(override, TagOverride): + raise TypeError(f"Expected a tag_override object, not \ + '{type(override)}'.") + # Pass through the list of accepted loop tag overrides + self._tag_overrides = tag_overrides + + # setup default values for object properties + self._loop_max_qty = 12 + + # Override the defaults with provided values + if loop_max_qty: + self._loop_max_qty = loop_max_qty + + # Getters + def get_loop_max_qty(self): + ''' + Return the loop max value for number of loops that a parallel section + will span over in span_parallel. + ''' + return self._loop_max_qty + + def get_tag_overrides(self): + ''' + A list of TagOverride objects. Set the loop_tag which the object is for + and it's associated options list for the transformation. + ''' + return self._tag_overrides + + +def validate_rules(loop_node, options): + ''' + Function to store calls to all validations of loop. + We can bundle them together with a result. + ''' + + valid_loop = False + check_struct = False + check_struct = string_match_ref_var(loop_node) + + if not check_struct: + options = work_out_collapse_depth(loop_node, options) + valid_loop = True + + return valid_loop, options