From 8d39d01b8793efbc5f761cf2e096d8a7c55b225b Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 29 Sep 2025 12:39:32 -0700 Subject: [PATCH] run: Resolve workflow names using pathogen registration, if available This decoupling of workflow names from paths was always expected and intended to be possible, and now it is. Related-to: Related-to: --- CHANGES.md | 28 ++++++++++++++ doc/changes.md | 29 +++++++++++++++ doc/commands/run.rst | 4 +- nextstrain/cli/command/run.py | 4 +- nextstrain/cli/pathogens.py | 70 ++++++++++++++++++++++++++++++++++- 5 files changed, 131 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index bd0a9d29..e8e9b725 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,34 @@ development source code and as such may not be routinely kept up to date. # __NEXT__ +## Improvements + +* `nextstrain run` now resolves workflow names by looking in the pathogen + registration (`nextstrain-pathogen.yaml`) for an explicitly registered path. + If no path is registered for a workflow, `nextstrain run` still falls back to + using the workflow name for the workflow path. + + This allows for workflow names that are not also directory paths within the + pathogen source, which is useful for pathogens that are structured + non-conventionally for one reason or another. The decoupling of workflow + names from paths also means that the workflow can be relocated within the + pathogen repo without breaking the name (i.e. the external interface to the + workflow). + + As an example, the following workflow registration: + + ```yaml + workflows: + phylogenetic: + path: . + compatibility: + nextstrain run: yes + ``` + + would allow invocation of a `phylogenetic` workflow located at the top-level + of the pathogen source, such as in [zika-tutorial](https://github.com/nextstrain/zika-tutorial). + ([#481](https://github.com/nextstrain/cli/pull/481)) + # 10.3.0 (26 September 2025) diff --git a/doc/changes.md b/doc/changes.md index 859ec936..1dc93461 100644 --- a/doc/changes.md +++ b/doc/changes.md @@ -16,6 +16,35 @@ development source code and as such may not be routinely kept up to date. (v-next)= ## __NEXT__ +(v-next-improvements)= +### Improvements + +* `nextstrain run` now resolves workflow names by looking in the pathogen + registration (`nextstrain-pathogen.yaml`) for an explicitly registered path. + If no path is registered for a workflow, `nextstrain run` still falls back to + using the workflow name for the workflow path. + + This allows for workflow names that are not also directory paths within the + pathogen source, which is useful for pathogens that are structured + non-conventionally for one reason or another. The decoupling of workflow + names from paths also means that the workflow can be relocated within the + pathogen repo without breaking the name (i.e. the external interface to the + workflow). + + As an example, the following workflow registration: + + ```yaml + workflows: + phylogenetic: + path: . + compatibility: + nextstrain run: yes + ``` + + would allow invocation of a `phylogenetic` workflow located at the top-level + of the pathogen source, such as in [zika-tutorial](https://github.com/nextstrain/zika-tutorial). + ([#481](https://github.com/nextstrain/cli/pull/481)) + (v10-3-0)= ## 10.3.0 (26 September 2025) diff --git a/doc/commands/run.rst b/doc/commands/run.rst index 81b70157..e79acf3d 100644 --- a/doc/commands/run.rst +++ b/doc/commands/run.rst @@ -72,7 +72,9 @@ positional arguments for valid workflow names. Workflow names conventionally correspond directly to directory - paths in the pathogen source, but this may not always be the case. + paths in the pathogen source, but this may not always be the case: + the pathogen's registration info can provide an explicit path for a + workflow name. Required. diff --git a/nextstrain/cli/command/run.py b/nextstrain/cli/command/run.py index 6a5520c9..1966a85b 100644 --- a/nextstrain/cli/command/run.py +++ b/nextstrain/cli/command/run.py @@ -80,7 +80,9 @@ def register_parser(subparser): for valid workflow names. Workflow names conventionally correspond directly to directory - paths in the pathogen source, but this may not always be the case. + paths in the pathogen source, but this may not always be the case: + the pathogen's registration info can provide an explicit path for a + workflow name. Required. """)) diff --git a/nextstrain/cli/pathogens.py b/nextstrain/cli/pathogens.py index 5dd01726..280d45e1 100644 --- a/nextstrain/cli/pathogens.py +++ b/nextstrain/cli/pathogens.py @@ -342,8 +342,73 @@ def compatible_workflows(self, feature: str) -> Dict[str, Dict]: } - def workflow_path(self, workflow: str) -> Path: - return self.path / workflow + def workflow_registration(self, name: str) -> Optional[dict]: + """ + Returns the registration dictionary for the workflow *name*. + + Returns ``None`` if the workflow is not registered, does not have + registration information, or the registered information is not a + dictionary. + """ + if (info := self.registered_workflows().get(name)) and not isinstance(info, dict): + debug(f"pathogen registration.workflows[{name!r}] is not a dict (got a {type(info).__name__})") + return None + + return info + + + def workflow_path(self, name: str) -> Path: + if (info := self.workflow_registration(name)) and (path := info.get("path")): + debug(f"pathogen registration specifies {path!r} for workflow {name!r}") + + # Forbid anchored paths in registration info, as it's never correct + # practice. An anchored path is just an absolute path on POSIX + # systems but covers more "absolute-like" cases on Windows systems + # too. + if PurePath(path).anchor: + raise UserError(f""" + The {self.registration_path.name} file for {str(self)!r} + registers an anchored path for the workflow {name!r}: + + {path} + + Registered workflow paths must be relative to (and within) + the pathogen source itself. This is a mistake that the + pathogen author(s) must fix. + """) + + # Ensure the relative path resolves _within_ the pathogen repo to + # avoid shenanigans. + resolved_pathogen_path = self.path.resolve() + resolved_workflow_path = (resolved_pathogen_path / path).resolve() + + # Path.is_relative_to() was added in Python 3.9, so implement it + # ourselves around .relative_to(). + try: + resolved_workflow_path.relative_to(resolved_pathogen_path) + except ValueError: + raise UserError(f""" + The {self.registration_path.name} file for {str(self)!r} + registers an out-of-bounds path for the workflow {name!r}: + + {path} + + which resolves to: + + {str(resolved_workflow_path)} + + which is outside of the pathogen's source. + + Registered workflow paths must be within the pathogen + source itself. This is a mistake that the pathogen + author(s) must fix. + """) + + debug(f"resolved workflow {name!r} to {str(resolved_workflow_path)!r}") + return resolved_workflow_path + + debug(f"pathogen registration does not specify path for workflow {name!r}; using name as path") + return self.path / name def setup(self, dry_run: bool = False, force: bool = False) -> SetupStatus: @@ -747,6 +812,7 @@ def __init__(self, path: str): registered_workflows = PathogenVersion.registered_workflows compatible_workflows = PathogenVersion.compatible_workflows + workflow_registration = PathogenVersion.workflow_registration workflow_path = PathogenVersion.workflow_path def __str__(self) -> str: