diff --git a/CHANGES.md b/CHANGES.md
index f57e678..5a88e93 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -33,4 +33,24 @@
 - Introduced build_link_to_jnb method for creating STAC-compatible notebook links with
   metadata on kernel, environment, and containerization.
 - Added originating application platform metadata to generated OGC API records for
-  DeepESDL experiments and workflows.
\ No newline at end of file
+  DeepESDL experiments and workflows.
+
+## Changes in 0.1.6
+
+- Publisher now supports a `mode` parameter, which allows more flexible publishing:
+  - `"dataset"` → publish dataset only
+  - `"workflow"` → publish workflow only
+  - `"all"` → publish both (default)
+
+- CLI: the `publish` command now auto-detects dataset vs workflow configs and also accepts
+  --dataset-config / --workflow-config; single-file calls use -m to disambiguate
+  (e.g., deep-code publish workflow.yaml -m workflow).
+
+- Contacts in OGC API records no longer include default or empty fields; only
+  properties explicitly defined in the workflow configuration are now generated.
+
+- Enhanced GitHub automation to automatically synchronize the fork with upstream before
+  committing and opening a PR, ensuring branches are always up to date.
+
+- Prevented duplicate item and self links when updating base catalogs of workflows and
+  experiments.
\ No newline at end of file
diff --git a/README.md b/README.md
index d82ecce..ea15223 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,8 @@ catalog
 ### Usage
 
 ```
-deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
+deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT] [--mode
+all|dataset|workflow]
 ```
 
 #### Arguments
@@ -104,8 +105,12 @@ deep-code publish DATASET_CONFIG WORKFLOW_CONFIG [--environment ENVIRONMENT]
   (e.g., workflow-config.yaml)
 
 #### Options
+  --dataset-config   - Explicit path to dataset config
+  --workflow-config  - Explicit path to workflow config
   --environment, -e  - Target catalog environment: production (default) | staging | testing
+  --mode, -m         - Publishing mode:
+                       all (default) | dataset | workflow
 
 #### Examples:
 1. Publish to staging catalog
@@ -120,6 +125,18 @@ deep-code publish dataset-config.yaml workflow-config.yaml -e testing
 ```
 deep-code publish dataset-config.yaml workflow-config.yaml
 ```
+4. Publish dataset only
+```
+deep-code publish dataset-config.yaml -m dataset
+
+deep-code publish --dataset-config dataset.yaml -m dataset
+```
+5. Publish workflow only
+```
+deep-code publish workflow-config.yaml -m workflow
+
+deep-code publish --workflow-config workflow.yaml -m workflow
+```
 
 #### dataset-config.yaml example
 ```
diff --git a/deep_code/cli/publish.py b/deep_code/cli/publish.py
index d3c9001..19f1a0d 100644
--- a/deep_code/cli/publish.py
+++ b/deep_code/cli/publish.py
@@ -4,14 +4,212 @@
 # Permissions are hereby granted under the terms of the MIT License:
 # https://opensource.org/licenses/MIT.
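The CLI changes below ultimately delegate to `Publisher.publish(mode=...)`. For quick reference, a minimal sketch of the equivalent Python call, based on the `Publisher` API as modified in `deep_code/tools/publish.py` later in this diff; the file name and environment are placeholders, and GitHub credentials are assumed to be configured as deep-code expects (e.g. via a `.gitaccess` file):

```python
# Sketch only: programmatic equivalent of `deep-code publish workflow-config.yaml -m workflow`.
# The config path is a placeholder; dataset_config_path can be omitted in workflow mode.
from deep_code.tools.publish import Publisher

publisher = Publisher(
    workflow_config_path="workflow-config.yaml",
    environment="staging",
)
pr_url = publisher.publish(mode="workflow")  # opens a PR against the staging catalog and returns its URL
print(pr_url)
```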
+from pathlib import Path +from typing import Literal + import click +import yaml from deep_code.tools.publish import Publisher +Mode = Literal["all", "dataset", "workflow"] + +DATASET_MARKERS = { + "stac_version", + "extent", + "license", + "summaries", + "assets", + "providers", + "collection", + "collection_id", + "id", +} +WORKFLOW_MARKERS = { + "workflow", + "workflow_id", + "workflow_title", + "experiment", + "jupyter_notebook_url", + "notebook", + "parameters", + "input_datasets", +} + + +def _validate_inputs( + dataset_config: str | None, workflow_config: str | None, mode: str +): + mode = mode.lower() + + def ensure_file(path: str | None, label: str): + if path is None: + raise click.UsageError(f"{label} is required but was not provided.") + if not Path(path).is_file(): + raise click.UsageError(f"{label} not found: {path} is not a file") + + if mode == "dataset": + ensure_file(dataset_config, "DATASET_CONFIG") + if workflow_config is not None: + click.echo("Ignoring WORKFLOW_CONFIG since mode=dataset.", err=True) + + elif mode == "workflow": + ensure_file(workflow_config, "WORKFLOW_CONFIG") + + elif mode == "all": + ensure_file(dataset_config, "DATASET_CONFIG") + ensure_file(workflow_config, "WORKFLOW_CONFIG") + + else: + raise click.UsageError("Invalid mode. Choose one of: all, dataset, workflow.") + + +def _detect_config_type(path: Path) -> Literal["dataset", "workflow"]: + """Detect config type via filename hints and YAML top-level keys.""" + name = path.name.lower() + if "workflow" in name or "experiment" in name: + return "workflow" + if "dataset" in name or "collection" in name: + return "dataset" + + try: + data = yaml.safe_load(path.read_text(encoding="utf-8")) + except Exception as e: + raise ValueError(f"Cannot read YAML from {path}: {e}") + + if not isinstance(data, dict): + raise ValueError(f"YAML in {path} must be a mapping/object at the top level.") + + keys = set(data.keys()) + ds_score = len(keys & DATASET_MARKERS) + wf_score = len(keys & WORKFLOW_MARKERS) + + if ds_score > wf_score: + return "dataset" + if wf_score > ds_score: + return "workflow" + + raise ValueError( + f"Ambiguous config type for {path}. " + "Rename to include 'dataset' or 'workflow', or pass the missing file explicitly." + ) + + +def _assign_configs( + pos_first: str | None, + pos_second: str | None, + mode: Mode, + explicit_dataset: str | None, + explicit_workflow: str | None, +) -> tuple[str | None, str | None]: + """ + Decide which file is dataset vs workflow. + Precedence: explicit flags > positional + detection. + Returns (dataset_config, workflow_config). + """ + ds = explicit_dataset + wf = explicit_workflow + + # If both explicit provided, we're done; warn if extra positionals are passed. + pos_args = [p for p in (pos_first, pos_second) if p] + if ds and wf: + if pos_args: + click.echo( + "Positional config paths ignored because explicit flags were provided.", + err=True, + ) + return ds, wf + + # Helper to assign a single positional file to the missing slot + def _assign_single(p: str) -> tuple[str | None, str | None]: + nonlocal ds, wf + if ds and wf: + raise click.UsageError( + "Both dataset and workflow configs already provided; remove extra positional files." 
+ ) + # Use mode as a strong hint when only one is missing + if not ds and mode == "dataset": + ds = p + return + if not wf and mode == "workflow": + wf = p + return + # Otherwise detect + kind = _detect_config_type(Path(p)) + if kind == "dataset": + if ds and Path(ds).resolve() != Path(p).resolve(): + raise click.UsageError( + f"Multiple dataset configs supplied: {ds} and {p}" + ) + ds = p + else: + if wf and Path(wf).resolve() != Path(p).resolve(): + raise click.UsageError( + f"Multiple workflow configs supplied: {wf} and {p}" + ) + wf = p + + # If exactly one explicit provided, try to fill the other via positionals + if ds and not wf: + if len(pos_args) > 1: + raise click.UsageError( + "Provide at most one positional file when using --dataset-config." + ) + if pos_args: + _assign_single(pos_args[0]) + return ds, wf + + if wf and not ds: + if len(pos_args) > 1: + raise click.UsageError( + "Provide at most one positional file when using --workflow-config." + ) + if pos_args: + _assign_single(pos_args[0]) + return ds, wf + + # No explicit flags: rely on positionals + detection + if not pos_args: + return None, None + if len(pos_args) == 1: + p = pos_args[0] + if mode == "dataset": + return p, None + if mode == "workflow": + return None, p + # mode == "all": detect and require the other later in validation + kind = _detect_config_type(Path(p)) + return (p, None) if kind == "dataset" else (None, p) + + # Two positionals: detect both and assign + p1, p2 = pos_args[0], pos_args[1] + k1 = _detect_config_type(Path(p1)) + k2 = _detect_config_type(Path(p2)) + if k1 == k2: + raise click.UsageError( + f"Both files look like '{k1}' configs: {p1} and {p2}. " + "Please rename one or use --dataset-config/--workflow-config." + ) + ds = p1 if k1 == "dataset" else p2 + wf = p1 if k1 == "workflow" else p2 + return ds, wf + @click.command(name="publish") -@click.argument("dataset_config", type=click.Path(exists=True)) -@click.argument("workflow_config", type=click.Path(exists=True)) +@click.argument("dataset_config", type=click.Path(exists=True), required=False) +@click.argument("workflow_config", type=click.Path(exists=True), required=False) +@click.option( + "--dataset-config", + "dataset_config_opt", + type=click.Path(exists=True), + help="Explicit path to dataset config (overrides positional detection).", +) +@click.option( + "--workflow-config", + "workflow_config_opt", + type=click.Path(exists=True), + help="Explicit path to workflow config (overrides positional detection).", +) @click.option( "--environment", "-e", @@ -19,13 +217,48 @@ default="production", help="Target environment for publishing (production, staging, testing)", ) -def publish(dataset_config, workflow_config, environment): - """Request publishing a dataset along with experiment and workflow metadata to the - open science catalogue. +@click.option( + "--mode", + "-m", + type=click.Choice(["all", "dataset", "workflow"], case_sensitive=False), + default="all", + help="Publishing mode: dataset only, workflow only, or both", +) +def publish( + dataset_config, + workflow_config, + dataset_config_opt, + workflow_config_opt, + environment, + mode, +): + """ + Publish dataset and/or workflow/experiment metadata. 
+ + Examples: + deep-code publish workflow.yaml -e staging -m workflow + deep-code publish dataset.yaml -e staging -m dataset + deep-code publish dataset.yaml workflow.yaml -m all + deep-code publish --dataset-config dataset.yaml --workflow-config wf.yaml -m all + deep-code publish --dataset-config dataset.yaml -m dataset + deep-code publish --workflow-config wf.yaml -m workflow """ + mode = mode.lower() + ds_path, wf_path = _assign_configs( + dataset_config, + workflow_config, + mode, # type: ignore[arg-type] + dataset_config_opt, + workflow_config_opt, + ) + + _validate_inputs(ds_path, wf_path, mode) + publisher = Publisher( - dataset_config_path=dataset_config, - workflow_config_path=workflow_config, + dataset_config_path=ds_path, + workflow_config_path=wf_path, environment=environment.lower(), ) - publisher.publish_all() + result = publisher.publish(mode=mode) + + click.echo(result if isinstance(result, str) else "Wrote files locally.") diff --git a/deep_code/tests/tools/test_publish.py b/deep_code/tests/tools/test_publish.py index d5cace9..42dbc47 100644 --- a/deep_code/tests/tools/test_publish.py +++ b/deep_code/tests/tools/test_publish.py @@ -110,6 +110,69 @@ def test_read_config_files(self): self.assertEqual(self.publisher.dataset_config, dataset_config) self.assertEqual(self.publisher.workflow_config, workflow_config) + @patch("deep_code.tools.publish.GitHubPublisher") + def test_environment_repo_selection(self, mock_gp): + Publisher(environment="production") + assert mock_gp.call_args.kwargs["repo_name"] == "open-science-catalog-metadata" + Publisher(environment="staging") + assert ( + mock_gp.call_args.kwargs["repo_name"] + == "open-science-catalog-metadata-staging" + ) + Publisher(environment="testing") + assert ( + mock_gp.call_args.kwargs["repo_name"] + == "open-science-catalog-metadata-testing" + ) + + @patch.object(Publisher, "publish_dataset", return_value={"a": {}}) + @patch.object( + Publisher, "generate_workflow_experiment_records", return_value={"b": {}} + ) + def test_publish_mode_routing(self, mock_wf, mock_ds): + # dataset only + self.publisher.publish(write_to_file=True, mode="dataset") + mock_ds.assert_called() + mock_wf.assert_not_called() + + mock_ds.reset_mock() + mock_wf.reset_mock() + self.publisher.publish(write_to_file=True, mode="workflow") + mock_ds.assert_not_called() + mock_wf.assert_called() + + @patch.object(Publisher, "generate_workflow_experiment_records", return_value={}) + @patch.object(Publisher, "publish_dataset", return_value={}) + def test_publish_nothing_to_publish_raises( + self, mock_publish_dataset, mock_generate_workflow_experiment_records + ): + with pytest.raises(ValueError): + self.publisher.publish(write_to_file=False, mode="dataset") + mock_publish_dataset.assert_called_once() + mock_generate_workflow_experiment_records.assert_not_called() + + @patch.object(Publisher, "publish_dataset", return_value={"x": {}}) + @patch.object( + Publisher, "generate_workflow_experiment_records", return_value={"y": {}} + ) + def test_publish_builds_pr_params(self, mock_wf, mock_ds): + # Make PR creation return a fixed URL + self.publisher.gh_publisher.publish_files.return_value = "PR_URL" + + # Provide IDs for commit/PR labels + self.publisher.collection_id = "col" + self.publisher.workflow_id = "wf" + + url = self.publisher.publish(write_to_file=False, mode="all") + assert url == "PR_URL" + + # Inspect the call arguments to publish_files + _, kwargs = self.publisher.gh_publisher.publish_files.call_args + assert "dataset: col" in 
kwargs["commit_message"] + assert "workflow/experiment: wf" in kwargs["commit_message"] + assert "dataset: col" in kwargs["pr_title"] + assert "workflow/experiment: wf" in kwargs["pr_title"] + class TestParseGithubNotebookUrl: @pytest.mark.parametrize( diff --git a/deep_code/tests/utils/test_github_automation.py b/deep_code/tests/utils/test_github_automation.py index efa284c..671adb3 100644 --- a/deep_code/tests/utils/test_github_automation.py +++ b/deep_code/tests/utils/test_github_automation.py @@ -1,171 +1,362 @@ import logging import unittest from pathlib import Path -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, call, patch from deep_code.utils.github_automation import GitHubAutomation +def make_cp(stdout: str = ""): + """Helper to mimic subprocess.CompletedProcess-like return for our mocks.""" + m = MagicMock() + m.stdout = stdout + return m + + class TestGitHubAutomation(unittest.TestCase): def setUp(self): - # Set up test data self.username = "testuser" self.token = "testtoken" self.repo_owner = "testowner" self.repo_name = "testrepo" - self.github_automation = GitHubAutomation( - self.username, self.token, self.repo_owner, self.repo_name + self.gha = GitHubAutomation( + self.username, + self.token, + self.repo_owner, + self.repo_name, + local_clone_dir="/tmp/temp_repo", ) - logging.disable(logging.CRITICAL) # Disable logging during tests + logging.disable(logging.CRITICAL) def tearDown(self): - logging.disable(logging.NOTSET) # Re-enable logging after tests + logging.disable(logging.NOTSET) @patch("requests.post") def test_fork_repository(self, mock_post): - # Mock the response from GitHub API - mock_response = MagicMock() - mock_response.raise_for_status.return_value = None - mock_post.return_value = mock_response + mock_post.return_value = MagicMock(**{"raise_for_status.return_value": None}) + self.gha.fork_repository() - # Call the method - self.github_automation.fork_repository() - - # Assertions mock_post.assert_called_once_with( f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/forks", headers={"Authorization": f"token {self.token}"}, + timeout=60, ) @patch("subprocess.run") - def test_clone_repository_new(self, mock_run): - # Mock the subprocess.run method - mock_run.return_value = MagicMock() + def test_clone_sync_repository_new(self, mock_run): + """ + No .git directory → we clone and then ensure upstream remote gets added. + """ + # Simulate: "git remote -v" returns nothing so we add 'upstream' + def run_side_effect(args, cwd, check, capture_output=False, text=True): + if args[:3] == ["git", "remote", "-v"]: + return make_cp(stdout="") + return make_cp() + + mock_run.side_effect = run_side_effect + + with patch.object(Path, "mkdir") as _mk, patch( + "pathlib.Path.exists", side_effect=lambda p=None: False + ): + self.gha.clone_sync_repository() + + # Expect a clone, then a 'git remote -v', then 'git remote add upstream ...' 
+ origin_url = f"https://{self.username}:{self.token}@github.com/{self.username}/{self.repo_name}.git" + + expected_calls = [ + # git clone /tmp/temp_repo (cwd=".") + call( + ["git", "clone", origin_url, "/tmp/temp_repo"], + cwd=".", + check=True, + capture_output=False, + text=True, + ), + # ensure_upstream_remote -> remote -v (capture_output=True) + call( + ["git", "remote", "-v"], + cwd="/tmp/temp_repo", + check=True, + capture_output=True, + text=True, + ), + # since not present -> add upstream + call( + [ + "git", + "remote", + "add", + "upstream", + f"https://github.com/{self.repo_owner}/{self.repo_name}.git", + ], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ), + ] + # Only assert these key calls occurred in order (there can be other internal calls) + mock_run.assert_has_calls(expected_calls, any_order=False) + + @patch("subprocess.run") + def test_clone_sync_repository_existing(self, mock_run): + """ + .git exists → we fetch/prune and still ensure upstream remote. + """ + + def run_side_effect(args, cwd, check, capture_output=False, text=True): + if args[:3] == ["git", "remote", "-v"]: + # Pretend we already have no 'upstream' to force add + return make_cp(stdout="") + return make_cp() + + mock_run.side_effect = run_side_effect - # Mock os.path.exists to return False (directory does not exist) - with patch("os.path.exists", return_value=False): - self.github_automation.clone_sync_repository() + # Path.exists should return True when asked about /tmp/temp_repo/.git + def exists_side_effect(self): + return str(self).endswith("/tmp/temp_repo/.git") - # Assertions - mock_run.assert_called_once_with( + with patch("pathlib.Path.exists", new=exists_side_effect): + self.gha.clone_sync_repository() + + mock_run.assert_any_call( + ["git", "fetch", "--all", "--prune"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ) + mock_run.assert_any_call( + ["git", "remote", "-v"], + cwd="/tmp/temp_repo", + check=True, + capture_output=True, + text=True, + ) + mock_run.assert_any_call( [ "git", - "clone", - f"https://{self.username}:{self.token}@github.com/{self.username}/{self.repo_name}.git", - self.github_automation.local_clone_dir, + "remote", + "add", + "upstream", + f"https://github.com/{self.repo_owner}/{self.repo_name}.git", ], + cwd="/tmp/temp_repo", check=True, + capture_output=False, + text=True, ) @patch("subprocess.run") - def test_clone_repository_existing(self, mock_run): - # Mock the subprocess.run method - mock_run.return_value = MagicMock() + def test_sync_fork_with_upstream_merge_strategy(self, mock_run): + """ + Ensure sequence: fetch upstream+origin, checkout/ensure base branch, merge, push. 
+ """ + + def run_side_effect(args, cwd, check, capture_output=False, text=True): + # 'git branch' to check if base exists locally → return no branches, so it creates + if args[:2] == ["git", "branch"]: + return make_cp(stdout="") + if args[:3] == ["git", "remote", "-v"]: + return make_cp( + stdout="upstream\t{url} (fetch)\n".format( + url=f"https://github.com/{self.repo_owner}/{self.repo_name}.git" + ) + ) + return make_cp() + + mock_run.side_effect = run_side_effect + + # Pretend .git exists so we operate in repo + def exists_side_effect(self): + return str(self).endswith("/tmp/temp_repo/.git") - # Mock os.path.exists to return True (directory exists) - with patch("os.path.exists", return_value=True): - with patch("os.chdir"): - self.github_automation.clone_sync_repository() + with patch("pathlib.Path.exists", new=exists_side_effect): + self.gha.sync_fork_with_upstream(base_branch="main", strategy="merge") - # Assertions - mock_run.assert_called_once_with(["git", "pull"], check=True) + # Key steps + mock_run.assert_any_call( + ["git", "fetch", "upstream", "main"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ) + mock_run.assert_any_call( + ["git", "fetch", "origin", "main"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ) + # It may try to create local branch from origin first, then upstream + # We just ensure a merge with upstream and push occurred + mock_run.assert_any_call( + ["git", "merge", "upstream/main"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ) + mock_run.assert_any_call( + ["git", "push", "origin", "main"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ) @patch("subprocess.run") def test_create_branch(self, mock_run): - # Mock the subprocess.run method - mock_run.return_value = MagicMock() - - # Mock os.chdir - with patch("os.chdir"): - self.github_automation.create_branch("test-branch") + mock_run.return_value = make_cp() + # Ensure .git exists + with patch( + "pathlib.Path.exists", + new=lambda self: str(self).endswith("/tmp/temp_repo/.git"), + ): + self.gha.create_branch("feature/x", from_branch="main") - # Assertions - mock_run.assert_called_once_with( - ["git", "checkout", "-b", "test-branch"], check=True + mock_run.assert_has_calls( + [ + call( + ["git", "checkout", "main"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ), + call( + ["git", "checkout", "-B", "feature/x"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ), + ], + any_order=False, ) @patch("subprocess.run") def test_add_file(self, mock_run): - # Mock the subprocess.run method - mock_run.return_value = MagicMock() + mock_run.return_value = make_cp() + with patch.object(Path, "mkdir") as _mk, patch.object( + Path, "write_text" + ) as _wt: + # Ensure .git exists + with patch( + "pathlib.Path.exists", + new=lambda self: str(self).endswith("/tmp/temp_repo/.git"), + ): + self.gha.add_file("dir/file.json", {"k": "v"}) + + _wt.assert_called_once() # JSON written + mock_run.assert_any_call( + ["git", "add", "/tmp/temp_repo/dir/file.json"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ) - # Mock os.chdir and Path - with patch("os.chdir"), patch("pathlib.Path.mkdir"), patch( - "builtins.open", unittest.mock.mock_open() + @patch("subprocess.run") + def test_commit_and_push(self, mock_run): + mock_run.return_value = make_cp() + with patch( + "pathlib.Path.exists", + new=lambda self: 
str(self).endswith("/tmp/temp_repo/.git"), ): - self.github_automation.add_file("test/file.json", {"key": "value"}) + self.gha.commit_and_push("feat", "my message") - # Assertions - mock_run.assert_called_once_with( + mock_run.assert_has_calls( [ - "git", - "add", - str(Path(self.github_automation.local_clone_dir) / "test/file.json"), + call( + ["git", "checkout", "feat"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ), + call( + ["git", "commit", "-m", "my message"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ), + call( + ["git", "push", "-u", "origin", "feat"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, + ), ], - check=True, + any_order=False, ) @patch("subprocess.run") - def test_commit_and_push(self, mock_run): - # Mock the subprocess.run method - mock_run.return_value = MagicMock() + def test_commit_and_push_nothing_to_commit(self, mock_run): + # Make the commit call raise like 'git' does when nothing to commit + def run_side_effect(args, cwd, check, capture_output=False, text=True): + if args[:2] == ["git", "commit"]: + e = Exception("nothing to commit, working tree clean") + # Mimic our _run raising RuntimeError + raise RuntimeError(str(e)) + return make_cp() - # Mock os.chdir - with patch("os.chdir"): - self.github_automation.commit_and_push("test-branch", "Test commit message") + mock_run.side_effect = run_side_effect + + with patch( + "pathlib.Path.exists", + new=lambda self: str(self).endswith("/tmp/temp_repo/.git"), + ): + # Should not raise; should still push + self.gha.commit_and_push("feat", "msg") - # Assertions - mock_run.assert_any_call( - ["git", "commit", "-m", "Test commit message"], check=True - ) mock_run.assert_any_call( - ["git", "push", "-u", "origin", "test-branch"], check=True + ["git", "push", "-u", "origin", "feat"], + cwd="/tmp/temp_repo", + check=True, + capture_output=False, + text=True, ) @patch("requests.post") def test_create_pull_request(self, mock_post): - # Mock the response from GitHub API - mock_response = MagicMock() - mock_response.raise_for_status.return_value = None - mock_response.json.return_value = {"html_url": "https://github.com/test/pull/1"} - mock_post.return_value = mock_response - - # Mock os.chdir - with patch("os.chdir"): - self.github_automation.create_pull_request( - "test-branch", "Test PR", "Test body" - ) - - # Assertions + mock_post.return_value = MagicMock( + **{ + "raise_for_status.return_value": None, + "json.return_value": {"html_url": "https://github.com/test/pull/1"}, + } + ) + url = self.gha.create_pull_request( + "feat", "PR title", "Body", base_branch="main" + ) + + self.assertEqual(url, "https://github.com/test/pull/1") mock_post.assert_called_once_with( f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/pulls", headers={"Authorization": f"token {self.token}"}, json={ - "title": "Test PR", - "head": f"{self.username}:test-branch", + "title": "PR title", + "head": f"{self.username}:feat", "base": "main", - "body": "Test body", + "body": "Body", }, + timeout=60, ) - @patch("subprocess.run") - def test_clean_up(self, mock_run): - # Mock the subprocess.run method - mock_run.return_value = MagicMock() - - # Mock os.chdir - with patch("os.chdir"): - self.github_automation.clean_up() - - # Assertions - mock_run.assert_called_once_with( - ["rm", "-rf", self.github_automation.local_clone_dir] - ) + @patch("shutil.rmtree") + def test_clean_up(self, mock_rm): + # Simulate repo path exists + with 
patch("pathlib.Path.exists", return_value=True): + self.gha.clean_up() + mock_rm.assert_called_once_with(Path("/tmp/temp_repo")) - def test_file_exists(self): - # Mock os.path.isfile - with patch("os.path.isfile", return_value=True): - result = self.github_automation.file_exists("test/file.json") + def test_file_exists_true(self): + with patch("pathlib.Path.is_file", return_value=True): + self.assertTrue(self.gha.file_exists("a/b.json")) - # Assertions - self.assertTrue(result) + def test_file_exists_false(self): + with patch("pathlib.Path.is_file", return_value=False): + self.assertFalse(self.gha.file_exists("a/b.json")) diff --git a/deep_code/tests/utils/test_ogc_api_record.py b/deep_code/tests/utils/test_ogc_api_record.py index 6cc3f3f..e302aa1 100644 --- a/deep_code/tests/utils/test_ogc_api_record.py +++ b/deep_code/tests/utils/test_ogc_api_record.py @@ -38,10 +38,10 @@ def test_contact_initialization(self): def test_contact_default_values(self): contact = Contact(name="Jane Doe", organization="DeepESDL") - self.assertEqual(contact.position, "") - self.assertEqual(contact.links, []) - self.assertEqual(contact.contactInstructions, "") - self.assertEqual(contact.roles, ["principal investigator"]) + self.assertEqual(contact.position, None) + self.assertEqual(contact.links, None) + self.assertEqual(contact.contactInstructions, None) + self.assertEqual(contact.roles, None) class TestThemeConcept(unittest.TestCase): diff --git a/deep_code/tools/publish.py b/deep_code/tools/publish.py index 15b3699..fe3f9eb 100644 --- a/deep_code/tools/publish.py +++ b/deep_code/tools/publish.py @@ -2,11 +2,11 @@ # Copyright (c) 2025 by Brockmann Consult GmbH # Permissions are hereby granted under the terms of the MIT License: # https://opensource.org/licenses/MIT. - import copy import logging from datetime import datetime from pathlib import Path +from typing import Any, Literal import fsspec import jsonpickle @@ -61,6 +61,10 @@ def publish_files( commit_message: str, pr_title: str, pr_body: str, + base_branch: str = "main", + sync_strategy: Literal[ + "ff", "rebase", "merge" + ] = "merge", # 'ff' | 'rebase' | 'merge' ) -> str: """Publish multiple files to a new branch and open a PR. @@ -70,12 +74,34 @@ def publish_files( commit_message: Commit message for all changes. pr_title: Title of the pull request. pr_body: Description/body of the pull request. + base_branch: Base branch to branch from and open the PR against (default: "main") + sync_strategy: How to sync local with the remote base before pushing: + - "ff": Fast-forward only (no merge commits; fails if FF not possible). + - "rebase": Rebase local changes onto the updated base branch. + - "merge": Create a merge commit (default). Returns: URL of the created pull request. + + Raises: + ValueError: If an unsupported sync_strategy is provided. """ + + if sync_strategy not in {"ff", "rebase", "merge"}: + raise ValueError( + f'Invalid sync_strategy="{sync_strategy}". ' + 'Accepted values are "ff", "rebase", "merge".' 
+ ) + try: - self.github_automation.create_branch(branch_name) + # Ensure local clone and remotes are ready + self.github_automation.clone_sync_repository() + # Sync fork with upstream before creating the branch/committing + self.github_automation.sync_fork_with_upstream( + base_branch=base_branch, strategy=sync_strategy + ) + + self.github_automation.create_branch(branch_name, from_branch=base_branch) # Add each file to the branch for file_path, content in file_dict.items(): @@ -98,13 +124,14 @@ def publish_files( class Publisher: - """Publishes products (datasets) to the OSC GitHub repository. + """Publishes products (datasets), workflows and experiments to the OSC GitHub + repository. """ def __init__( self, - dataset_config_path: str, - workflow_config_path: str, + dataset_config_path: str | None = None, + workflow_config_path: str | None = None, environment: str = "production", ): self.environment = environment @@ -121,24 +148,37 @@ def __init__( self.collection_id = "" self.workflow_title = "" - # Paths to configuration files + # Paths to configuration files, can be optional self.dataset_config_path = dataset_config_path self.workflow_config_path = workflow_config_path + # Config dicts (loaded lazily) + self.dataset_config: dict[str, Any] = {} + self.workflow_config: dict[str, Any] = {} + + # Values that may be set from configs + self.collection_id: str | None = None + self.workflow_title: str | None = None + self.workflow_id: str | None = None + # Load configuration files self._read_config_files() - self.collection_id = self.dataset_config.get("collection_id") - self.workflow_title = self.workflow_config.get("properties", {}).get("title") - self.workflow_id = self.workflow_config.get("workflow_id") - if not self.collection_id: - raise ValueError("collection_id is missing in dataset config.") + if self.dataset_config: + self.collection_id = self.dataset_config.get("collection_id") + if self.workflow_config: + self.workflow_title = self.workflow_config.get("properties", {}).get( + "title" + ) + self.workflow_id = self.workflow_config.get("workflow_id") def _read_config_files(self) -> None: - with fsspec.open(self.dataset_config_path, "r") as file: - self.dataset_config = yaml.safe_load(file) or {} - with fsspec.open(self.workflow_config_path, "r") as file: - self.workflow_config = yaml.safe_load(file) or {} + if self.dataset_config_path: + with fsspec.open(self.dataset_config_path, "r") as file: + self.dataset_config = yaml.safe_load(file) or {} + if self.workflow_config_path: + with fsspec.open(self.workflow_config_path, "r") as file: + self.workflow_config = yaml.safe_load(file) or {} @staticmethod def _write_to_file(file_path: str, data: dict): @@ -206,10 +246,18 @@ def _update_variable_catalogs(self, generator, file_dict, variable_ids): ) file_dict[var_file_path] = updated_catalog.to_dict() - def publish_dataset(self, write_to_file: bool = False): + def publish_dataset( + self, + write_to_file: bool = False, + mode: Literal["all", "dataset", "workflow"] = "all", + ) -> dict[str, Any]: """Prepare dataset/product collection for publishing to the specified GitHub repository.""" + if not self.dataset_config: + raise ValueError( + "No dataset config loaded. Provide dataset_config_path to publish dataset." 
+ ) dataset_id = self.dataset_config.get("dataset_id") self.collection_id = self.dataset_config.get("collection_id") documentation_link = self.dataset_config.get("documentation_link") @@ -240,7 +288,7 @@ def publish_dataset(self, write_to_file: bool = False): ) variable_ids = generator.get_variable_ids() - ds_collection = generator.build_dataset_stac_collection() + ds_collection = generator.build_dataset_stac_collection(mode=mode) # Prepare a dictionary of file paths and content file_dict = {} @@ -285,39 +333,74 @@ def _normalize_name(name: str | None) -> str | None: def _update_base_catalog( self, catalog_path: str, item_id: str, self_href: str ) -> Catalog: - """Update a base catalog by adding a link to a new item. + """Update a base catalog by adding a unique item link and a single self link. Args: - catalog_path: Path to the base catalog JSON file. - item_id: ID of the new item (experiment or workflow). - self_href: Self-href for the base catalog. + catalog_path: Path to the base catalog JSON file, relative to the repo root. + item_id: ID (directory name) of the new item (workflow/experiment). + self_href: Absolute self-href for the base catalog. Returns: - Updated Catalog object. + The updated PySTAC Catalog object (in-memory). """ - # Load the base catalog base_catalog = Catalog.from_file( Path(self.gh_publisher.github_automation.local_clone_dir) / catalog_path ) - # Add a link to the new item - base_catalog.add_link( - Link( - rel="item", - target=f"./{item_id}/record.json", - media_type="application/json", - title=f"{self.workflow_title}", + item_href = f"./{item_id}/record.json" + + def resolve_href(link: Link) -> str | None: + # PySTAC keeps raw targets; get_href() may resolve relative paths if a base HREF is set + return ( + getattr(link, "href", None) + or getattr(link, "target", None) + or (link.get_href() if hasattr(link, "get_href") else None) ) + + # 1) Add the "item" link only if it's not already present + has_item = any( + (link.rel == "item") and (resolve_href(link) == item_href) + for link in base_catalog.links ) + if not has_item: + base_catalog.add_link( + Link( + rel="item", + target=item_href, + media_type="application/json", + title=self.workflow_title, + ) + ) - # Set the self-href for the base catalog + # 2) Ensure there is exactly one "self" link + base_catalog.links = [link for link in base_catalog.links if link.rel != "self"] base_catalog.set_self_href(self_href) + # 3) Defense-in-depth: deduplicate by (rel, href) + seen: set[tuple[str, str | None]] = set() + unique_links: list[Link] = [] + for link in base_catalog.links: + key = (link.rel, resolve_href(link)) + if key not in seen: + unique_links.append(link) + seen.add(key) + base_catalog.links = unique_links + return base_catalog - def generate_workflow_experiment_records(self, write_to_file: bool = False) -> None: + def generate_workflow_experiment_records( + self, + write_to_file: bool = False, + mode: Literal["all", "dataset", "workflow"] = "all", + ) -> dict[str, Any]: """prepare workflow and experiment as ogc api record to publish it to the specified GitHub repository.""" + + file_dict = {} + + if mode not in {"workflow", "all"}: + return file_dict # nothing to do for mode="dataset" + workflow_id = self._normalize_name(self.workflow_config.get("workflow_id")) if not workflow_id: raise ValueError("workflow_id is missing in workflow config.") @@ -353,6 +436,8 @@ def generate_workflow_experiment_records(self, write_to_file: bool = False) -> N jupyter_notebook_url=jupyter_notebook_url, themes=osc_themes, ) 
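The duplicate-link guard in `_update_base_catalog` above amounts to an idempotent link add on a PySTAC catalog. A self-contained sketch of the same idea, with illustrative IDs and paths rather than the production catalogs:

```python
# Sketch: add an "item" link only if an equivalent (rel, href) pair is not already present,
# mirroring the duplicate prevention applied to the workflow/experiment base catalogs above.
import pystac

catalog = pystac.Catalog(id="workflows", description="Example base catalog")
new_link = pystac.Link(
    rel="item",
    target="./my-workflow/record.json",
    media_type="application/json",
    title="My workflow",
)

already_present = any(
    link.rel == "item" and link.get_href() == new_link.get_href()
    for link in catalog.links
)
if not already_present:
    catalog.add_link(new_link)
```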
+ if mode == "all": + link_builder.build_child_link_to_related_experiment() # Convert to dictionary and cleanup workflow_dict = workflow_record.to_dict() if "jupyter_notebook_url" in workflow_dict: @@ -367,41 +452,50 @@ def generate_workflow_experiment_records(self, write_to_file: bool = False) -> N exp_record_properties.type = "experiment" exp_record_properties.osc_workflow = workflow_id - dataset_link = link_builder.build_link_to_dataset(self.collection_id) - - experiment_record = ExperimentAsOgcRecord( - id=workflow_id, - title=self.workflow_title, - type="Feature", - jupyter_notebook_url=jupyter_notebook_url, - collection_id=self.collection_id, - properties=exp_record_properties, - links=links + theme_links + dataset_link, - ) - # Convert to dictionary and cleanup - experiment_dict = experiment_record.to_dict() - if "jupyter_notebook_url" in experiment_dict: - del experiment_dict["jupyter_notebook_url"] - if "collection_id" in experiment_dict: - del experiment_dict["collection_id"] - if "osc:project" in experiment_dict["properties"]: - del experiment_dict["properties"]["osc:project"] - # add experiment record to file_dict - exp_file_path = f"experiments/{workflow_id}/record.json" - file_dict[exp_file_path] = experiment_dict - - # Update base catalogs of experiments and workflows with links - file_dict["experiments/catalog.json"] = self._update_base_catalog( - catalog_path="experiments/catalog.json", - item_id=workflow_id, - self_href=EXPERIMENT_BASE_CATALOG_SELF_HREF, - ) - + # Update base catalogs of workflows with links file_dict["workflows/catalog.json"] = self._update_base_catalog( catalog_path="workflows/catalog.json", item_id=workflow_id, self_href=WORKFLOW_BASE_CATALOG_SELF_HREF, ) + + if mode in ["all"]: + if not getattr(self, "collection_id", None): + raise ValueError( + "collection_id is required to generate the experiment record when mode='all' " + "(the experiment links to the output dataset)." 
+ ) + # generate experiment record only if there is an output dataset + dataset_link = link_builder.build_link_to_dataset(self.collection_id) + + experiment_record = ExperimentAsOgcRecord( + id=workflow_id, + title=self.workflow_title, + type="Feature", + jupyter_notebook_url=jupyter_notebook_url, + collection_id=self.collection_id, + properties=exp_record_properties, + links=links + theme_links + dataset_link, + ) + # Convert to dictionary and cleanup + experiment_dict = experiment_record.to_dict() + if "jupyter_notebook_url" in experiment_dict: + del experiment_dict["jupyter_notebook_url"] + if "collection_id" in experiment_dict: + del experiment_dict["collection_id"] + if "osc:project" in experiment_dict["properties"]: + del experiment_dict["properties"]["osc:project"] + # add experiment record to file_dict + exp_file_path = f"experiments/{workflow_id}/record.json" + file_dict[exp_file_path] = experiment_dict + + # Update base catalogs of experiments with links + file_dict["experiments/catalog.json"] = self._update_base_catalog( + catalog_path="experiments/catalog.json", + item_id=workflow_id, + self_href=EXPERIMENT_BASE_CATALOG_SELF_HREF, + ) + # Write to files if testing if write_to_file: for file_path, data in file_dict.items(): @@ -409,43 +503,70 @@ def generate_workflow_experiment_records(self, write_to_file: bool = False) -> N return {} return file_dict - def publish_all(self, write_to_file: bool = False): - """Publish both dataset and workflow/experiment in a single PR.""" - # Get file dictionaries from both methods - dataset_files = self.publish_dataset(write_to_file=write_to_file) - workflow_files = self.generate_workflow_experiment_records( - write_to_file=write_to_file - ) + def publish( + self, + write_to_file: bool = False, + mode: Literal["all", "dataset", "workflow"] = "all", + ) -> dict[str, Any] | str: + """ + Publish both dataset and workflow/experiment in a single PR. + Args: + write_to_file: If True, write JSON files locally and return the generated dict(s). + If False, open a PR and return the PR URL. + mode: Select which artifacts to publish: + - "dataset": only dataset collection & related catalogs + - "workflow": only workflow records + - "all": both + Returns: + dict[str, Any] when write_to_file=True (the files written), + or str when write_to_file=False (the PR URL). + """ - # Combine the file dictionaries - combined_files = {**dataset_files, **workflow_files} + files: dict[str, Any] = {} - if not write_to_file: - # Create branch name, commit message, PR info - branch_name = ( - f"{OSC_BRANCH_NAME}-{self.collection_id}" - f"-{datetime.now().strftime('%Y%m%d%H%M%S')}" - ) - commit_message = ( - f"Add new dataset collection: {self.collection_id} and " - f"workflow/experiment: {self.workflow_config.get('workflow_id')}" - ) - pr_title = ( - f"Add new dataset collection: {self.collection_id} and " - f"workflow/experiment: {self.workflow_config.get('workflow_id')}" - ) - pr_body = ( - f"This PR adds a new dataset collection: {self.collection_id} and " - f"its corresponding workflow/experiment to the repository." 
+ if mode in ("dataset", "all"): + ds_files = self.publish_dataset(write_to_file=False, mode=mode) + files.update(ds_files) + + if mode in ("workflow", "all"): + wf_files = self.generate_workflow_experiment_records( + write_to_file=False, mode=mode ) + files.update(wf_files) - # Publish all files in one go - pr_url = self.gh_publisher.publish_files( - branch_name=branch_name, - file_dict=combined_files, - commit_message=commit_message, - pr_title=pr_title, - pr_body=pr_body, + if not files: + raise ValueError( + "Nothing to publish. Choose mode='dataset', 'workflow', or 'all'." ) - logger.info(f"Pull request created: {pr_url}") + if write_to_file: + for file_path, data in files.items(): + # file_path might be a Path (from _update_and_add_to_file_dict) – normalize to str + out_path = str(file_path) + self._write_to_file(out_path, data) + return {} # consistent with existing write_to_file behavior + + # Prepare PR + mode_label = { + "dataset": f"dataset: {self.collection_id or 'unknown'}", + "workflow": f"workflow: {self.workflow_id or 'unknown'}", + "all": f"dataset: {self.collection_id or 'unknown'} + workflow/experiment: {self.workflow_id or 'unknown'}", + }[mode] + + branch_name = ( + f"{OSC_BRANCH_NAME}-{(self.collection_id or self.workflow_id or 'osc')}" + f"-{datetime.now().strftime('%Y%m%d%H%M%S')}" + ) + commit_message = f"Publish {mode_label}" + pr_title = f"Publish {mode_label}" + pr_body = f"This PR publishes {mode_label} to the repository." + + pr_url = self.gh_publisher.publish_files( + branch_name=branch_name, + file_dict=files, + commit_message=commit_message, + pr_title=pr_title, + pr_body=pr_body, + ) + logger.info(f"Pull request created: {pr_url}") + return pr_url diff --git a/deep_code/utils/dataset_stac_generator.py b/deep_code/utils/dataset_stac_generator.py index a30a817..29ff3b7 100644 --- a/deep_code/utils/dataset_stac_generator.py +++ b/deep_code/utils/dataset_stac_generator.py @@ -377,7 +377,7 @@ def build_theme(osc_themes: list[str]) -> Theme: concepts = [ThemeConcept(id=theme_str) for theme_str in osc_themes] return Theme(concepts=concepts, scheme=OSC_THEME_SCHEME) - def build_dataset_stac_collection(self) -> Collection: + def build_dataset_stac_collection(self, mode: str) -> Collection: """Build an OSC STAC Collection for the dataset. Returns: @@ -484,14 +484,15 @@ def build_dataset_stac_collection(self) -> Collection: ) ) - collection.add_link( - Link( - rel="related", - target=f"../../experiments/{self.workflow_id}/record.json", - media_type="application/json", - title=f"Experiment: {self.workflow_title}", + if mode in "all": + collection.add_link( + Link( + rel="related", + target=f"../../experiments/{self.workflow_id}/record.json", + media_type="application/json", + title=f"Experiment: {self.workflow_title}", + ) ) - ) collection.license = self.license_type diff --git a/deep_code/utils/github_automation.py b/deep_code/utils/github_automation.py index 8090069..e52eb75 100644 --- a/deep_code/utils/github_automation.py +++ b/deep_code/utils/github_automation.py @@ -1,14 +1,17 @@ #!/usr/bin/env python3 - # Copyright (c) 2025 by Brockmann Consult GmbH # Permissions are hereby granted under the terms of the MIT License: # https://opensource.org/licenses/MIT. +from __future__ import annotations + import json import logging import os +import shutil import subprocess from pathlib import Path +from typing import Any import requests @@ -23,99 +26,218 @@ class GitHubAutomation: token: Personal access token for GitHub. repo_owner: Owner of the repository to fork. 
repo_name: Name of the repository to fork. + local_clone_dir: Optional path to use for local clone (defaults to ~/temp_repo). """ - def __init__(self, username: str, token: str, repo_owner: str, repo_name: str): + def __init__( + self, + username: str, + token: str, + repo_owner: str, + repo_name: str, + local_clone_dir: str | None = None, + ): self.username = username self.token = token self.repo_owner = repo_owner self.repo_name = repo_name + self.base_repo_url = f"https://github.com/{repo_owner}/{repo_name}.git" - self.fork_repo_url = ( + # Tokenized origin URL for pushes to the fork + self.origin_repo_url = ( f"https://{username}:{token}@github.com/{username}/{repo_name}.git" ) - self.local_clone_dir = os.path.join(os.path.expanduser("~"), "temp_repo") + self.local_clone_dir = ( + os.path.join(os.path.expanduser("~"), "temp_repo") + if local_clone_dir is None + else local_clone_dir + ) + + def _run( + self, + cmd: list[str], + cwd: str | Path | None = None, + check: bool = True, + capture_output: bool = False, + text: bool = True, + ) -> subprocess.CompletedProcess: + """Run a subprocess command with consistent logging and error handling.""" + cwd_str = str(cwd) if cwd is not None else str(self.local_clone_dir) + logging.debug("RUN: %s (cwd=%s)", " ".join(cmd), cwd_str) + try: + return subprocess.run( + cmd, cwd=cwd_str, check=check, capture_output=capture_output, text=text + ) + except subprocess.CalledProcessError as e: + stdout = e.stdout or "" + stderr = e.stderr or "" + raise RuntimeError( + f"Command failed: {' '.join(cmd)}\nSTDOUT:\n{stdout}\nSTDERR:\n{stderr}" + ) from e + + def _run_git(self, args: list[str], cwd: str | Path | None = None) -> None: + self._run(["git", *args], cwd=cwd) + + def _git_output(self, args: list[str]) -> str: + res = self._run(["git", *args], capture_output=True) + return res.stdout - def fork_repository(self): + def _ensure_repo_dir(self) -> Path: + path = Path(self.local_clone_dir) + path.mkdir(parents=True, exist_ok=True) + return path + + def _ensure_upstream_remote(self) -> None: + """Ensure a remote named 'upstream' points to the base repo.""" + repo = self._ensure_repo_dir() + remotes = self._git_output(["remote", "-v"]) + upstream_url = self.base_repo_url + + if "upstream" not in remotes: + logging.info("Adding 'upstream' remote -> %s", upstream_url) + self._run_git(["remote", "add", "upstream", upstream_url], cwd=repo) + else: + # If the URL mismatches, fix it + if f"upstream\t{upstream_url} (fetch)" not in remotes: + logging.info("Updating 'upstream' remote URL -> %s", upstream_url) + self._run_git(["remote", "set-url", "upstream", upstream_url], cwd=repo) + + def fork_repository(self) -> None: """Fork the repository to the user's GitHub account.""" logging.info("Forking repository...") url = f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/forks" headers = {"Authorization": f"token {self.token}"} - response = requests.post(url, headers=headers) + response = requests.post(url, headers=headers, timeout=60) response.raise_for_status() - logging.info(f"Repository forked to {self.username}/{self.repo_name}") + logging.info("Repository forked to %s/%s", self.username, self.repo_name) + + def clone_sync_repository(self) -> None: + """Clone the forked repository locally if missing; otherwise fetch & fast-forward origin.""" + repo = self._ensure_repo_dir() + git_dir = repo / ".git" + + if not git_dir.exists(): + logging.info("Cloning fork: %s -> %s", self.origin_repo_url, repo) + self._run(["git", "clone", self.origin_repo_url, 
str(repo)], cwd=".") + # Ensure default branch is tracked locally (we don't assume 'main') + # This will be handled by later sync step. + else: + logging.info("Local clone exists; fetching latest from origin.") + self._run_git(["fetch", "--all", "--prune"], cwd=repo) + + # Always ensure we have the upstream remote configured + self._ensure_upstream_remote() + + def sync_fork_with_upstream( + self, base_branch: str = "main", strategy: str = "merge" + ) -> None: + """Sync local and origin base branch from upstream base branch. + strategy: + - 'ff' : fast-forward only + - 'rebase' : rebase local base_branch onto upstream/base_branch + - 'merge' : merge upstream/base_branch into local/base_branch (default) + """ + repo = self._ensure_repo_dir() + logging.info( + "Syncing fork with upstream (%s) on branch '%s'...", strategy, base_branch + ) + + # Make sure remotes are present and fresh + self._ensure_upstream_remote() + self._run_git(["fetch", "upstream", base_branch], cwd=repo) + self._run_git(["fetch", "origin", base_branch], cwd=repo) - def clone_sync_repository(self): - """Clone the forked repository locally if it doesn't exist, or pull updates if it does.""" - logging.info("Checking local repository...") - if not os.path.exists(self.local_clone_dir): - logging.info("Cloning forked repository...") + # Ensure we have a local branch for base_branch + local_branches = self._git_output(["branch"]) + if f" {base_branch}\n" not in (local_branches + "\n"): + # Create local base branch tracking origin/base_branch if it exists, otherwise upstream try: - subprocess.run( - ["git", "clone", self.fork_repo_url, self.local_clone_dir], - check=True, + self._run_git( + ["checkout", "-b", base_branch, f"origin/{base_branch}"], cwd=repo + ) + except RuntimeError: + self._run_git( + ["checkout", "-b", base_branch, f"upstream/{base_branch}"], cwd=repo ) - logging.info(f"Repository cloned to {self.local_clone_dir}") - except subprocess.CalledProcessError as e: - raise RuntimeError(f"Failed to clone repository: {e}") else: - logging.info("Local repository already exists. 
Pulling latest changes...") - try: - os.chdir(self.local_clone_dir) - subprocess.run(["git", "pull"], check=True) - logging.info("Repository updated with latest changes.") - except subprocess.CalledProcessError as e: - raise RuntimeError(f"Failed to pull latest changes: {e}") - - def create_branch(self, branch_name: str): - """Create a new branch in the local repository.""" - logging.info(f"Creating new branch: {branch_name}...") - try: - os.chdir(self.local_clone_dir) - subprocess.run(["git", "checkout", "-b", branch_name], check=True) - except subprocess.CalledProcessError as e: - raise RuntimeError(f"Failed Creating branch: '{branch_name}': {e}") + self._run_git(["checkout", base_branch], cwd=repo) - def add_file(self, file_path: str, content): - """Add a new file to the local repository.""" - logging.info(f"Adding new file: {file_path}...") - os.chdir(self.local_clone_dir) - full_path = Path(self.local_clone_dir) / file_path + if strategy == "ff": + self._run_git(["merge", "--ff-only", f"upstream/{base_branch}"], cwd=repo) + elif strategy == "rebase": + self._run_git(["rebase", f"upstream/{base_branch}"], cwd=repo) + elif strategy == "merge": + self._run_git(["merge", f"upstream/{base_branch}"], cwd=repo) + else: + raise ValueError("strategy must be one of: 'ff', 'rebase', 'merge'") + + # Push updated base branch to fork + self._run_git(["push", "origin", base_branch], cwd=repo) + logging.info( + "Fork origin/%s is now aligned with upstream/%s.", base_branch, base_branch + ) + + def create_branch(self, branch_name: str, from_branch: str = "main") -> None: + """Create or reset a local branch from a given base branch.""" + repo = self._ensure_repo_dir() + logging.info("Creating branch '%s' from '%s'...", branch_name, from_branch) + + # Ensure base exists locally (caller should have synced beforehand) + self._run_git(["checkout", from_branch], cwd=repo) + # -B creates or resets the branch to the current HEAD of from_branch + self._run_git(["checkout", "-B", branch_name], cwd=repo) + + def add_file(self, file_path: str, content: Any) -> None: + """Add a new file (serialized to JSON) to the local repository and stage it.""" + repo = self._ensure_repo_dir() + full_path = Path(repo) / file_path full_path.parent.mkdir(parents=True, exist_ok=True) - # Ensure content is serializable + + # Normalize content to something JSON serializable if hasattr(content, "to_dict"): content = content.to_dict() if not isinstance(content, (dict, list, str, int, float, bool, type(None))): raise TypeError(f"Cannot serialize content of type {type(content)}") + try: json_content = json.dumps( content, indent=2, ensure_ascii=False, default=serialize ) except TypeError as e: - raise RuntimeError(f"JSON serialization failed: {e}") - with open(full_path, "w", encoding="utf-8") as f: - f.write(json_content) - try: - subprocess.run(["git", "add", str(full_path)], check=True) - except subprocess.CalledProcessError as e: - raise RuntimeError(f"Failed to add file '{file_path}': {e}") + raise RuntimeError( + f"JSON serialization failed for '{file_path}': {e}" + ) from e + + full_path.write_text(json_content, encoding="utf-8") + self._run_git(["add", str(full_path)], cwd=repo) + logging.info("Added and staged file: %s", file_path) + + def commit_and_push(self, branch_name: str, commit_message: str) -> None: + """Commit staged changes on the branch and push to origin.""" + repo = self._ensure_repo_dir() + logging.info("Committing and pushing changes on '%s'...", branch_name) - def commit_and_push(self, branch_name: str, 
commit_message: str): - """Commit changes and push to the forked repository.""" - logging.info("Committing and pushing changes...") - os.chdir(self.local_clone_dir) + self._run_git(["checkout", branch_name], cwd=repo) try: - subprocess.run(["git", "commit", "-m", commit_message], check=True) - subprocess.run(["git", "push", "-u", "origin", branch_name], check=True) - except subprocess.CalledProcessError as e: - raise RuntimeError(f"Failed to commit and push: {e}") + self._run_git(["commit", "-m", commit_message], cwd=repo) + except RuntimeError as e: + if "nothing to commit" in str(e).lower(): + logging.info("Nothing to commit on '%s'; pushing anyway.", branch_name) + else: + raise + self._run_git(["push", "-u", "origin", branch_name], cwd=repo) def create_pull_request( self, branch_name: str, pr_title: str, pr_body: str, base_branch: str = "main" - ): - """Create a pull request from the forked repository to the base repository.""" - logging.info("Creating a pull request...") - os.chdir(self.local_clone_dir) + ) -> str: + """Create a pull request from fork:branch_name → base_repo:base_branch. + + Returns: + The created PR URL. + """ + logging.info( + "Creating pull request '%s' -> base:%s ...", branch_name, base_branch + ) url = f"https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/pulls" headers = {"Authorization": f"token {self.token}"} data = { @@ -124,22 +246,27 @@ def create_pull_request( "base": base_branch, "body": pr_body, } - response = requests.post(url, headers=headers, json=data) + response = requests.post(url, headers=headers, json=data, timeout=60) response.raise_for_status() - pr_url = response.json()["html_url"] - logging.info(f"Pull request created: {pr_url}") + pr_url = response.json().get("html_url", "") + logging.info("Pull request created: %s", pr_url) + return pr_url - def clean_up(self): - """Clean up the local cloned repository.""" - logging.info("Cleaning up local repository...") - os.chdir("..") + def clean_up(self) -> None: + """Remove the local cloned repository directory.""" + repo = Path(self.local_clone_dir) + logging.info("Cleaning up local repository at %s ...", repo) try: - subprocess.run(["rm", "-rf", self.local_clone_dir]) - except subprocess.CalledProcessError as e: - raise RuntimeError(f"Failed to clean-up local repository: {e}") + if repo.exists(): + shutil.rmtree(repo) + except Exception as e: + raise RuntimeError( + f"Failed to clean up local repository '{repo}': {e}" + ) from e - def file_exists(self, file_path) -> bool: + def file_exists(self, file_path: str) -> bool: + """Check if a file exists within the local clone directory.""" full_path = Path(self.local_clone_dir) / file_path - exists = os.path.isfile(full_path) - logging.debug(f"Checking existence of {full_path}: {exists}") + exists = full_path.is_file() + logging.debug("Checking existence of %s: %s", full_path, exists) return exists diff --git a/deep_code/utils/ogc_api_record.py b/deep_code/utils/ogc_api_record.py index 22bc5eb..e95ec10 100644 --- a/deep_code/utils/ogc_api_record.py +++ b/deep_code/utils/ogc_api_record.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Tuple, List, Dict +from typing import Any, Dict, List, Optional, Tuple from urllib.parse import quote, urlencode, urlparse from xrlint.util.constructible import MappingConstructible @@ -17,19 +17,32 @@ class Contact(MappingConstructible["Contact"], JsonSerializable): def __init__( self, - name: str, - organization: str, - position: str | None = "", + name: str | None = None, + organization: str | None = None, 
+ position: str | None = None, links: list[dict[str, Any]] | None = None, - contactInstructions: str | None = "", - roles: list[str] = None, + contactInstructions: str | None = None, + roles: list[str] | None = None, ): self.name = name self.organization = organization self.position = position - self.links = links or [] + self.links = links self.contactInstructions = contactInstructions - self.roles = roles or ["principal investigator"] + self.roles = roles + + def to_dict(self, value_name: str | None = None) -> dict[str, JsonValue]: + """Serialize to JSON, dropping None values.""" + data = { + "name": self.name, + "organization": self.organization, + "position": self.position, + "links": self.links, + "contactInstructions": self.contactInstructions, + "roles": self.roles, + } + # keep only explicitly set fields + return {k: v for k, v in data.items() if v is not None} class ThemeConcept(MappingConstructible["ThemeConcept"], JsonSerializable): @@ -115,7 +128,7 @@ def build_theme_links_for_records(self): return self.theme_links @staticmethod - def build_link_to_dataset(collection_id): + def build_link_to_dataset(collection_id) -> list[dict[str, str]]: return [ { "rel": "child", @@ -125,6 +138,17 @@ def build_link_to_dataset(collection_id): } ] + def build_child_link_to_related_experiment(self) -> list[dict[str, str]]: + """Build a link to the related experiment record if publishing mode is 'all'.""" + return [ + { + "rel": "child", + "href": f"../../experiments/{self.id}/record.json", + "type": "application/json", + "title": self.title, + } + ] + def build_link_to_jnb(self, workflow_title, jupyter_nb_url) -> List[Dict[str, Any]]: return [ { @@ -265,12 +289,6 @@ def _generate_static_links(self): "type": "application/json", "title": "Workflows", }, - { - "rel": "child", - "href": f"../../experiments/{self.id}/record.json", - "type": "application/json", - "title": f"{self.title}", - }, { "rel": "jupyter-notebook", "type": "application/json", diff --git a/deep_code/version.py b/deep_code/version.py index c80384e..e81a4d9 100644 --- a/deep_code/version.py +++ b/deep_code/version.py @@ -19,4 +19,4 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -version = "0.1.5" +version = "0.1.6.dev" diff --git a/examples/notebooks/publish_to_EarthCODE.ipynb b/examples/notebooks/publish_to_EarthCODE.ipynb index b2872df..eb8d520 100644 --- a/examples/notebooks/publish_to_EarthCODE.ipynb +++ b/examples/notebooks/publish_to_EarthCODE.ipynb @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "dc774651-bef8-4483-9a83-7fcacdc88797", "metadata": { "tags": [] @@ -59,13 +59,14 @@ "import warnings\n", "\n", "from xcube.core.store import new_data_store\n", + "\n", "from deep_code.tools.lint import LintDataset\n", "from deep_code.tools.publish import Publisher" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "626af94e-9397-4e1f-83e8-9fb1de5d9e0b", "metadata": { "tags": [] @@ -105,33 +106,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "dcc6255f-b680-419c-acf1-38968c104790", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING - 2025-09-12T13:17:36Z - COPERNICUS_MARINE_SERVICE_USERNAME is deprecated. Please use COPERNICUSMARINE_SERVICE_USERNAME instead.\n", - "WARNING:copernicusmarine:COPERNICUS_MARINE_SERVICE_USERNAME is deprecated. 
Please use COPERNICUSMARINE_SERVICE_USERNAME instead.\n", - "WARNING - 2025-09-12T13:17:36Z - COPERNICUS_MARINE_SERVICE_PASSWORD is deprecated. Please use COPERNICUSMARINE_SERVICE_PASSWORD instead.\n", - "WARNING:copernicusmarine:COPERNICUS_MARINE_SERVICE_PASSWORD is deprecated. Please use COPERNICUSMARINE_SERVICE_PASSWORD instead.\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "store = new_data_store(\"cmems\")\n", "store" @@ -139,546 +119,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "8003fe01-2186-4a60-9ec6-b6ce51d2a185", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - 2025-09-12T13:17:42Z - Selected dataset version: \"201904\"\n", - "INFO:copernicusmarine:Selected dataset version: \"201904\"\n", - "INFO - 2025-09-12T13:17:42Z - Selected dataset part: \"default\"\n", - "INFO:copernicusmarine:Selected dataset part: \"default\"\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 10MB\n",
-       "Dimensions:                  (time: 5, latitude: 451, longitude: 551)\n",
-       "Coordinates:\n",
-       "  * latitude                 (latitude) float32 2kB 53.0 53.02 ... 61.98 62.0\n",
-       "  * longitude                (longitude) float32 2kB 9.0 9.02 ... 19.98 20.0\n",
-       "  * time                     (time) datetime64[ns] 40B 2022-01-01 ... 2022-01-05\n",
-       "Data variables:\n",
-       "    sea_surface_temperature  (time, latitude, longitude) float64 10MB dask.array<chunksize=(1, 451, 512), meta=np.ndarray>\n",
-       "Attributes:\n",
-       "    Conventions:  CF-1.6\n",
-       "    comment:      IN NO EVENT SHALL DMI OR ITS REPRESENTATIVES BE LIABLE FOR ...\n",
-       "    source:       VIIRS_SST_NPP_NAR-OSISAF-L3C, SEVIRI_SST-OSISAF-L3C, EUR-L2...\n",
-       "    title:        DMI North Sea/Baltic Sea SST, L3S, (SST_BAL_SST_L3S_NRT_OBS...\n",
-       "    references:   Høyer, J. L., Le Borgne, P., & Eastwood, S. (2014). A bias ...\n",
-       "    history:      Version 1.0\n",
-       "    institution:  Danish Meteorological Institute, DMI
" - ], - "text/plain": [ - " Size: 10MB\n", - "Dimensions: (time: 5, latitude: 451, longitude: 551)\n", - "Coordinates:\n", - " * latitude (latitude) float32 2kB 53.0 53.02 ... 61.98 62.0\n", - " * longitude (longitude) float32 2kB 9.0 9.02 ... 19.98 20.0\n", - " * time (time) datetime64[ns] 40B 2022-01-01 ... 2022-01-05\n", - "Data variables:\n", - " sea_surface_temperature (time, latitude, longitude) float64 10MB dask.array\n", - "Attributes:\n", - " Conventions: CF-1.6\n", - " comment: IN NO EVENT SHALL DMI OR ITS REPRESENTATIVES BE LIABLE FOR ...\n", - " source: VIIRS_SST_NPP_NAR-OSISAF-L3C, SEVIRI_SST-OSISAF-L3C, EUR-L2...\n", - " title: DMI North Sea/Baltic Sea SST, L3S, (SST_BAL_SST_L3S_NRT_OBS...\n", - " references: Høyer, J. L., Le Borgne, P., & Eastwood, S. (2014). A bias ...\n", - " history: Version 1.0\n", - " institution: Danish Meteorological Institute, DMI" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ds = store.open_data(\n", " \"DMI-BALTIC-SST-L3S-NRT-OBS_FULL_TIME_SERIE\",\n", @@ -831,7 +277,7 @@ "id": "b7aa7bc9-83d5-4e43-9f7f-a4da18e8cacd", "metadata": {}, "source": [ - "Once the dataset and workflow metadata are prepared and validated, users can initiate the publishing process using the deep-code CLI. The following command automates the entire workflow:" + "Once the dataset and workflow metadata are prepared and validated, users can initiate the publishing process using the deep-code CLI or the python function. The following command automates the entire workflow:" ] }, { @@ -850,110 +296,67 @@ "4. Creates a Pull Request (PR) for review by the Open Science Catalog steward" ] }, + { + "cell_type": "markdown", + "id": "505ec89b-ff01-4deb-8bac-ef5bb99aa9ee", + "metadata": {}, + "source": [ + "Execute the below cell to confirm if you are in the correct directory, the current directory should have the .gitaccess file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0adca69e-31a5-4ff6-8466-1a129ee85779", + "metadata": {}, + "outputs": [], + "source": [ + "notebook_path = os.path.abspath(\"publish_to_EarthCODE.ipynb\")\n", + "notebook_dir = os.path.dirname(notebook_path)\n", + "print(\"Current Working Directory:\", notebook_dir)\n", + "# Change working directory\n", + "os.chdir(notebook_dir)\n", + "\n", + "# Confirm\n", + "print(\"Current Working Directory:\", os.getcwd())" + ] + }, { "cell_type": "markdown", "id": "e763ef5b-c4c2-4991-a612-7f2bff9a676d", "metadata": {}, "source": [ - "## publish using the python function" + "## publish using the python function\n", + "Before publishing, please prepare dataset and/or workflow configuration file for your use case" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "c7e3a5a8-d74d-4bd4-9b76-45ace4b2d341", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:root:Forking repository...\n", - "INFO:root:Repository forked to tejasmharish/open-science-catalog-metadata-staging\n", - "INFO:root:Checking local repository...\n", - "INFO:root:Cloning forked repository...\n", - "Cloning into '/home/tejas/temp_repo'...\n", - "INFO:root:Repository cloned to /home/tejas/temp_repo\n", - "INFO:deep_code.tools.publish:Generating STAC collection...\n", - "INFO:deep_code.utils.dataset_stac_generator:Attempting to open dataset 'cmems_sst_v2.zarr' with configuration: Public store\n", - "INFO:deep_code.utils.dataset_stac_generator:Successfully opened dataset 
'cmems_sst_v2.zarr' with configuration: Public store\n", - "INFO:deep_code.tools.publish:Variable catalog already exists for sea-surface-temperature, adding product link.\n", - "INFO:deep_code.tools.publish:Generating OGC API Record for the workflow...\n", - "INFO:root:Creating new branch: add-new-collection-cmems-sst-20250912151834...\n", - "Switched to a new branch 'add-new-collection-cmems-sst-20250912151834'\n", - "INFO:deep_code.tools.publish:Adding products/cmems-sst/collection.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: products/cmems-sst/collection.json...\n", - "INFO:deep_code.tools.publish:Adding variables/sea-surface-temperature/catalog.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: variables/sea-surface-temperature/catalog.json...\n", - "INFO:deep_code.tools.publish:Adding /home/tejas/temp_repo/variables/catalog.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: /home/tejas/temp_repo/variables/catalog.json...\n", - "INFO:deep_code.tools.publish:Adding /home/tejas/temp_repo/products/catalog.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: /home/tejas/temp_repo/products/catalog.json...\n", - "INFO:deep_code.tools.publish:Adding /home/tejas/temp_repo/projects/deep-earth-system-data-lab/collection.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: /home/tejas/temp_repo/projects/deep-earth-system-data-lab/collection.json...\n", - "INFO:deep_code.tools.publish:Adding workflows/lps-demo-cmems-sst-workflow/record.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: workflows/lps-demo-cmems-sst-workflow/record.json...\n", - "INFO:deep_code.tools.publish:Adding experiments/lps-demo-cmems-sst-workflow/record.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: experiments/lps-demo-cmems-sst-workflow/record.json...\n", - "INFO:deep_code.tools.publish:Adding experiments/catalog.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: experiments/catalog.json...\n", - "INFO:deep_code.tools.publish:Adding workflows/catalog.json to add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Adding new file: workflows/catalog.json...\n", - "INFO:root:Committing and pushing changes...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[add-new-collection-cmems-sst-20250912151834 c3381005] Add new dataset collection: cmems-sst and workflow/experiment: lps-demo-cmems-sst-workflow\n", - " 9 files changed, 434 insertions(+), 6 deletions(-)\n", - " create mode 100644 experiments/lps-demo-cmems-sst-workflow/record.json\n", - " create mode 100644 products/cmems-sst/collection.json\n", - " create mode 100644 workflows/lps-demo-cmems-sst-workflow/record.json\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "remote: \n", - "remote: Create a pull request for 'add-new-collection-cmems-sst-20250912151834' on GitHub by visiting: \n", - "remote: https://github.com/tejasmharish/open-science-catalog-metadata-staging/pull/new/add-new-collection-cmems-sst-20250912151834 \n", - "remote: \n", - "To https://github.com/tejasmharish/open-science-catalog-metadata-staging.git\n", - " * [new branch] add-new-collection-cmems-sst-20250912151834 -> add-new-collection-cmems-sst-20250912151834\n", - "INFO:root:Creating a pull request...\n" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "Branch 'add-new-collection-cmems-sst-20250912151834' set up to track remote branch 'add-new-collection-cmems-sst-20250912151834' from 'origin'.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:root:Pull request created: https://github.com/ESA-EarthCODE/open-science-catalog-metadata-staging/pull/156\n", - "INFO:deep_code.tools.publish:Pull request created: None\n", - "INFO:root:Cleaning up local repository...\n", - "INFO:deep_code.tools.publish:Pull request created: None\n" - ] - } - ], - "source": [ - "# publish using the python function\n", + "outputs": [], + "source": [ + "# publish dataset and workflow using the python function\n", "publisher = Publisher(\n", " dataset_config_path=\"dataset-config.yaml\",\n", - " workflow_config_path=\"workflow-config.yaml\",\n", - " environment=\"staging\",\n", + " workflow_config_path=\"workflow-config.yaml\"\n", ")\n", - "publisher.publish_all()" + "publisher.publish()\n", + "\n", + "# # To publish only workflow\n", + "# publisher = Publisher(\n", + "# workflow_config_path=\"workflow-config.yaml\"\n", + "# )\n", + "# publisher.publish(mode=\"worflow\")\n", + "\n", + "# # To publish only dataset\n", + "# publisher = Publisher(\n", + "# dataset_config_path=\"dataset-config.yaml\",\n", + "# )\n", + "# publisher.publish(mode=\"dataset\")" ] }, { @@ -961,7 +364,11 @@ "id": "09bcacf5-f435-4af3-8984-5ddf421504b8", "metadata": {}, "source": [ - "## publish using cli" + "## publish using cli\n", + "- Publisher now supports `mode` parameter, This allows more flexible publishing:\n", + " - `\"dataset\"` → publish dataset only\n", + " - `\"workflow\"` → publish workflow only\n", + " - `\"all\"` → publish both (default)" ] }, { @@ -973,7 +380,15 @@ }, "outputs": [], "source": [ - "!deep-code publish dataset-config.yaml workflow-config.yaml -e staging" + "!deep-code publish dataset-config.yaml workflow-config.yaml " + ] + }, + { + "cell_type": "markdown", + "id": "635aafd0-6058-4c13-9679-3351aea7b173", + "metadata": {}, + "source": [ + "publish dataset with -m dataset (dataset mode)" ] }, { @@ -982,7 +397,45 @@ "id": "406f5d35-03eb-4a6c-ba99-f70ee1101038", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "!deep-code publish dataset-config.yaml -m dataset" + ] + }, + { + "cell_type": "markdown", + "id": "e2110051-2b36-493e-912f-e5666ad8398a", + "metadata": {}, + "source": [ + "publish workflow with -m workflow (workflow mode)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "739f1046-71ee-4388-a14a-b2af53f986e9", + "metadata": {}, + "outputs": [], + "source": [ + "!deep-code publish workflow-config.yaml -m workflow" + ] + }, + { + "cell_type": "markdown", + "id": "749eaef3-ccf1-4756-96a9-f279ab640a4c", + "metadata": {}, + "source": [ + "explicitly pass workflow config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1e20403-95b5-4cee-8730-6c458e13c19d", + "metadata": {}, + "outputs": [], + "source": [ + "!deep-code publish --workflow-config workflow-config.yaml " + ] } ], "metadata": {