From 47deed4488cd957d6f085e28c491ff0d6e3a8583 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Tue, 18 Nov 2025 15:58:20 +0100 Subject: [PATCH 1/5] Exclude pod5 by default --- taca/organise/cli.py | 10 ++++++++-- taca/organise/flowcells.py | 22 ++++++++++++++++++---- taca/organise/organise.py | 4 ++-- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/taca/organise/cli.py b/taca/organise/cli.py index 166f2d40..acde672f 100644 --- a/taca/organise/cli.py +++ b/taca/organise/cli.py @@ -13,8 +13,14 @@ required=True, help="Project ID (e.g. P12345)", ) # future todo: option to organise all flowcells in a project +@click.option( + "--include_pod5", + is_flag=True, + default=False, + help="Include pod5 files when organising Nanopore flowcells. Defaule is False.", +) @click.argument("flowcells") -def organise_flowcells(flowcells, project): +def organise_flowcells(flowcells, project, include_pod5): """Organise FLOWCELLS. FLOWCELLS is the name of one or more sequencing flowcells, separated by a comma. 
e.g.: @@ -22,4 +28,4 @@ def organise_flowcells(flowcells, project): """ flowcells_to_organise = flowcells.split(",") for fc in flowcells_to_organise: - organise.organise_flowcell(fc, project) + organise.organise_flowcell(fc, project, include_pod5) diff --git a/taca/organise/flowcells.py b/taca/organise/flowcells.py index 123dcc5a..af090ce7 100644 --- a/taca/organise/flowcells.py +++ b/taca/organise/flowcells.py @@ -11,9 +11,11 @@ logger = logging.getLogger(__name__) -def get_flowcell_object(flowcell, project): +def get_flowcell_object(flowcell, project, include_pod5=False): if re.match(filesystem.RUN_RE_ONT, flowcell): - return NanoporeFlowcell(flowcell=flowcell, project_id=project) + return NanoporeFlowcell( + flowcell=flowcell, project_id=project, include_pod5=include_pod5 + ) elif re.match(filesystem.RUN_RE_ILLUMINA, flowcell): return IlluminaFlowcell(flowcell=flowcell, project_id=project) elif re.match(filesystem.RUN_RE_ELEMENT, flowcell): @@ -44,13 +46,14 @@ def create_org_dir(self): class NanoporeFlowcell(Flowcell): """Defines a Nanopore Flowcell""" - def __init__(self, flowcell, project_id): + def __init__(self, flowcell, project_id, include_pod5=False): super().__init__(flowcell, project_id) self.destination_path = CONFIG.get("organise").get("nanopore_path") self.organised_project_dir = os.path.join(self.destination_path, project_id) self.tar_file = self.fc_id + ".tar" self.tar_path = os.path.join(self.organised_project_dir, self.tar_file) self.md5_path = self.tar_path + ".md5" + self.include_pod5 = include_pod5 def organise_data(self): """Tarball data into ONT_TAR""" @@ -59,7 +62,18 @@ def organise_data(self): tar_err = os.path.join(self.organised_project_dir, "tar.err") with filesystem.chdir(self.incoming_path): with open(tar_err, "w") as error_file: - tar_command = ["tar", "-cvf", self.tar_path, self.fc_id] + if not self.include_pod5: + # exclude pod5 files from tarball + tar_command = [ + "tar", + "--exclude=pod5*", + "-cvf", + self.tar_path, + 
self.fc_id, + ] + else: + # include pod5 files from tarball + tar_command = ["tar", "-cvf", self.tar_path, self.fc_id] result = subprocess.run(tar_command, stderr=error_file) if result.returncode != 0: logger.error( diff --git a/taca/organise/organise.py b/taca/organise/organise.py index eba5bc97..9e45086f 100644 --- a/taca/organise/organise.py +++ b/taca/organise/organise.py @@ -7,9 +7,9 @@ logger = logging.getLogger(__name__) -def organise_flowcell(flowcell, project): +def organise_flowcell(flowcell, project, include_pod5=False): """Determine flowcell type and organise the data accordingly.""" - flowcell_object = get_flowcell_object(flowcell, project) + flowcell_object = get_flowcell_object(flowcell, project, include_pod5) flowcell_object.create_org_dir() flowcell_object.organise_data() logger.info(f"Finished organisation of flowcell {flowcell}.") From 98d39bcbe1152ff7a2b70b26ae68df2b99efed4f Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Fri, 21 Nov 2025 09:47:35 +0100 Subject: [PATCH 2/5] Versioning --- VERSIONLOG.md | 4 ++++ taca/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 7e5b9989..98eb572a 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # TACA Version Log +## 20251121.1 + +Exclude pod5 from delivery by default + ## 20251106.1 Improve logging diff --git a/taca/__init__.py b/taca/__init__.py index 5b70295c..e67efcfd 100644 --- a/taca/__init__.py +++ b/taca/__init__.py @@ -1,3 +1,3 @@ """Main TACA module""" -__version__ = "1.6.12" +__version__ = "1.6.13" From dec56b5370a5cfda3a3ebd5d0f5d73328185c3a1 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Tue, 25 Nov 2025 14:53:07 +0100 Subject: [PATCH 3/5] Spelling Co-authored-by: Johannes Alneberg --- taca/organise/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taca/organise/cli.py b/taca/organise/cli.py index acde672f..725dfa72 100644 --- a/taca/organise/cli.py +++ b/taca/organise/cli.py @@ -17,7
+17,7 @@ "--include_pod5", is_flag=True, default=False, - help="Include pod5 files when organising Nanopore flowcells. Defaule is False.", + help="Include pod5 files when organising Nanopore flowcells. Default is False.", ) @click.argument("flowcells") def organise_flowcells(flowcells, project, include_pod5): From ec593e2c6864712c3a3416fb48ed32093dc51b86 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Wed, 26 Nov 2025 15:41:27 +0100 Subject: [PATCH 4/5] Simplify creation of tar_command Co-authored-by: Anandashankar Anil --- taca/organise/flowcells.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/taca/organise/flowcells.py b/taca/organise/flowcells.py index af090ce7..2f8ba00c 100644 --- a/taca/organise/flowcells.py +++ b/taca/organise/flowcells.py @@ -62,18 +62,13 @@ def organise_data(self): tar_err = os.path.join(self.organised_project_dir, "tar.err") with filesystem.chdir(self.incoming_path): with open(tar_err, "w") as error_file: - if not self.include_pod5: - # exclude pod5 files from tarball - tar_command = [ - "tar", - "--exclude=pod5*", - "-cvf", - self.tar_path, - self.fc_id, - ] - else: - # include pod5 files from tarball - tar_command = ["tar", "-cvf", self.tar_path, self.fc_id] + tar_command = [ + "tar", + *(["--exclude=pod5*"] if not self.include_pod5 else []), + "-cvf", + self.tar_path, + self.fc_id, + ] result = subprocess.run(tar_command, stderr=error_file) if result.returncode != 0: logger.error( From ceb8266bf4f2655bd42971c49b5ec8cc4223e531 Mon Sep 17 00:00:00 2001 From: Sara Sjunnebo Date: Wed, 26 Nov 2025 15:44:41 +0100 Subject: [PATCH 5/5] Ruff --- taca/organise/flowcells.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/taca/organise/flowcells.py b/taca/organise/flowcells.py index 2f8ba00c..70687526 100644 --- a/taca/organise/flowcells.py +++ b/taca/organise/flowcells.py @@ -63,11 +63,11 @@ def organise_data(self): with filesystem.chdir(self.incoming_path): with 
open(tar_err, "w") as error_file: tar_command = [ - "tar", - *(["--exclude=pod5*"] if not self.include_pod5 else []), - "-cvf", - self.tar_path, - self.fc_id, + "tar", + *(["--exclude=pod5*"] if not self.include_pod5 else []), + "-cvf", + self.tar_path, + self.fc_id, ] result = subprocess.run(tar_command, stderr=error_file) if result.returncode != 0: