diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 7e5b9989..98eb572a 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # TACA Version Log +## 20251121.1 + +Exclude pod5 from delivery by default + ## 20251106.1 Improve logging diff --git a/taca/__init__.py b/taca/__init__.py index 5b70295c..e67efcfd 100644 --- a/taca/__init__.py +++ b/taca/__init__.py @@ -1,3 +1,3 @@ """Main TACA module""" -__version__ = "1.6.12" +__version__ = "1.6.13" diff --git a/taca/organise/cli.py b/taca/organise/cli.py index 166f2d40..725dfa72 100644 --- a/taca/organise/cli.py +++ b/taca/organise/cli.py @@ -13,8 +13,14 @@ required=True, help="Project ID (e.g. P12345)", ) # future todo: option to organise all flowcells in a project +@click.option( + "--include_pod5", + is_flag=True, + default=False, + help="Include pod5 files when organising Nanopore flowcells. Default is False.", +) @click.argument("flowcells") -def organise_flowcells(flowcells, project): +def organise_flowcells(flowcells, project, include_pod5): """Organise FLOWCELLS. FLOWCELLS is the name of one or more sequencing flowcells, separated by a comma. 
e.g.: @@ -22,4 +28,4 @@ def organise_flowcells(flowcells, project): """ flowcells_to_organise = flowcells.split(",") for fc in flowcells_to_organise: - organise.organise_flowcell(fc, project) + organise.organise_flowcell(fc, project, include_pod5) diff --git a/taca/organise/flowcells.py b/taca/organise/flowcells.py index 123dcc5a..70687526 100644 --- a/taca/organise/flowcells.py +++ b/taca/organise/flowcells.py @@ -11,9 +11,11 @@ logger = logging.getLogger(__name__) -def get_flowcell_object(flowcell, project): +def get_flowcell_object(flowcell, project, include_pod5=False): if re.match(filesystem.RUN_RE_ONT, flowcell): - return NanoporeFlowcell(flowcell=flowcell, project_id=project) + return NanoporeFlowcell( + flowcell=flowcell, project_id=project, include_pod5=include_pod5 + ) elif re.match(filesystem.RUN_RE_ILLUMINA, flowcell): return IlluminaFlowcell(flowcell=flowcell, project_id=project) elif re.match(filesystem.RUN_RE_ELEMENT, flowcell): @@ -44,13 +46,14 @@ def create_org_dir(self): class NanoporeFlowcell(Flowcell): """Defines a Nanopore Flowcell""" - def __init__(self, flowcell, project_id): + def __init__(self, flowcell, project_id, include_pod5=False): super().__init__(flowcell, project_id) self.destination_path = CONFIG.get("organise").get("nanopore_path") self.organised_project_dir = os.path.join(self.destination_path, project_id) self.tar_file = self.fc_id + ".tar" self.tar_path = os.path.join(self.organised_project_dir, self.tar_file) self.md5_path = self.tar_path + ".md5" + self.include_pod5 = include_pod5 def organise_data(self): """Tarball data into ONT_TAR""" @@ -59,7 +62,13 @@ def organise_data(self): tar_err = os.path.join(self.organised_project_dir, "tar.err") with filesystem.chdir(self.incoming_path): with open(tar_err, "w") as error_file: - tar_command = ["tar", "-cvf", self.tar_path, self.fc_id] + tar_command = [ + "tar", + *(["--exclude=pod5*"] if not self.include_pod5 else []), + "-cvf", + self.tar_path, + self.fc_id, + ] result = 
subprocess.run(tar_command, stderr=error_file) if result.returncode != 0: logger.error( diff --git a/taca/organise/organise.py b/taca/organise/organise.py index eba5bc97..9e45086f 100644 --- a/taca/organise/organise.py +++ b/taca/organise/organise.py @@ -7,9 +7,9 @@ logger = logging.getLogger(__name__) -def organise_flowcell(flowcell, project): +def organise_flowcell(flowcell, project, include_pod5=False): """Determine flowcell type and organise the data accordingly.""" - flowcell_object = get_flowcell_object(flowcell, project) + flowcell_object = get_flowcell_object(flowcell, project, include_pod5) flowcell_object.create_org_dir() flowcell_object.organise_data() logger.info(f"Finished organisation of flowcell {flowcell}.")