diff --git a/assets/test-datasets/b_fragilis_checkv_contamination.tsv b/assets/test-datasets/b_fragilis_checkv_contamination.tsv new file mode 100644 index 00000000..3d450230 --- /dev/null +++ b/assets/test-datasets/b_fragilis_checkv_contamination.tsv @@ -0,0 +1,5 @@ +contig_id contig_length total_genes viral_genes host_genes provirus proviral_length host_length region_types region_lengths region_coords_bp region_coords_genes region_viral_genes region_host_genes +k141_126 3500 6 4 2 Yes 2000 1500 viral,host,viral 1000,1500,1000 1-1000,1001-2500,2501-3500 1-2,3-4,5-6 2,0,2 0,2,0 +k141_248 1000 3 1 2 Yes 500 500 host,viral,host 250,500,250 1-250,251-750,751-1000 1,2,3 0,1,0 1,0,1 +k141_50 1000 6 0 0 No NA NA NA NA NA NA NA NA +k141_203 843 2 1 1 Yes 500 343 viral,host 500,343 1-500,501-843 1,2 1,0 0,1 diff --git a/assets/test-datasets/b_fragilis_clusters.tsv b/assets/test-datasets/b_fragilis_clusters.tsv new file mode 100644 index 00000000..837b413b --- /dev/null +++ b/assets/test-datasets/b_fragilis_clusters.tsv @@ -0,0 +1,3 @@ +k141_126 k141_126,k141_248 +k141_50 k141_50 +k141_203 k141_203 diff --git a/assets/test-datasets/b_fragilis_contigs.fasta.gz b/assets/test-datasets/b_fragilis_contigs.fasta.gz new file mode 100644 index 00000000..67039024 Binary files /dev/null and b/assets/test-datasets/b_fragilis_contigs.fasta.gz differ diff --git a/assets/test-datasets/b_fragilis_genomad_virus_summary.tsv b/assets/test-datasets/b_fragilis_genomad_virus_summary.tsv new file mode 100644 index 00000000..9911c3b8 --- /dev/null +++ b/assets/test-datasets/b_fragilis_genomad_virus_summary.tsv @@ -0,0 +1,5 @@ +seq_name length topology coordinates n_genes genetic_code virus_score fdr n_hallmarks marker_enrichment taxonomy +k141_126 3970 Provirus 1-3500 6 11 0.8408 0.0833 0 1.5749 Unclassified +k141_248 3153 Provirus 1000-2000 3 11 0.7912 0.1337 0 1.7183 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes +k141_50 3219 Provirus 1-1000 6 11 0.6643 0.1798 2 2.7775 
Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes +k141_203 843 No terminal repeats NA 2 11 0.9925 0.0075 1 3.0763 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes diff --git a/assets/test-datasets/b_fragilis_propagate_contigs.fa b/assets/test-datasets/b_fragilis_propagate_contigs.fa new file mode 100644 index 00000000..432901bb --- /dev/null +++ b/assets/test-datasets/b_fragilis_propagate_contigs.fa @@ -0,0 +1,6 @@ +>k141_126 +CATAAACTACGCTCGGCACAGCCTCTGCTCGCCGTGCTGAACCGCCTGGAACAGAGAAAACCGGTGGGGCTGCGCTACGATCCGCAAGCACAATCGCTGGTGTGCCTGCCCACGCAAACCCGGACGGGCTGGAATCTCAATGGCTTTGAGGTGGGGTTCAGGCCATGCGTCAGGCTGATGATTTACGGACGTTCTCTTGAGGCGCAGGCAACCGCGAGTCTTGCAGCAGCCACAGGCTATGACAGCCATATCTTCGATCTTTTTCCGGCCTCAGCCAGCGCTCAGATCGATACCGATACGGCGGTCATTTTGCTGTGCCATGATCTCAACCGGGAGCTGCCAGTGTTGCAGGCCGCGCGAGAAGCAAAACCCTTTTATCTCGGCGCATTGGGCAGCTATCGAACCCACACGTTACGTCTGCAAAAGCTCCACGAGCTGGGATGGTCCAGGGAGGAAACAACGCAAATCCGGGCACCCGTCGGGATATTCCCCAAAGCCCGGGATGCGCATACTCTGGCACTCTCCGTGCTGGCAGAAGTCGCCTCTGTACGTCTCCATCAGGAGGAGGATTCATGCCTGCCCCCGTCGTCCTGATCCTTGCGGCCGGGCGTGGAGAGTGCTTTCTCGCCTCCGGGGGAAATACCCATAAGTGTATCGGCTGGCGTCAGTCCCCGGAGGTTGCGCCTTATCGCTGGCCATTTGAAGAAAACGGGAGAACTTTCGACCTTGCGATTGAACCGCAGATTACGACTAATGATCTGCGTCTGATGTTGAGGCTGGCTCTTGCCGGCGGAGGAATAACAATTGCCACTCAGGAAACTTTCAGGCCATATATTGAAAGCGGTAAGCTTGTATCGCTGCTTGATGACTTTCTTCCACAATTTCCGGGCTTCTATCTGTATTTCCCACAGCGTCGCAATATTGCACCAAAGCTCCGCGCCCTGATTGACTACGTCAAAGAATGGCGGCAGCAATTGGCTTAAATGTCTGCACCTGCACTGCCTGATGTCAGAACAGTATTTTGATGAATTGCCAAGGTTACAATGGCACAAATACGGCACAGGAGGAAACGTGGTGTATTTAAATATGGGGTAACTCTTTGATTTTAATGGTGCCGATAATAGGAGTCGAACCTACGACCTTCGCATTACGAATGCGATGTTAATGCATTTTTAACTACTTGAGCAATCAATCAAAACAACATACTTTATATTATAAAACACCAAGTTATAGCAATAAACCCGTTGATTGAAATTAACAAAAGTTGATGTTTATTCCCTCCATTTACTTACACCAGCAGTTACATTATTGTAAGACAGCAATTCTCAACTGGAGCCGAAAAATGGCGCTTTCAAGACAAAAATTTACCTTCGAAAGACTTCGCAGATTCACCTTACCGGAAGGGAAAAAACAAACTTTTCTTTGGGATGCAGATGTAACAACCCTGGCATGCCGAGCAACTAGCGGAGCAAAAGCCTTTGTATTCCAAAGCGTATATGCGGGGAAAACCCTTCGCATGACTATTGGCAACATTAACGA
CTGGAAGATTGATGATGCGAGAGCCGAGGCAAGACGGTTACAAACATTGATCGATACAGGGATAGATCCACGAATTGCTAAGGCTGTAAAAATCGCAGAAGCAGAATCCCTGCAGGCAGAATCACGTAAAACAAAAGTGACTTTCTCCGTCGCCTGGGAAGACTATCTTCAAGAATTGAGAACCGGTATCAGTGCAAAAACTAAACGCCCATATTCTACTCGATACATTGCCGATCACATTAACTTGTCCAGTCGTGGAGGCGAAAGTAAAAAAAGAGGCCAAGGCCCGACTTCGGCTGGACCATTGGCTAGTTTGCTCAACCTGCCGTTATCGGAGCTAACCCCAGATTACATAGCAGCGTGGCTGAGTACAGAAAGGCAAAATAGACCTACCGTCACTGCTCACGCTTATCGCCTACTACGTGCTTTCATCAAATGGAGTAATTATCAGAAAAAATATCAAGGGATCATTCCTGGCGATCTGGCACAAGATTACAACGTAAGAAAAATGGTTCCCGTGTCAGCGAGTAAAGCTGATGATTGCCTGCAAAAGGAACAACTAAAAAGCTGGTTTAGTGCCGTGCGTAGCCTCAATAATCCTATTGCATCGGCCTATCTCCAAGTACTTTTGCTCACTGGTGCTCGGCGTGAAGAAATTGCGTCGCTTCGCTGGTCAGACGTAGATTTCAAATGGTCAAGCATGCGAATTAAAGACAAGATCGAAGGTGAACGTATCATCCCTCTCACTCCTTATGTTTCTGAATTGTTAAATGTGCTAGCGCAATCCCCAAATTCTGACGTAAATAAGGAGGGTTGGGTTTTCAGAAGTAACAGTAAAAGTGGCAAAATTATTGAGCCGCGTTCAGCGCACAACAGAGCATTAGTGCTGGCTGAGTTACCACATATCAGCCTTCACGGTTTACGTCGTAGTTTTGGTACTTTGGCCGAGTGGGTTGAAGTTCCCACTGGTATTGTTGCTCAAATTATGGGACACAAACCCAGCGCTCTTGCCGAAAAACACTATCGCCGTCGTCCGTTAGATCTGTTACGAAAATGGCACGAGAAAATTGAGACATGGATCTTAAATGAAGCAGGTATTACCATAAAAAACAACGTTGATATGCGTTGATTCCATTAAAAATCAACATATTACAAAATATCATCAACTATTGATCAAGATAGATTTTCATGTATCGTAATACACAGTTTAGTCAATGATACAGCAACTACACAGGAGATAAGCCAATGGCAACCCCAGCAACTGTATCCATAGAACCCACTCTGGCAGCTATCAGAGCTCGCTGGTGTATTAATTCAAGTAAAACAACTCAATCCTTTAACGATCCTGCGTCCATGGAAGAGGTTGTCGAGTATCTCAAAGGAACATACTCAGCTCTTCGCAAGTCTGTCGCATGCGCCAAACTGAAAATTTTACATCTTAAACAAAGAATGCAAAATGCTACTAACTTTCTCGCGCGTCTGATGTCATGTAAAAATCAGGCATCCAGATCGCATCACAGTACGGCTAAATCAGCTAAAAGTGCCTTATCATCAGATTCAGGTGATGGTAGTGACCCCGACCCCGAGCCCGAAACGTTTCCTTCTGCCTTCATTACTACCCCTACTAATTCAATAATGCTTAAAGCTTTCTTTGCCAATATCTCAATCACTGAGGTGGCAAAATGAGCGCATTCAAACTCCCGGATACATCTCAATCACAGCTCATTTCAACAGCTGAGTTAGCTAAAATCATTAGCTACAAATCTCAAACCATTCGTAAATGGCTTTGTCAGGACAAATTGCCTGAGGGGCTACCTCGCCCAAAACAAATCAATGGCCGCCATTACTGGTTACGTAAAGATGTCCTCGATTTTATAGATACATTTTCTGTACGAGAAAGTCTGTAATAAATTACAGATTTAATTTTATTGATTTATAGCGATGTTGCCCCGAGAAAAATGGGGCAACACTGAGAAATTTCAGATAGTAGTTTTATATTG
AGATAACAAAGAGGTTTCCTTAAAAATGTCTAATAGTGTTACTAATTTTGAGATGAGCAGCGTTCTACCAGGAAAAAAACCTTGTCAAGGCAAAAACAATGAGTCACAGGTAGTACAGACTACTCCCATAAAAAAACACTCAGTCACGTTCAAAAATCAATCTTCATTAGGAGTAATTGATCATTATGCCAGACTAACAAATAAATCTCACTCTTCCGTAATAGCGGAAGTTGTGGATTTGGCTATCCCTATATTAGAAAAATGCAATCGTCATAACTGGTCAATAAATGAAATAAAAAATGACCTGTTAAAGTTCTCTATAAAAGAAAGCATCAATCGAAGCCGAGGTAAAACAGAAGTAACTCTGGAAGAGTACTGTTCGTTAATCTGGAAAACGAACATCATGAGTCCATTAAAAATCCCCATTGCAG +>k141_50 +CGCTGGCCCTGCTTATTACAGGATGTGCTCAACAGACATTTACTGTTCAAAACAAACAGACAGCAGTAGCACCAAAGGAAACCATCACCCATCATTTCTTCGTTTCTGGAATTGGGCAGAAGAAAACTGTCGATGCAGCTAAAATTTGTGGCGGCGCAGAAAATGTTGTTAAAACAGAAACCCAGCAAACATTCGTAAATGGATTGCTCGGTTTTATTACTTTAGGCATTTATACTCCGCTGGAAGCGCGTGTGTATTGCTCAAAATAATTGCATGAGTTGCCCATCGATATGGTCAGCTCTATCTGCACTGCTCATTAATATACTTCTGGGTTCCTTCCAGTTGTTTTTGCATAGTGATCAGCCTCTCTCTGAGGGTGAAATAATCCCGTTCAGCGGTGTCTGCCAGTCGGGGGGAGGCTGCATTATCCACGCCGGAGGCCGTGGTGGCTTCACGCACTGACTGACAGACTGCTTTGATGTGCAACCGACGACGACCAGCGGCAACATCATCACGCAGAGCATCATTTTCAGCTTTCGCATCAGCTAACTCCTTCGTGTATTTTGCATCGAGCGCAGCAACATCACGCTGACGCATCTGCATGTCAGTAATTGCCGCGTTCGCTAGCTTCAGTTCTCTGGCATTTTTGTCGCGCTGGACTTTGTAGGCGATTGCGTTATCACGGTAATGATTGACCGCCCATGACAGGCTGACGATGATGCAGATAATCAGAGCGGATATAATCGCGGTTACTCTGCTCACTGTTGCCCCCACAAACAGACTTCACGCTCAATCTCACGACGAGTCATCAGGCCTTTCCATTGCTTACCGCCAGCGTATGTCCAGCGACGCAGCTGATCACATGCGCCTTTGATATCGCCCTGGTTTATTTTGCGAAGAAGCGTCGATGTTCTAAAATTGCCAGCACCCACGTTGTAAACGAATGAGTAAAGAGCGCCGCGCGTTGTTTCCGGTATATCGACTTTGATATACGGGTTAATTTGTCTGGCGACAGTGGCAAGGTCTTTATTCAAGAGTGCTTTGCATTCTGCTTTGGTATACGTTTTACCGAGCATGATGTCTTTTCCGGTGTGTCCGTGACATACAGTCCATACACCAACAATATCTTTGTATGGTATGTAGCTGACACCTTCCAGACCATCGTTACCACTTGGGCCAGTGATTAACACTGATGCTATAGCAATTGCTCCGCCACCAATAGCAGCAGCAACGGCTTTTCGTAATGATGGAGGCATTATTCACCTCTCGCAGCCTTGCGCTTATCTTCTTTAATCTTGAAATAAAGGTTTGTCAGGTACGTCAGCAGGCCAAATACCAGGCTACCCAGCACACCTATTGCTGCCCACTGTGAGGGAGTGACTTTATCTAGCAGCTGTAAAAACCAGTACCCGGCACTACCTGCTGAGGTGCCATAGGCGACACCCGTTGTTAACTTATCCATGGATTTCATAACCCCACCTCGCAGACAAAGCGGGTGTAAATTGAGGGAATACTACGAAACGTAACAGACTCGGAGTCAGTGAATAACTCAGGTAT
TGGGTTATCAGCTAATATCGAGACTCAAAAAATGGAAAAACCCGCTCGACGGCGGGTTTAAGCTGTGTGACGAAGTAACCACTCTTAACAGCATAACCAATTTTTTACGTACGTAAACCACTAAATGATATTTGAGAGAATGCTACCGAGTATTGAAAACACCACTACAAATACATAAGCAAATCTCAACAAATAACCAAAAAATAATTTCCAGTGTTATTTTTAGCCGGTTTAAATTGAACCTTCAAATTATAGAGCACTTATAAATAACAGCCGTTAATATAAATTGGCTAATAGATTTATTTTTATTCAGCCAAGAGCCATGAATAGGATTCGATAGAAAAAAGTTCAGATAAAAATAGAGATCTACTTCACAAATCAAACGAGAAACCAAAACTTACATCTTGAATAATCACATTGATTAGATGAATATTTATCGCGCAGTGACATCATTTTTTAATAATAGTTCAAAAAAAGGGCTCACGATGAAAAAATTAACAGTGGCAATTTCTGCTGTAGCTGCATCAGTACTAATGGCGATGTCTGCTCAGGCAGCTGAAATTTATAATAAAGACAGTAACAAGCTGGATCTGTACGGGAAAGTTAATGCTAAGCACTACTTCTCCTCTAATGATGCAGATGATGGTGATACTACTTATGCCCGTCTTGGCTTCAAAGGTGAAACCCAAATCAACGATCAACTGACTGGTTTCGGTCAGTGGGAATATGAATTCAAAGGCAACCGCGCTGAATCTCAAGGTTCCTCCAAAGATAAAACCCGTCTTGCCTTCGCTGGCCTGAAATTCGGTGACTACGGCTCCATCGATTACGGCCGTAACTACGGTGTAGCATACGACATCGGTGCGTGGACTGACGTCCTGCCAGAATTCGGTGGTGACACTTGGACTCAAACCGACGTGTTCATGACTCAACGTGCAACTGGTGTTGCAACCTATCGTAACAACGACTTCTTTGGTCTGGTTGATGGTCTGAACTTTGCTGCTCAGTACCAAGGCAAAAACGATCGTAGCGATTTCGATAACTACACTGAAGGTAACGGTGATGGCTTCGGTTTCTCTGCTACCTATGAATACGAAGGATTCGGTATCGGTGCAACTTATGCGAAATCTGATCGTACCGACACTCAAGTTAATGCAGGGAAAGTTCTTCCTGAAGTATTTGCTTCCGGTAAAAATGCAGAAGTTTGGGCCGCAGGTCTGAAATATGACGCTAACAACATTTACCTGGCCACTACCTATTCTGAAACCCAGAATATGACTGTATTTGCTGATCACTTCGTTGCTAATAAAGCCCAAAACTTCGAAGCTGTTGCACAATATCAGTTCGATTTCGGTCTGCGTCCGTCCGTTGCTTACCTGCAATCTAAAGGTAAGGATCTTGGAGTATGGGGCGATCAGGACTTAGTCAAATATGTTGATGTAGGTGCAACCTATTACTTCAACAAAAATATGTCTACTTTCGTTGATTACAAAATCAACCTGCTTGACAAAAATGACTTCACTAAGGAAGGTGCGAACAAGTCCCTGATATGAGATCATGTTTGTCATCTGGAGCCATAGAACAGGGTTCATCATGAGTCATCAACTTACCTTCGCCGACAGTGAATTCAGCAGTAAGCGCCGTCAGACCAGAAAAGAGATT +>k141_203 
+GGGCTTCACTGCGAAATTCAGGCGAATGCTGTTTACGGGGTTTTTTACTGGTTGATACTGTTTTTGTCATGTGAGTCACCTCTGACTGAGAGTTTACTCACTTAGCCGCGTGTCCACTATTGCTGGGTAAGATCAGATTACGGTTGCGCCTGTTACCGCGGCAACGTCCTGTGCACAGAAGCTCTTATGCGTCCCCAGGTAATGAATAATTGCCTCTTTGCCCGTCATACACTTGCTCCTTTCAGTCCGAACTTAGCTTTAATTTCTGCGATCTTCGCCAGAGCCTGTGCACGATTTAGAGGTCTACCGCCCATAACAGGAAGTTGTTTTACTGGTTCAGGTATCGTCTCACCACGGTTAATTCGCGCTGTCATACAGGTCAGTTCATCGGCAGCCTTGCGCCGTAATTCCGCGTCAGCCAGCGCATTGGCCCGCATGTTCTGGTACAAGTTGGTAACCAACCAGTAATGCGCGTTCGATTTCCACGGATAAGACTCTGCATCCGGATACAGGCCACGCTTCCGGCAATACTCGTACCTCCCGGGATTTCATGAAATTCCGGCTCGGTGGTTTCGAGGCAATAAAATCGGCTTACATGGCCCAGGTGCAGTACAGCATGTGGGTGACGCGAAAAGATGCCTGGTACTTTGCCAACTATGACCCGCGCATGAAGCGTGAAGGCCTGCATTATGTCGTGATTGAGCGGAATGAAAAGTACATGGCGAGTTTTGACGAGATGGTGCCGGAGTTCATCGAAAAAATGGACGAGGCACTGGCTGAAATTGGTTTTGTATTTGGGGAGCAATGGCGATGACGCATCCTCACGATAATATCCGGGTACCT diff --git a/assets/test-datasets/b_fragilis_provirus_coords.tsv b/assets/test-datasets/b_fragilis_provirus_coords.tsv new file mode 100644 index 00000000..937ede92 --- /dev/null +++ b/assets/test-datasets/b_fragilis_provirus_coords.tsv @@ -0,0 +1,5 @@ +scaffold fragment start stop +k141_126 k141_126|provirus_1_1000 1 1000 +k141_126 k141_126|provirus_2501_3500 2501 3500 +k141_50 k141_50|provirus_1_1000 1 1000 +k141_203 k141_203|provirus_1_500 1 500 diff --git a/bin/derep_coordinates.py b/bin/derep_coordinates.py new file mode 100755 index 00000000..3484472e --- /dev/null +++ b/bin/derep_coordinates.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +from Bio import SeqIO +import argparse +import sys +import gzip +import pandas as pd +import math + + +def parse_args(args=None): + Description = "Dereplicate provirus coordinates to match dereplicated provirus assemblies" + Epilog = "Example usage: python derep_coordinates.py --coordinates --clusters --output " + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument( + "-r", + "--coordinates", + help="Path to TSV file containing provirus coordinates for 
each assembly.",
+    )
+    parser.add_argument(
+        "-c",
+        "--clusters",
+        help="Path to the TSV file containing cluster information from dereplicating assemblies.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        help="Output TSV file containing provirus coordinates for dereplicated assembly.",
+    )
+    return parser.parse_args(args)
+
+def derep_coordinates(coords_tsv, clusters_tsv, output):
+    # open coordinates file
+    coords = pd.read_csv(coords_tsv, sep='\t')
+    # open cluster results
+    clusters = pd.read_csv(clusters_tsv, sep='\t', header=None)
+    # identify coords contained in derep clusters
+    derep_coords = coords[coords['scaffold'].isin(set(clusters[0]))]
+
+    # save coords file
+    derep_coords.to_csv(output, sep='\t', index=False)
+
+def main(args=None):
+    args = parse_args(args)
+    derep_coordinates(args.coordinates, args.clusters, args.output )
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/bin/extract_proviruses.py b/bin/extract_proviruses.py
new file mode 100755
index 00000000..aa321eda
--- /dev/null
+++ b/bin/extract_proviruses.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+
+from Bio import SeqIO
+import argparse
+import sys
+import gzip
+import pandas as pd
+import math
+
+
+def parse_args(args=None):
+    Description = "Extract proviruses present in assemblies based on geNomad and CheckV outputs."
+    Epilog = "Example usage: python extractproviruses.py --fasta --genomad --checkv --output --tsv "
+
+    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
+    parser.add_argument(
+        "-f",
+        "--fasta",
+        help="Path to FASTA file (gzipped) that contains assemblies.",
+    )
+    parser.add_argument(
+        "-g",
+        "--genomad",
+        help="Path to the TSV file containing geNomad's virus summary.",
+    )
+    parser.add_argument(
+        "-c",
+        "--checkv",
+        help="Path to the TSV file containing CheckV's contamination summary.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        help="Output FASTA file with assemblies containing proviruses.",
+    )
+    parser.add_argument(
+        "-t",
+        "--tsv",
+        help="Output TSV file containing provirus coordinates.",
+    )
+    return parser.parse_args(args)
+
+# columns of the CheckV viral-region table built by _checkv_viral_regions
+CHECKV_REGION_COLUMNS = ['seq_name', 'provirus_id', 'provirus_start', 'provirus_stop']
+# columns (and order) written to the coordinates TSV
+PROPAGATE_COLUMNS = ['scaffold', 'fragment', 'start', 'stop']
+
+
+def _checkv_viral_regions(checkv):
+    """Return one row per 'viral' region of every contig CheckV flags as a provirus.
+
+    The frame always carries CHECKV_REGION_COLUMNS, even when it is empty, so the
+    later merge on 'seq_name' cannot fail when CheckV reports no provirus.
+    """
+    regions = []
+    for _, row in checkv[checkv['provirus'] == 'Yes'].iterrows():
+        region_types = row['region_types'].split(',')
+        region_coords = row['region_coords_bp'].split(',')
+        provirus_count = 0
+        for region_type, coords in zip(region_types, region_coords):
+            if region_type != 'viral':
+                continue
+            provirus_count += 1
+            start, stop = coords.split('-')
+            regions.append({
+                'seq_name': row['contig_id'],
+                'provirus_id': row['contig_id'] + '|checkv_provirus_' + str(provirus_count),
+                'provirus_start': int(start),
+                'provirus_stop': int(stop),
+            })
+    return pd.DataFrame(regions, columns=CHECKV_REGION_COLUMNS)
+
+
+def _resolve_coordinates(row):
+    """Return the (start, stop) to report for one merged row, or None.
+
+    'row' is one row of the outer merge of geNomad proviruses and CheckV viral regions.
+    """
+    has_checkv = not pd.isna(row['provirus_start'])
+    coordinates = row['coordinates']
+    has_genomad = isinstance(coordinates, str) and '-' in coordinates
+
+    if has_checkv and has_genomad:
+        # both tools: offset the CheckV region by the geNomad start
+        # (-1 so that a CheckV start of 1 maps onto the geNomad start itself)
+        genomad_start = int(coordinates.split('-')[0])
+        start = genomad_start + int(row['provirus_start']) - 1
+        stop = start + int(row['provirus_stop']) - int(row['provirus_start'])
+        return start, stop
+    if has_checkv:
+        # CheckV provirus only: use the CheckV coordinates unchanged
+        return int(row['provirus_start']), int(row['provirus_stop'])
+    if has_genomad:
+        # geNomad provirus only: use the geNomad coordinates unchanged
+        start, stop = coordinates.split('-')[:2]
+        return int(start), int(stop)
+    # neither tool provides usable coordinates for this row
+    return None
+
+
+def extract_proviruses(fasta, genomad, checkv, out_fasta, out_tsv):
+    """Write provirus coordinates (TSV) and the scaffolds carrying them (FASTA).
+
+    fasta:     gzipped FASTA file containing the assemblies
+    genomad:   geNomad virus summary TSV
+    checkv:    CheckV contamination summary TSV
+    out_fasta: output FASTA with the full-length scaffolds that contain a provirus
+    out_tsv:   output TSV with the columns scaffold, fragment, start, stop
+    """
+    # identify genomad proviruses
+    genomad = pd.read_csv(genomad, sep='\t')
+    genomad_proviruses = genomad[genomad['topology'] == 'Provirus']
+
+    # identify checkv provirus coordinates (one row per viral region)
+    checkv_coords = _checkv_viral_regions(pd.read_csv(checkv, sep='\t'))
+
+    # outer merge keeps proviruses reported by only one of the two tools
+    genomad_checkv = genomad_proviruses.merge(checkv_coords, on='seq_name', how='outer')
+
+    records = []
+    for _, row in genomad_checkv.iterrows():
+        coords = _resolve_coordinates(row)
+        if coords is None:
+            continue
+        start, stop = coords
+        seq_name = row['seq_name']
+        records.append({
+            # drop any '|provirus...' suffix to recover the scaffold name
+            'scaffold': seq_name.split('|provirus')[0],
+            # name the fragment after its final start/stop
+            'fragment': seq_name.split('|provirus_')[0] + '|provirus_' + str(start) + '_' + str(stop),
+            'start': start,
+            'stop': stop,
+        })
+
+    # explicit columns: an empty result still yields a header-only TSV
+    provirus_coords = pd.DataFrame(records, columns=PROPAGATE_COLUMNS)
+    provirus_coords.to_csv(out_tsv, sep='\t', index=False)
+    provirus_scaffolds = set(provirus_coords['scaffold'])
+
+    # extract provirus scaffolds (full length) from the gzipped fasta
+    extracted_scaffolds = []
+    with gzip.open(fasta, "rt") as fasta_handle:
+        for record in SeqIO.parse(fasta_handle, "fasta"):
+            if record.id in provirus_scaffolds:
+                record.description = ''
+                record.name = ''
+                extracted_scaffolds.append(record)
+    # save all extracted provirus scaffolds to specified file
+    SeqIO.write(extracted_scaffolds, out_fasta, "fasta")
+
+
+def main(args=None):
+    args = parse_args(args)
+    extract_proviruses(args.fasta, args.genomad, args.checkv, args.output, args.tsv)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/bin/propagate.py b/bin/propagate.py
new file mode 100755
index 00000000..8e3b188e
--- /dev/null
+++ b/bin/propagate.py
@@ -0,0 +1,761 @@
+#!
/usr/bin/env python3 +# Author: Kristopher Kieft, UW-Madison +# kieft@wisc.edu + +# PropagAtE: Prophage Activity Estimator +# Version: v1.1.0 +# Release date: January 2022 + +import subprocess +import os +import sys +import pysam +import numpy as np +from numba import jit + + +def prophages_check(vibe): + with open(vibe, 'r') as phage_list: + header = phage_list.readline().strip('\n').split('\t') + check_line = phage_list.readline().strip('\n') + if not check_line: + return 'no prophages' + + try: + if header[0] == "scaffold" and header[1] == "fragment" and header[5] == "nucleotide start" and header[6] == "nucleotide stop": + return 'vibrant header' + except IndexError: + pass + try: + if header[0] == "scaffold" and (header[1] == "fragment" or header[1] == "prophage") and header[2] == "start" and header[3] == "stop": + return 'custom header' + except IndexError: + pass + + sys.stderr.write("\nError: -v coordinates file is formatted incorrectly. See README for details. Exiting.\n") + return '' + +def does_exist(file, descript): + if os.path.exists(file): + sys.stderr.write(f"\nError: the {descript} already exists. Exiting.\n\n") + return True + return False + +def not_exist(file, descript): + if not os.path.exists(file): + sys.stderr.write(f"\nError: the {descript} file does not exist. 
Exiting.\n\n") + return True + return False + +@jit(nopython=True) +def quick_stats(depth, start, stop, min_cov, all_phages, l, host_depth): + host_depth[start:stop] = np.nan # do not count any prophages in host coverage + all_phages += l + if l >= 1000: # minimum length allowed + cov = depth[start:stop] + a = np.nanmean(cov) + m = np.nanmedian(cov) + s = np.nanstd(cov) + d = np.count_nonzero(cov >= min_cov) / l + + return a,m,s,d,all_phages,host_depth + +@jit(nopython=True) +def host_stats(depth): + eff_host = depth.size + if eff_host > 0: + a = np.nanmean(depth) + m = np.nanmedian(depth) + s = np.nanstd(depth) + return a,m,s,eff_host + else: + return 0,0,0,0 + +def coverage_stats(depth, genome, prophage_dict, prophage_lengths, min_cov, length): + ''' + Calculate average, median and standard deviation + ''' + prophages = prophage_dict[genome] + phage_covs = {} + all_phages = 0 # length of all phages on this scaffold + host_depth = depth.copy() # in case of prophage overlap + for p in prophages: + phage,start,stop = p + l = prophage_lengths[phage] + a,m,s,d,all_phages,host_depth = quick_stats(depth, start, stop, min_cov, all_phages, l, host_depth) + phage_covs[phage] = (a,m,s,l,d) + + host_depth_filter = host_depth[~np.isnan(host_depth)] + a,m,s,eff_host = host_stats(host_depth_filter) # host + return phage_covs, a, m, s, eff_host + +@jit(nopython=True) +def add_depth(depth, start, end, ed, rl, read_id): + ''' + Add depth with read alignment filter + ''' + if ed/rl <= read_id: + for i in range(start,end): + depth[i] += 1 + return depth + +@jit(nopython=True) +def add_depth_no_ed(depth, start, end): + ''' + Add depth, ignore read alignment identity + ''' + for i in range(start,end): + depth[i] += 1 + return depth + +def fasta_parse(infile): + ''' + Standard fasta parser. 
+ ''' + with open(infile, 'r') as fasta: + for line in fasta: + if line[0] == '>': + try: + yield header, seq + except NameError: + pass # first line + seq = '' + header = line[1:].strip("\n") + else: + seq += line.strip("\n") + + # last one + yield header, seq + +def bowtie2_build(fasta, base, outpath, spaces, u): + ''' + Build bowtie2 index. + ''' + build = False + if not os.path.exists(fasta.rsplit('.',1)[0] + ".bowtie2.index.1.bt2"): + if spaces: + with open(f"{outpath}{base}.{u}.fasta", 'w') as fasta_out: + for name,seq in fasta_parse(fasta): + name = name.replace(" ", "~!!~") + fasta_out.write(f">{name}\n{seq}\n") + fasta = f"{outpath}{base}.{u}.fasta" + + subprocess.run(f"bowtie2-build {fasta} {outpath}{base}.bowtie2.index > /dev/null 2> /dev/null", shell=True) + build = True + + return build + +def run_bowtie2_paired(base, outpath, threads, forward, reverse): + subprocess.run(f"bowtie2 -x {outpath}{base}.bowtie2.index -1 {forward} -2 {reverse} -S {outpath}{base}.sam -q -p {threads} --no-unal --no-discordant > /dev/null 2> /dev/null", shell=True) + +def run_bowtie2_interleaved(base, outpath, threads, interleaved): + subprocess.run(f"bowtie2 -x {outpath}{base}.bowtie2.index --interleaved {interleaved} -S {outpath}{base}.sam -q -p {threads} --no-unal --no-discordant > /dev/null 2> /dev/null", shell=True) + +def run_bowtie2_unpaired(base, outpath, threads, unpaired): + subprocess.run(f"bowtie2 -x {outpath}{base}.bowtie2.index -U {unpaired} -S {outpath}{base}.sam -q -p {threads} --no-unal > /dev/null 2> /dev/null", shell=True) + +def post_bowtie2(outpath, base, clean, u, build, threads): + ''' + Clean up bowtie2 and SAM -> BAM + ''' + sam = f'{outpath}{base}.sam' + bam = f'{outpath}{base}.bam' + subprocess.run(f"samtools view -@ {threads} -h -b {sam} > {bam} 2> /dev/null", shell=True) + + if clean: + subprocess.run(f"rm {outpath}{base}.bowtie2.index.* 2> /dev/null", shell=True) + subprocess.run(f"rm {sam} 2> /dev/null", shell=True) + if build: + 
subprocess.run(f"rm {outpath}{base}.{u}.fasta 2> /dev/null", shell=True) + + return bam + +def sam_bam(sam, outpath, threads): + ''' + SAM -> BAM + ''' + try: + temp = sam.rsplit("/",1)[1] + base = temp.rsplit(".",1)[0] + except Exception: + base = sam.rsplit(".",1)[0] + + bam = f'{outpath}{base}.bam' + subprocess.run(f"samtools view -@ {threads} -h -b {sam} > {bam} 2> /dev/null", shell=True) + + return bam + + +def sort_bam(bam, outpath, threads, clean, clean_bam): + ''' + Sort BAM file. + ''' + if int(os.stat(bam).st_size) == 0: + return False, None + # check if anything aligned + check_aligned = subprocess.check_output(f"samtools view -@ {threads} {bam} | head -n 1", shell=True) + if len(check_aligned) == 0: + return False, None + + try: + temp = bam.rsplit("/",1)[1] + base = temp.rsplit(".",1)[0] + except Exception: + base = bam.rsplit(".",1)[0] + + sort_check = False + if clean_bam: # only need to check if bamfile was the input + try: + check = subprocess.check_output(f"samtools view -@ {threads} -H {bam} | grep '@HD'", shell=True) + if "coordinate" in str(check): + sort_check = True + except Exception: + # no @HD line, retain sort_check = False + pass + if not sort_check: + subprocess.run(f"samtools sort -@ {threads} -o {outpath}{base}.sorted.bam {bam} 2> /dev/null", shell=True) + + if clean and not clean_bam: + subprocess.run(f'rm {bam}', shell=True) + + bam = f'{outpath}{base}.sorted.bam' + + return True, bam + +@jit(nopython=True) +def get_len(start,stop): + ''' + Length of prophages. + Python is zero-based and end-exclusive in range(), + so even though start = start-1, leave this math + as stop-start to include the start and stop base precisely + ''' + return stop-start + + +@jit(nopython=True) +def reverse_and_zero_based(start, stop): + ''' + Reverse start and stop if the prophage is in reverse coordinates. + -1 to start for Python zero-based format. + Leave stop for Python range exclusive format. 
+ ''' + if start > stop: + start, stop = stop, start + start -= 1 + return start,stop + + +def process_coordinates_file(vibe, spaces, vibe_header): + ''' + Convert coordinates file into usable dictionary and list of names. + ''' + prophage_dict = {} + prophage_lengths = {} + prophage_dict_frags = {} + if vibe_header == 'vibrant header': + x = 5 + y = 6 + elif vibe_header == 'custom header': + x = 2 + y = 3 + + with open(vibe, 'r') as phage_list: + next(phage_list) + for line in phage_list: + line = line.strip('\n').split('\t') + if not line: continue + name = line[0] + frag = line[1] + + if spaces: + name = name.replace(" ", "~!!~") + frag = frag.replace(" ", "~!!~") + + start = int(line[x]) + stop = int(line[y]) + start, stop = reverse_and_zero_based(start, stop) + prophage_dict.setdefault(name, []).append((frag, start, stop)) + prophage_lengths[frag] = get_len(start,stop) + prophage_dict_frags[frag] = name + + genomes_full = set(list(prophage_dict.keys())) + prophages = len(prophage_dict_frags.keys()) + + return True, prophage_dict, prophage_dict_frags, genomes_full, prophages, prophage_lengths + +def get_lengths(fasta, spaces, genomes_full): + ''' + Get genome lengths for initialization of np.zeros(length) + ''' + lengths = {} + if spaces: + for name,seq in fasta_parse(fasta): + name = name.replace(" ", "~!!~") + if name in genomes_full: + lengths[name] = len(seq) + else: + for name,seq in fasta_parse(fasta): + if name in genomes_full: + lengths[name] = len(seq) + + return lengths + + +def extract_coverage(bam, read_id, lengths, mask, outfile, prophage_dict, effect, ratio_cutoff, prophage_dict_frags, prophage_lengths, min_breadth, min_cov, clean, spaces): + ''' + Code mainly from vRhyme (same author). + Extract coverage information from BAM file. 
+ ''' + with open(outfile, 'w') as output: + output.write("prophage\thost\tactive\tCohenD\tprophage-host_ratio\tmean_difference\tprophage_len\tprophage_mean_cov\tprophage_median_cov\tprophage_sd_cov\tprophage_cov_breadth\thost_len\thost_mean_cov\thost_median_cov\thost_sd_cov\n") + written = [] + total = 0 + + bai = False + if not os.path.exists(bam + '.bai'): + subprocess.run(f'samtools index {bam}', shell=True) + bai = True + + if mask != 0: + mask_f = mask + mask_r = -mask + else: + mask_f = False + mask_r = False + + bamfile = pysam.AlignmentFile(bam, "rb") + + if read_id != 0: + for x in bamfile.fetch(until_eof=True): + genome = x.reference_name + length = lengths.get(genome,False) + if length: # not in keep + try: + if genome != prev: + if mask_f: + depth[:mask_f] = np.nan + depth[mask_r:] = np.nan + prophage_covs, avg, med, sd, eff_host = coverage_stats(depth, prev, prophage_dict, prophage_lengths, min_cov, length) + written, total = write_coverages(outfile, prophage_covs, prev, avg, med, sd, eff_host, effect, ratio_cutoff, written, total, min_breadth, min_cov, spaces) + depth = np.zeros(length) + except NameError: + # prev not defined + depth = np.zeros(length) + + prev = genome + + ed = 0 + for t in x.tags: + if t[0] == 'NM': + ed = t[1] + break + rl = x.query_length + + start = x.reference_start # 0-based + end = x.reference_end + if end: + depth = add_depth(depth, start, end, ed, rl, read_id) + + if length: # not in keep + # last one + if mask_f: + depth[:mask_f] = np.nan + depth[mask_r:] = np.nan + prophage_covs, avg, med, sd, eff_host = coverage_stats(depth, prev, prophage_dict, prophage_lengths, min_cov, length) + written, total = write_coverages(outfile, prophage_covs, prev, avg, med, sd, eff_host, effect, ratio_cutoff, written, total, min_breadth, min_cov, spaces) + depth = None + else: # no mismatches + for x in bamfile.fetch(until_eof=True): + genome = x.reference_name + length = lengths.get(genome,False) + if length: # not in keep + try: + if 
genome != prev: + if mask_f: + depth[:mask_f] = np.nan + depth[mask_r:] = np.nan + prophage_covs, avg, med, sd, eff_host = coverage_stats(depth, prev, prophage_dict, prophage_lengths, min_cov, length) + written, total = write_coverages(outfile, prophage_covs, prev, avg, med, sd, eff_host, effect, ratio_cutoff, written, total, min_breadth, min_cov, spaces) + depth = np.zeros(length) + except NameError: + depth = np.zeros(length) + + prev = genome + start = x.reference_start # 0-based + end = x.reference_end + if end: + depth = add_depth_no_ed(depth, start, end) + + if length: # not in keep + # last one + if mask_f: + depth[:mask_f] = np.nan + depth[mask_r:] = np.nan + prophage_covs, avg, med, sd, eff_host = coverage_stats(depth, prev, prophage_dict, prophage_lengths, min_cov, length) + written, total = write_coverages(outfile, prophage_covs, prev, avg, med, sd, eff_host, effect, ratio_cutoff, written, total, min_breadth, min_cov, spaces) + depth = None + + bamfile.close() + if clean and bai: + subprocess.run(f'rm {bam}.bai', shell=True) + + include_zeros(outfile, prophage_lengths, written, prophage_dict_frags, lengths, spaces) + + return total + +@jit(nopython=True) +def cohenD(phage_mean, phage_sd, host_mean, host_sd): + """ + Cohen's d equation + """ + try: + pool = ((phage_sd**2+host_sd**2)/2)**0.5 + d = abs((host_mean-phage_mean)/pool) + except Exception: + # host has 0 coverage + d = 0 + return d + +@jit(nopython=True) +def activity(phage_mean,cov_depth,avg,d,effect,ratio_cutoff,min_breadth,min_cov,total): + ''' + Determine activity based on cutoffs + ''' + try: + ratio = phage_mean/avg + except Exception: + ratio = phage_mean + diff = phage_mean-avg + active = 'dormant' + if d >= effect and ratio >= ratio_cutoff: + if cov_depth >= min_breadth and phage_mean >= min_cov: + active = 'active' + total += 1 + else: + active = 'ambiguous' + return active,total,diff,ratio + +def write_coverages(outfile, prophage_covs, host, avg, med, sd, length, effect, ratio_cutoff, 
written, total, min_breadth, min_cov, spaces): + ''' + Within the BAM loop. + Perform statistical analyses and write out final results. + ''' + with open(outfile, 'a') as output: + for key,p in prophage_covs.items(): + phage_mean,phage_med,phage_sd,l,cov_depth = p + d = cohenD(phage_mean, phage_sd, avg, sd) + active,total,diff,ratio = activity(phage_mean,cov_depth,avg,d,effect,ratio_cutoff,min_breadth,min_cov,total) + if spaces: + key = key.replace("~!!~", " ") + host = host.replace("~!!~", " ") + output.write(f'{key}\t{host}\t{active}\t{d}\t{ratio}\t{diff}\t{l}\t{phage_mean}\t{phage_med}\t{phage_sd}\t{cov_depth}\t{length}\t{avg}\t{med}\t{sd}\n') + written.append(key) + return written, total + +def include_zeros(outfile, prophage_lengths, written, prophage_dict_frags, lengths, spaces): + ''' + Write out all the prophages not identified within the BAM file + ''' + written = set(written) + with open(outfile, 'a') as output: + for key,host in prophage_dict_frags.items(): + l = prophage_lengths[key] + length = lengths[host] + if spaces: + key = key.replace("~!!~", " ") + host = host.replace("~!!~", " ") + if key in written: continue + output.write(f'{key}\t{host}\tnot present\tNA\tNA\tNA\t{l}\tNA\tNA\tNA\tNA\t{length}\tNA\tNA\tNA\n') + +try: + import warnings + warnings.filterwarnings("ignore") + import argparse + import subprocess + import sys + import time + import datetime + import logging + import os + +except Exception as e: + sys.stderr.write("\nError: please verify dependancy imports are installed and up to date:\n\n") + sys.stderr.write(str(e) + "\n\n") + exit(1) + +# Set up variables +start = time.time() + +descript = ''' + PropagAtE: Prophage Activity Estimator (v1.1.0) + + Using a prophage coordinates file, fasta file and coverage information, + calculate if prophages were active in the given sample. + Prophages should be connected (integrated) to the host scaffold/genome. 
+ + Example: input paired reads and run Bowtie2 + Propagate -f scaffolds.fasta -r forward.fastq reverse.fastq -o output_folder -v prophage_coordinates.tsv -t threads + + Example: input BAM alignment file + Propagate -f scaffolds.fasta -b alignment.bam -o output_folder -v prophage_coordinates.tsv + +''' + +propagate = argparse.ArgumentParser(description=descript, formatter_class=argparse.RawTextHelpFormatter, usage=argparse.SUPPRESS) +propagate.add_argument('--version', action='version', version='PropagAtE v1.1.0') +required = propagate.add_argument_group('REQUIRED') +coverage = propagate.add_argument_group('PICK ONE') +common = propagate.add_argument_group('COMMON') +edit = propagate.add_argument_group('EDIT METHODS') + +# Input / Output +required.add_argument('-f', metavar='', type=str, nargs=1, required = True, help='input genomes/scaffolds (can have extra sequences in file)') +required.add_argument('-v', metavar='', type=str, nargs=1, required = True, help='VIBRANT "integrated_prophage_coordinates" file or custom file (see README)') +# +coverage.add_argument('-b', metavar='', type=str, nargs=1, default = [''], help='input BAM sequence alignment file') +coverage.add_argument('-s', metavar='', type=str, nargs=1, default = [''], help='input SAM sequence alignment file') +coverage.add_argument('-r', metavar='', type=str, nargs=2, default = ['',''], help='input paired read files separated by a space (forward reverse)') +coverage.add_argument('-i', metavar='', type=str, nargs=1, default = [''], help='input interleaved paired read file') +coverage.add_argument('-u', metavar='', type=str, nargs=1, default = [''], help='input unpaired read file') +# +common.add_argument('-o', metavar='', type=str, nargs=1, default = [''], help='name of output folder [default = Propagate_results_(-v)]') +common.add_argument('-t', metavar='', type=str, nargs=1, default = ['1'], help='threads [1]') +# +edit.add_argument('-p', metavar='', type=str, nargs=1, default = ['0.97'], help='minimum 
percent identity per aligned read for calculating coverage [0.97]') +edit.add_argument('-e', metavar='', type=str, nargs=1, default = ['0.70'], help="minimum effect size for significance by Cohen's d test [default=0.70, minimum=0.60]") +edit.add_argument('-c', metavar='', type=str, nargs=1, default = ['2.0'], help="minimum prophage:host coverage ratio for significance [default=2.0, minimum=1.50]") +edit.add_argument('--mask', metavar='', type=str, nargs=1, default = ['150'], help="mask coverage values bases on each end of a scaffold [150]") +edit.add_argument('--min', metavar='', type=str, nargs=1, default = ['1.0'], help="minimum average coverage to consider prophage present and for --breadth [1.0]") +edit.add_argument('--breadth', metavar='', type=str, nargs=1, default = ['0.5'], help="minimum breadth of coverage as fraction of bases >= minimum coverage (--min) [0.5]") +edit.add_argument('--clean', action='store_true', help='remove generated SAM, unsorted BAM, Bowtie2 index. Retain user input files and sorted BAM [off]') + +# Parse arguments +args = propagate.parse_args() +samfile = str(args.s[0]) +bamfile = str(args.b[0]) +clean_bam = False +if bamfile: + clean_bam = True +fasta = str(args.f[0]) +forward = str(args.r[0]) +reverse = str(args.r[1]) +if forward and not reverse: + sys.stderr.write("\nA reverse read set must be provided with the forward. 
Exiting.\n") + exit(1) +interleaved = str(args.i[0]) +unpaired = str(args.u[0]) +threads = str(args.t[0]) +vibe = str(args.v[0]) +try: + temp = vibe.rsplit("/",1)[1] + u = temp.rsplit(".",1)[0] +except Exception: + u = vibe.rsplit(".",1)[0] +mask = int(args.mask[0]) +min_cov = float(args.min[0]) +min_breadth = float(args.breadth[0]) +effect = float(args.e[0]) +read_id = float(args.p[0]) +ratio_cutoff = float(args.c[0]) +outpath = str(args.o[0]) +if not outpath: + outpath = f'PropagAtE_results_{u}/' +if outpath[-1] != '/': + outpath += '/' + +# make sure inputs are correct +exist = [ + does_exist(outpath, 'output folder'), + not_exist(fasta, 'fasta'), + not_exist(vibe, 'coordinates') +] + +if any(exist): + exit(1) + +vibe_header = prophages_check(vibe) +if not vibe_header: + exit(1) + +# verify inputs +check = [samfile, bamfile, forward, interleaved, unpaired] +check = [c for c in check if c != ''] +if len(check) > 1 or not check: + sys.stderr.write(f"\nOnly one input file (-s, -b, -r, -i, -u) is allowed. {len(check)} provided. Exiting.\n") + exit(1) + +if forward and reverse: + if not forward.endswith('.fastq') and not forward.endswith('.fastq.gz'): + sys.stderr.write("\nError: Provided paired reads files must both have the extension .fastq or .fastq.gz. Exiting.\n") + sys.stderr.write(f"{forward}\n") + exit(1) + if not reverse.endswith('.fastq') and not reverse.endswith('.fastq.gz'): + sys.stderr.write("\nError: Provided paired reads files must both have the extension .fastq or .fastq.gz. Exiting.\n") + sys.stderr.write(f"{reverse}\n\n") + exit(1) +if interleaved: + if not interleaved.endswith('.fastq') and not interleaved.endswith('.fastq.gz'): + sys.stderr.write("\nError: Provided interleaved reads file must have the extension .fastq or .fastq.gz. 
Exiting.\n") + sys.stderr.write(f"{interleaved}\n\n") + exit(1) +if unpaired: + if not unpaired.endswith('.fastq') and not unpaired.endswith('.fastq.gz'): + sys.stderr.write("\nError: Provided unpaired reads file must have the extension .fastq or .fastq.gz. Exiting.\n") + sys.stderr.write(f"{unpaired}\n\n") + exit(1) + +if samfile: + if not_exist(samfile, 'sam file'): + exit(1) + if not bamfile.endswith('.sam'): + sys.stderr.write("\nError: Provided sam file must have the extension .sam. Exiting.\n") + exit(1) +if bamfile: + if not_exist(bamfile, 'bam file'): + exit(1) + if not bamfile.endswith('.bam'): + sys.stderr.write("\nError: Provided bam file must have the extension .bam. Exiting.\n") + exit(1) + +if effect < 0.6: + sys.stderr.write("\nError: Cohen's d effect size (-e) should not be set below 0.6. Exiting.\n") + exit(1) +if min_breadth > 1: + sys.stderr.write("\nError: breadth (--breadth) should be a decimal value <= 1. Exiting.\n") + exit(1) +if ratio_cutoff < 1.5: + sys.stderr.write("\nError: ratio cutoff (-c) should not be set below 1.5. Exiting.\n") + exit(1) +if read_id > 1: + sys.stderr.write("\nError: percent identity (-p) should be a decimal value <= 1. 
Exiting.\n") + exit(1) +read_id = 1.0 - read_id + + +# set up folder and log +subprocess.run(f'mkdir {outpath}', shell=True) +if outpath.count('/') > 1: + base = outpath.rsplit("/",2)[1] +else: + base = outpath[:-1] +outfile = f'{outpath}{base}.tsv' +logfilename = f'{outpath}{base}.log' +logging.basicConfig(filename=logfilename, level=logging.INFO, format='%(message)s') + +##### ----------------------------------------------------------------------------------------------------------------------- ##### +logging.info("Command: %s" % ' '.join(sys.argv)) +logging.info("") +logging.info("Date: %s" % str(datetime.date.today())) +logging.info("Time: %s" % str(datetime.datetime.now().time()).rsplit(".",1)[0]) +logging.info("Program: PropagAtE v1.1.0\n") + +logging.info("Time (min) | Log ") +logging.info("--------------------------------------------------------------------") + +if vibe_header == 'no prophages': + logging.info("%s No prophages were found in the input coordinates file" % str(round((time.time() - float(start))/60,1))) + logging.info("%s Analysis finished" % str(round((time.time() - float(start))/60,1))) + logging.info("") + logging.info("") + logging.info("Results file: %s" % outfile.replace(outpath,'')) + logging.info("Active prophages: 0") + logging.info("") + os.mkdir(outpath) + open(outpath + "/" + outpath + ".tsv").close() + +# check for spaces in fasta +spaces = False +try: + check = subprocess.check_output(f'grep -c " " {fasta}', shell=True) + check = int(spaces.strip("'").strip("b")) + if check > 0: + spaces = True +except Exception: + spaces = True + +# If input is reads/fasta run Bowtie2 +if forward or interleaved or unpaired: + logging.info("%s Reads input identified, using %s threads to run Bowtie2." % (str(round((time.time() - float(start))/60,1)),threads)) + + try: + subprocess.check_output("which bowtie2", shell=True) + except Exception: + sys.stderr.write("\nError: Bowtie2 does not appear to be installed or is not in the system's PATH. 
Exiting.\n") + logging.info("\nError: Bowtie2 does not appear to be installed or is not in the system's PATH. Exiting.\n") + exit(1) + try: + temp = fasta.rsplit("/",1)[1] + base = temp.rsplit(".",1)[0] + except Exception: + base = fasta.rsplit(".",1)[0] + + build = bowtie2_build(fasta, base, outpath, spaces, u) + if forward: + if not os.path.exists(forward) or not os.path.exists(reverse): + sys.stderr.write("\nError: the forward and/or reverse reads files do not exist. Exiting.\n\n") + exit(1) + run_bowtie2_paired(base, outpath, threads, forward, reverse) + elif interleaved: + if not os.path.exists(interleaved): + sys.stderr.write("\nError: the interleaved reads file does not exist. Exiting.\n\n") + exit(1) + run_bowtie2_interleaved(base, outpath, threads, interleaved) + elif unpaired: + if not os.path.exists(unpaired): + sys.stderr.write("\nError: the unpaired reads file does not exist. Exiting.\n\n") + exit(1) + run_bowtie2_unpaired(base, outpath, threads, unpaired) + bamfile = post_bowtie2(outpath, base, args.clean, u, build, threads) + +if samfile: + logging.info("%s Converting SAM file to BAM format" % str(round((time.time() - float(start))/60,1))) + bamfile = sam_bam(samfile, outpath, threads) + +if bamfile: + logging.info("%s Checking if BAM file needs to be sorted" % str(round((time.time() - float(start))/60,1))) + check_bam, bamfile = sort_bam(bamfile, outpath, threads, args.clean, clean_bam) + if not check_bam: + sys.stderr.write("\nError: The SAM/BAM file appears to be empty, was not converted properly, or no reads aligned. Exiting.\n") + logging.info("\nError: The SAM/BAM file appears to be empty, was not converted properly, or no reads aligned. 
Exiting.\n") + exit(1) + +# 'bamfile' is now from read alignment, sam, unsorted bam, or direct input + +# Read in prophage coordinate data +logging.info("%s Generating a list of all prophage regions" % str(round((time.time() - float(start))/60,1))) +check, prophage_dict, prophage_dict_frags, genomes_full, prophages, prophage_lengths = process_coordinates_file(vibe, spaces, vibe_header) + +number_hosts = len(genomes_full) +logging.info("%s Number of prophage regions identified: %s" % (str(round((time.time() - float(start))/60,1)),prophages)) +logging.info("%s Number of unique host regions identified: %s" % (str(round((time.time() - float(start))/60,1)),number_hosts)) + +# process sam/bam files +logging.info("%s Extracting coverage and performing statistical analyses" % str(round((time.time() - float(start))/60,1))) +lengths = get_lengths(fasta, spaces, genomes_full) +total = extract_coverage(bamfile, read_id, lengths, mask, outfile, prophage_dict, effect, ratio_cutoff, prophage_dict_frags, prophage_lengths, min_breadth, min_cov, args.clean, spaces) + +logging.info("%s Analysis finished" % str(round((time.time() - float(start))/60,1))) +logging.info("") +logging.info("") +logging.info("Results file: %s" % outfile.replace(outpath,'')) +logging.info("Active prophages: %s" % total) +logging.info("") +logging.info(' ##') +logging.info(' ## ##') +logging.info(' ## ##') +logging.info('###### ## ## ## ####### ###### ##### ## ##') +logging.info('## ## ## ## ## ## ## ## ## ## ##') +logging.info('###### ###### ###### ## ### ###### ### ##') +logging.info('## ## ## ## ## ## ## ## ## ##') +logging.info('## ## ## ## ## ####### ###### ##### ##') +logging.info(' # ## #') +logging.info(' # # ## # #') +logging.info(' # # # #') +logging.info(' # #') +logging.info("") + + +# +# +# diff --git a/modules/local/anicluster/aniclust/main.nf b/modules/local/anicluster/aniclust/main.nf index d6f0adf4..a52ee4c2 100644 --- a/modules/local/anicluster/aniclust/main.nf +++ 
b/modules/local/anicluster/aniclust/main.nf @@ -8,7 +8,11 @@ process ANICLUSTER_ANICLUST { 'biocontainers/mulled-v2-80c23cbcd32e2891421c54d1899665046feb07ef:77a31e289d22068839533bf21f8c4248ad274b60-0' }" input: - tuple val(meta), path(fasta), path(ani) + tuple val(meta) , path(fasta) + tuple val(meta2), path(ani) + val min_ani + val min_qcov + val min_tcov output: tuple val(meta), path("*_clusters.tsv") , emit: clusters @@ -22,10 +26,13 @@ process ANICLUSTER_ANICLUST { def prefix = task.ext.prefix ?: "${meta.id}" """ aniclust.py \\ - --fna $fasta \\ - --ani $ani \\ + --fna ${fasta} \\ + --ani ${ani} \\ --out ${prefix}_clusters.tsv \\ - $args + --min_ani ${min_ani} \\ + --min_qcov ${min_qcov} \\ + --min_tcov ${min_tcov} \\ + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/anicluster/aniclust/nextflow.config b/modules/local/anicluster/aniclust/nextflow.config index 5bf3d920..d7bfa63a 100644 --- a/modules/local/anicluster/aniclust/nextflow.config +++ b/modules/local/anicluster/aniclust/nextflow.config @@ -1,10 +1,5 @@ process { withName: ANICLUSTER_ANICLUST { - ext.args = [ - "--min_ani ${params.anicluster_min_ani}", - "--min_qcov ${params.anicluster_min_qcov}", - "--min_tcov ${params.anicluster_min_tcov}" - ].join(' ').trim() publishDir = [ path: { "${params.outdir}/GenomeClustering/aniclust" }, mode: params.publish_dir_mode, diff --git a/modules/local/anicluster/aniclust/tests/main.nf.test b/modules/local/anicluster/aniclust/tests/main.nf.test index d722cb7b..4c1a1c33 100644 --- a/modules/local/anicluster/aniclust/tests/main.nf.test +++ b/modules/local/anicluster/aniclust/tests/main.nf.test @@ -8,13 +8,24 @@ nextflow_process { test("fasta.gz & ani.tsv") { when { + params { + anicluster_min_ani = 95 + anicluster_min_qcov = 0 + anicluster_min_tcov = 85 + } process { """ input[0] = [ [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', 
checkIfExists: true ), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true ) + ] + input[1] = [ + [ id: 'test' ], file(params.pipelines_testdata_base_path + 'modules/local/anicluster/aniclust/ani.tsv', checkIfExists: true ) ] + input[2] = params.anicluster_min_ani + input[3] = params.anicluster_min_qcov + input[4] = params.anicluster_min_tcov """ } } @@ -32,13 +43,24 @@ nextflow_process { options "-stub" when { + params { + anicluster_min_ani = 95 + anicluster_min_qcov = 0 + anicluster_min_tcov = 85 + } process { """ input[0] = [ [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true ), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true ) + ] + input[1] = [ + [ id: 'test' ], file(params.pipelines_testdata_base_path + 'modules/local/anicluster/aniclust/ani.tsv', checkIfExists: true ) ] + input[2] = params.anicluster_min_ani + input[3] = params.anicluster_min_qcov + input[4] = params.anicluster_min_tcov """ } } diff --git a/modules/local/propagate/derepcoordinates/environment.yml b/modules/local/propagate/derepcoordinates/environment.yml new file mode 100644 index 00000000..66136216 --- /dev/null +++ b/modules/local/propagate/derepcoordinates/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::biopython=1.78 + - conda-forge::pandas=1.2.4 diff --git a/modules/local/propagate/derepcoordinates/main.nf b/modules/local/propagate/derepcoordinates/main.nf new file mode 100644 index 00000000..62e709e1 --- /dev/null +++ b/modules/local/propagate/derepcoordinates/main.nf @@ -0,0 +1,53 @@ +process PROPAGATE_DEREPCOORDINATES { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-80c23cbcd32e2891421c54d1899665046feb07ef:77a31e289d22068839533bf21f8c4248ad274b60-0' : + 'biocontainers/mulled-v2-80c23cbcd32e2891421c54d1899665046feb07ef:77a31e289d22068839533bf21f8c4248ad274b60-0' }" + + input: + tuple val(meta) , path(coordinates) + tuple val(meta2), path(clusters) + + output: + tuple val(meta), path("${prefix}_derep_provirus_coords.tsv") , emit: derep_coords + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + derep_coordinates.py \\ + --coordinates ${coordinates} \\ + --clusters ${clusters} \\ + --output ${prefix}_derep_provirus_coords.tsv \\ + ${args} + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$( python --version | sed 's/Python //' ) + biopython: \$(echo \$(biopython_version.py 2>&1)) + numpy: \$(echo \$(numpy_version.py 2>&1)) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_derep_provirus_coords.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$( python --version | sed 's/Python //' ) + biopython: \$(echo \$(biopython_version.py 2>&1)) + numpy: \$(echo \$(numpy_version.py 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/local/propagate/derepcoordinates/nextflow.config b/modules/local/propagate/derepcoordinates/nextflow.config new file mode 100644 index 00000000..034b1ba8 --- /dev/null +++ b/modules/local/propagate/derepcoordinates/nextflow.config @@ -0,0 +1,9 @@ +process { + withName: PROPAGATE_DEREPCOORDINATES { + publishDir = [ + path: { "${params.outdir}/ProvirusActivity/propagate/derepcoordinates" }, + mode: params.publish_dir_mode, + pattern: '*_derep_provirus_coords.tsv', + ] + } +} diff --git 
a/modules/local/propagate/derepcoordinates/tests/main.nf.test b/modules/local/propagate/derepcoordinates/tests/main.nf.test new file mode 100644 index 00000000..526fc8a7 --- /dev/null +++ b/modules/local/propagate/derepcoordinates/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process: PROPAGATE_DEREPCOORDINATES" + script "../main.nf" + process "PROPAGATE_DEREPCOORDINATES" + + setup { + run("PROPAGATE_EXTRACTPROVIRUSES") { + script "../../extractproviruses/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/propagate_test_contigs.fasta.gz", checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_genomad_virus_summary.tsv", checkIfExists: true) + ] + ) + input[2] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_checkv_contamination.tsv", checkIfExists: true) + ] + ) + """ + } + } + } + + + test("coords.tsv + clusters.tsv") { + + when { + process { + """ + input[0] = PROPAGATE_EXTRACTPROVIRUSES.out.provirus_coords + input[1] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_clusters.tsv", checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("coords.tsv + clusters.tsv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = PROPAGATE_EXTRACTPROVIRUSES.out.provirus_coords + input[1] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_clusters.tsv", checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git 
a/modules/local/propagate/derepcoordinates/tests/main.nf.test.snap b/modules/local/propagate/derepcoordinates/tests/main.nf.test.snap new file mode 100644 index 00000000..2da21e51 --- /dev/null +++ b/modules/local/propagate/derepcoordinates/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "coords.tsv + clusters.tsv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_derep_provirus_coords.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,244bc446383cbd763a9b5a5d58208fb9" + ], + "derep_coords": [ + [ + { + "id": "test" + }, + "test_derep_provirus_coords.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,244bc446383cbd763a9b5a5d58208fb9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T08:42:07.920114246" + }, + "coords.tsv + clusters.tsv": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_derep_provirus_coords.tsv:md5,8c7a6441698648dfd73085524ce6a9cf" + ] + ], + "1": [ + "versions.yml:md5,244bc446383cbd763a9b5a5d58208fb9" + ], + "derep_coords": [ + [ + { + "id": "test" + }, + "test_derep_provirus_coords.tsv:md5,8c7a6441698648dfd73085524ce6a9cf" + ] + ], + "versions": [ + "versions.yml:md5,244bc446383cbd763a9b5a5d58208fb9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T08:41:57.465382143" + } +} \ No newline at end of file diff --git a/modules/local/propagate/extractproviruses/environment.yml b/modules/local/propagate/extractproviruses/environment.yml new file mode 100644 index 00000000..66136216 --- /dev/null +++ b/modules/local/propagate/extractproviruses/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::biopython=1.78 + - conda-forge::pandas=1.2.4 diff --git a/modules/local/propagate/extractproviruses/main.nf b/modules/local/propagate/extractproviruses/main.nf new file mode 100644 index 
00000000..3161a8e6 --- /dev/null +++ b/modules/local/propagate/extractproviruses/main.nf @@ -0,0 +1,59 @@ +process PROPAGATE_EXTRACTPROVIRUSES { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-80c23cbcd32e2891421c54d1899665046feb07ef:77a31e289d22068839533bf21f8c4248ad274b60-0' : + 'biocontainers/mulled-v2-80c23cbcd32e2891421c54d1899665046feb07ef:77a31e289d22068839533bf21f8c4248ad274b60-0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(virus_summary) + tuple val(meta3), path(contamination_summary) + + output: + tuple val(meta), path("${prefix}_provirus_scaffolds.fasta.gz") , emit: provirus_scaffolds + tuple val(meta), path("${prefix}_provirus_coords.tsv") , emit: provirus_coords + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + extract_proviruses.py \\ + --fasta ${fasta} \\ + --genomad ${virus_summary} \\ + --checkv ${contamination_summary} \\ + --output ${prefix}_provirus_scaffolds.fasta \\ + --tsv ${prefix}_provirus_coords.tsv \\ + ${args} + + gzip ${prefix}_provirus_scaffolds.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$( python --version | sed 's/Python //' ) + biopython: \$(echo \$(biopython_version.py 2>&1)) + numpy: \$(echo \$(numpy_version.py 2>&1)) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}_provirus_scaffolds.fasta.gz + touch ${prefix}_provirus_coords.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$( python --version | sed 's/Python //' ) + biopython: \$(echo \$(biopython_version.py 2>&1)) + numpy: \$(echo \$(numpy_version.py 2>&1)) + END_VERSIONS + 
""" +} diff --git a/modules/local/propagate/extractproviruses/nextflow.config b/modules/local/propagate/extractproviruses/nextflow.config new file mode 100644 index 00000000..1b32d44a --- /dev/null +++ b/modules/local/propagate/extractproviruses/nextflow.config @@ -0,0 +1,7 @@ +process { + withName: PROPAGATE_EXTRACTPROVIRUSES { + publishDir = [ + enabled: false + ] + } +} diff --git a/modules/local/propagate/extractproviruses/tests/main.nf.test b/modules/local/propagate/extractproviruses/tests/main.nf.test new file mode 100644 index 00000000..6c8ec6bc --- /dev/null +++ b/modules/local/propagate/extractproviruses/tests/main.nf.test @@ -0,0 +1,79 @@ +nextflow_process { + + name "Test Process: PROPAGATE_EXTRACTPROVIRUSES" + script "../main.nf" + process "PROPAGATE_EXTRACTPROVIRUSES" + + + test("contigs.fasta + genomad_virus_summary + check_contamination_summary") { + + when { + process { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/propagate_test_contigs.fasta.gz", checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_genomad_virus_summary.tsv", checkIfExists: true) + ] + ) + input[2] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_checkv_contamination.tsv", checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("contigs.fasta + genomad_virus_summary + check_contamination_summary - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/propagate_test_contigs.fasta.gz", checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + 
"modules/local/propagate/b_fragilis_genomad_virus_summary.tsv", checkIfExists: true) + ] + ) + input[2] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_checkv_contamination.tsv", checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/propagate/extractproviruses/tests/main.nf.test.snap b/modules/local/propagate/extractproviruses/tests/main.nf.test.snap new file mode 100644 index 00000000..13706f0b --- /dev/null +++ b/modules/local/propagate/extractproviruses/tests/main.nf.test.snap @@ -0,0 +1,100 @@ +{ + "contigs.fasta + genomad_virus_summary + check_contamination_summary": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_provirus_scaffolds.fasta.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_provirus_coords.tsv:md5,67b5d5efea9a88fca9e86002e79bc650" + ] + ], + "2": [ + "versions.yml:md5,f5e250dfe50e8080455f7b76e1dab523" + ], + "provirus_coords": [ + [ + { + "id": "test" + }, + "test_provirus_coords.tsv:md5,67b5d5efea9a88fca9e86002e79bc650" + ] + ], + "provirus_scaffolds": [ + [ + { + "id": "test" + }, + "test_provirus_scaffolds.fasta.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f5e250dfe50e8080455f7b76e1dab523" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-18T23:29:10.659223267" + }, + "contigs.fasta + genomad_virus_summary + check_contamination_summary - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_provirus_scaffolds.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_provirus_coords.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,f5e250dfe50e8080455f7b76e1dab523" + ], + "provirus_coords": [ + [ + { + "id": 
"test" + }, + "test_provirus_coords.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "provirus_scaffolds": [ + [ + { + "id": "test" + }, + "test_provirus_scaffolds.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,f5e250dfe50e8080455f7b76e1dab523" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-18T23:29:24.642465762" + } +} \ No newline at end of file diff --git a/modules/local/propagate/propagate/environment.yml b/modules/local/propagate/propagate/environment.yml new file mode 100644 index 00000000..9c59162b --- /dev/null +++ b/modules/local/propagate/propagate/environment.yml @@ -0,0 +1,7 @@ +name: propagate_propagate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::vrhyme=1.1.0 diff --git a/modules/local/propagate/propagate/main.nf b/modules/local/propagate/propagate/main.nf new file mode 100644 index 00000000..c46bf564 --- /dev/null +++ b/modules/local/propagate/propagate/main.nf @@ -0,0 +1,54 @@ +process PROPAGATE_PROPAGATE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/vrhyme:1.1.0--pyhdfd78af_1' : + 'biocontainers/vrhyme:1.1.0--pyhdfd78af_1' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(provirus_coords) + + output: + tuple val(meta), path("${prefix}_propagate/${prefix}_propagate.tsv") , emit: results + tuple val(meta), path("${prefix}_propagate/${prefix}_propagate.log") , emit: log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + propagate.py \\ + -f ${fasta} \\ + -v ${provirus_coords} \\ + -r ${reads} \\ + -o ${prefix}_propagate \\ + -t ${task.cpus} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + propagate: \$(propagate.py --version 2>&1 | sed 's/^.*PropagAtE v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}_propagate + touch ${prefix}_propagate/${prefix}_propagate.tsv + touch ${prefix}_propagate/${prefix}_propagate.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + propagate: \$(propagate.py --version 2>&1 | sed 's/^.*PropagAtE v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/propagate/propagate/nextflow.config b/modules/local/propagate/propagate/nextflow.config new file mode 100644 index 00000000..43f73804 --- /dev/null +++ b/modules/local/propagate/propagate/nextflow.config @@ -0,0 +1,17 @@ +process { + withName: PROPAGATE_PROPAGATE { + publishDir = [ + path: { "${params.outdir}/ProvirusActivity/propagate/propagate" }, + mode: params.publish_dir_mode, + pattern: '**/*_propagate.tsv', + ] + ext.prefix = { "${meta.id}_${meta.group}" } + ext.args = [ + params.propagate_min_read_id ? "-p ${params.propagate_min_read_id}" : "", + params.propagate_min_effect ? 
"-e ${params.propagate_min_effect}" : "", + params.propagate_min_cov_ratio ? "-c ${params.propagate_min_cov_ratio}" : "", + params.propagate_min_cov ? "--min ${params.propagate_min_cov}" : "", + params.propagate_min_breadth ? "--breadth ${params.propagate_min_breadth}" : "", + ].join(' ').trim() + } +} diff --git a/modules/local/propagate/propagate/tests/main.nf.test b/modules/local/propagate/propagate/tests/main.nf.test new file mode 100644 index 00000000..5293609e --- /dev/null +++ b/modules/local/propagate/propagate/tests/main.nf.test @@ -0,0 +1,89 @@ +nextflow_process { + + name "Test Process: PROPAGATE_PROPAGATE" + script "../main.nf" + process "PROPAGATE_PROPAGATE" + + + test("fastq.gz + fasta + provirus_coords.tsv") { + when { + process { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_propagate_contigs.fasta", checkIfExists: true) + ] + ) + input[2] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilist_provirus_coords.tsv", checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.results, + path(process.out.log[0][1]).readLines()[17..32], + process.out.versions + ).match() + } + ) + } + } + + test("fastq.gz + fasta + provirus_coords.tsv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz", checkIfExists: true), + 
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_propagate_contigs.fasta", checkIfExists: true) + ] + ) + input[2] = Channel.of( + [ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilist_provirus_coords.tsv", checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/propagate/propagate/tests/main.nf.test.snap b/modules/local/propagate/propagate/tests/main.nf.test.snap new file mode 100644 index 00000000..eb358f33 --- /dev/null +++ b/modules/local/propagate/propagate/tests/main.nf.test.snap @@ -0,0 +1,89 @@ +{ + "fastq.gz + fasta + provirus_coords.tsv": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_propagate.tsv:md5,f5dc748967a1840bff9c504804d4b6d4" + ] + ], + [ + "Results file: test_propagate.tsv", + "Active prophages: 0", + "", + " ##", + " ## ##", + " ## ##", + "###### ## ## ## ####### ###### ##### ## ##", + "## ## ## ## ## ## ## ## ## ## ##", + "###### ###### ###### ## ### ###### ### ##", + "## ## ## ## ## ## ## ## ## ##", + "## ## ## ## ## ####### ###### ##### ##", + " # ## #", + " # # ## # #", + " # # # #", + " # #", + "" + ], + [ + "versions.yml:md5,dfe5c94afe825f0011160109ed5f8bb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T15:12:59.933317207" + }, + "fastq.gz + fasta + provirus_coords.tsv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_propagate.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_propagate.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,dfe5c94afe825f0011160109ed5f8bb1" + ], + 
"log": [ + [ + { + "id": "test" + }, + "test_propagate.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "results": [ + [ + { + "id": "test" + }, + "test_propagate.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,dfe5c94afe825f0011160109ed5f8bb1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T15:13:10.985670747" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index efda4043..c81ce228 100644 --- a/nextflow.config +++ b/nextflow.config @@ -77,12 +77,24 @@ params { // Phage lifestyle options run_bacphlip = false + // Prophage activity options + run_propagate = false + propagate_derep_ani = 99 + propagate_derep_qcov = 0 + propagate_derep_tcov = 50 + propagate_min_read_id = null + propagate_min_effect = null + propagate_min_cov_ratio = null + propagate_min_cov = null + propagate_min_breadth = null + + // Phage functional annotation run_pharokka = false pharokka_db = null // Virus microdiversity options - skip_instrain = false + run_instrain = false instrain_min_ani = null instrain_min_mapq = null instrain_min_variant_cov = null diff --git a/nextflow_schema.json b/nextflow_schema.json index dc4a29b2..4a632cfe 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -341,9 +341,9 @@ "description": "Analyze virus diversity at the strain level", "default": "", "properties": { - "skip_instrain": { + "run_instrain": { "type": "boolean", - "description": "Bypass microdiversity analysis with inStrain" + "description": "Run inStrain to perform microdiversity analysis" }, "instrain_min_ani": { "type": "number", @@ -383,6 +383,53 @@ } } }, + "provirus_activity_options": { + "title": "Provirus activity options", + "type": "object", + "description": "Predict provirus activity based on genome coverage", + "default": "", + "properties": { + "run_propagate": { + "type": "boolean", + "description": "Run propagAtE to predict provirus activity" + }, + 
"propagate_derep_ani": { + "type": "integer", + "default": 99, + "description": "Minimum ANI when dereplicating assemblies containing proviruses" + }, + "propagate_derep_qcov": { + "type": "integer", + "default": 0, + "description": "Minimum query coverage when dereplicating assemblies containing proviruses" + }, + "propagate_derep_tcov": { + "type": "integer", + "default": 50, + "description": "Minimum target coverage when dereplicating assemblies containing proviruses" + }, + "propagate_min_read_id": { + "type": "number", + "description": "Minimum percent identity when aligning reads to proviral assemblies" + }, + "propagate_min_effect": { + "type": "number", + "description": "Minimum Cohen's D effect size for a provirus to be considered active" + }, + "propagate_min_cov_ratio": { + "type": "number", + "description": "Minimum provirus:host coverage ratio" + }, + "propagate_min_cov": { + "type": "number", + "description": "Minimum provirus depth coverage to be considered present" + }, + "propagate_min_breadth": { + "type": "number", + "description": "Minimum provirus breadth of coverage to be considered present" + } + } + }, "developer_options": { "title": "Developer options", "type": "object", @@ -654,6 +701,9 @@ { "$ref": "#/definitions/virus_microdiversity_options" }, + { + "$ref": "#/definitions/provirus_activity_options" + }, { "$ref": "#/definitions/developer_options" }, diff --git a/subworkflows/local/fasta_all_v_all_blast/main.nf b/subworkflows/local/fasta_all_v_all_blast/main.nf deleted file mode 100644 index 4c823216..00000000 --- a/subworkflows/local/fasta_all_v_all_blast/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -// -// Compare sequences by performing an all-v-all BLAST -// -include { BLAST_MAKEBLASTDB } from '../../../modules/nf-core/blast/makeblastdb/main' -include { BLAST_BLASTN } from '../../../modules/nf-core/blast/blastn/main' - -workflow FASTA_ALL_V_ALL_BLAST { - - take: - fasta_gz // [ [ meta ], fasta.gz ] , assemblies/genomes (mandatory) - - main: 
- ch_versions = Channel.empty() - - // - // MODULE: Make BLASTN database - // - ch_blast_db = BLAST_MAKEBLASTDB ( fasta_gz ).db - ch_versions = ch_versions.mix( BLAST_MAKEBLASTDB.out.versions ) - - // - // MODULE: Perform BLAST - // - ch_blast_txt = BLAST_BLASTN ( fasta_gz , ch_blast_db ).txt - ch_versions = ch_versions = ch_versions.mix( BLAST_MAKEBLASTDB.out.versions ) - - emit: - blast_txt = ch_blast_txt // [ [ meta ], blast_output.tsv ] , TSV file containing BLAST results - versions = ch_versions // [ versions.yml ] - -} diff --git a/subworkflows/local/fasta_all_v_all_blast/tests/main.nf.test.snap b/subworkflows/local/fasta_all_v_all_blast/tests/main.nf.test.snap deleted file mode 100644 index 357c342c..00000000 --- a/subworkflows/local/fasta_all_v_all_blast/tests/main.nf.test.snap +++ /dev/null @@ -1,37 +0,0 @@ -{ - "fasta.gz": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.txt:md5,aa3f583ccce0464e25ced18c56760404" - ] - ], - "1": [ - "versions.yml:md5,3a67b197cc055b730a0f1aa4d0cd31c0", - "versions.yml:md5,3a67b197cc055b730a0f1aa4d0cd31c0" - ], - "blast_txt": [ - [ - { - "id": "test" - }, - "test.txt:md5,aa3f583ccce0464e25ced18c56760404" - ] - ], - "versions": [ - "versions.yml:md5,3a67b197cc055b730a0f1aa4d0cd31c0", - "versions.yml:md5,3a67b197cc055b730a0f1aa4d0cd31c0" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T16:37:03.577392353" - } -} diff --git a/subworkflows/local/fasta_cluster_blast/main.nf b/subworkflows/local/fasta_cluster_blast/main.nf new file mode 100644 index 00000000..019dbe4a --- /dev/null +++ b/subworkflows/local/fasta_cluster_blast/main.nf @@ -0,0 +1,73 @@ +// +// Compare sequences by performing an all-v-all BLAST +// +include { GUNZIP } from '../../../modules/nf-core/gunzip/main' +include { BLAST_MAKEBLASTDB } from '../../../modules/nf-core/blast/makeblastdb/main' +include { BLAST_BLASTN } from '../../../modules/nf-core/blast/blastn/main' +include { 
ANICLUSTER_ANICALC } from '../../../modules/local/anicluster/anicalc/main' +include { ANICLUSTER_ANICLUST } from '../../../modules/local/anicluster/aniclust/main' +include { ANICLUSTER_EXTRACTREPS } from '../../../modules/local/anicluster/extractreps/main' + +workflow FASTA_CLUSTER_BLAST { + + take: + fasta_gz // [ [ meta ], fasta.gz ] , assemblies/genomes (mandatory) + min_ani // val [ 0 - 100 ] , minimum ANI for alignment to be counted + min_qcov // val [ 0 - 100 ] , minimum query cover for clustering + min_tcov // val [ 0 - 100 ] , minimum target cover for clustering + + main: + ch_versions = Channel.empty() + + // + // MODULE: Make BLASTN database + // + ch_blast_db = BLAST_MAKEBLASTDB ( fasta_gz ).db + ch_versions = ch_versions.mix( BLAST_MAKEBLASTDB.out.versions ) + + // + // MODULE: Perform BLAST + // + ch_blast_txt = BLAST_BLASTN ( fasta_gz , ch_blast_db ).txt + ch_versions = ch_versions.mix( BLAST_BLASTN.out.versions ) + + // + // MODULE: Calculate average nucleotide identity (ANI) and alignment fraction (AF) based on BLAST + // + ch_ani_tsv = ANICLUSTER_ANICALC ( ch_blast_txt ).ani + ch_versions = ch_versions.mix( ANICLUSTER_ANICALC.out.versions ) + + // create input for ANICLUSTER_ANICLUST + ch_aniclust_input = fasta_gz.join( ch_ani_tsv ) + .multiMap { meta, fasta_gz, ani_tsv -> + fasta: [ meta, fasta_gz ] + ani: [ meta, ani_tsv ] + } + + // + // MODULE: Cluster virus sequences based on ANI and AF + // + ch_clusters_tsv = ANICLUSTER_ANICLUST ( + ch_aniclust_input.fasta, + ch_aniclust_input.ani, + min_ani, + min_qcov, + min_tcov + ).clusters + ch_versions = ch_versions.mix( ANICLUSTER_ANICLUST.out.versions ) + + // create input for extracting cluster representatives + ch_extractreps_input = fasta_gz.join( ch_clusters_tsv ) + + // + // MODULE: Extract cluster representatives + // + ch_anicluster_reps_fasta_gz = ANICLUSTER_EXTRACTREPS ( ch_extractreps_input ).representatives + ch_versions = ch_versions.mix( 
ANICLUSTER_EXTRACTREPS.out.versions ) + + emit: + cluster_reps_fasta_gz = ch_anicluster_reps_fasta_gz // [ [ meta ], cluster_reps.fasta.gz ] , FASTA file cluster representatives + clusters_tsv = ch_clusters_tsv // [ [ meta ], clusters.tsv ] , TSV file containing cluster membership + versions = ch_versions // [ versions.yml ] + +} diff --git a/subworkflows/local/fasta_all_v_all_blast/nextflow.config b/subworkflows/local/fasta_cluster_blast/nextflow.config similarity index 100% rename from subworkflows/local/fasta_all_v_all_blast/nextflow.config rename to subworkflows/local/fasta_cluster_blast/nextflow.config diff --git a/subworkflows/local/fasta_all_v_all_blast/tests/main.nf.test b/subworkflows/local/fasta_cluster_blast/tests/main.nf.test similarity index 53% rename from subworkflows/local/fasta_all_v_all_blast/tests/main.nf.test rename to subworkflows/local/fasta_cluster_blast/tests/main.nf.test index ccaea3d2..fef86edc 100644 --- a/subworkflows/local/fasta_all_v_all_blast/tests/main.nf.test +++ b/subworkflows/local/fasta_cluster_blast/tests/main.nf.test @@ -1,17 +1,25 @@ nextflow_workflow { - name "Test Subworkflow: FASTA_ALL_V_ALL_BLAST" - script "subworkflows/local/fasta_all_v_all_blast/main.nf" - workflow "FASTA_ALL_V_ALL_BLAST" + name "Test Subworkflow: FASTA_CLUSTER_BLAST" + script "subworkflows/local/fasta_cluster_blast/main.nf" + workflow "FASTA_CLUSTER_BLAST" // Dependencies tag "BLAST_MAKEBLASTDB" tag "BLAST_BLASTN" + tag "ANICLUSTER_ANICALC" + tag "ANICLUSTER_ANICLUST" + tag "ANICLUSTER_EXTRACTREPS" - test("fasta.gz") { + test("fasta.gz + 95 + 0 + 85") { when { + params { + anicluster_min_ani = 95 + anicluster_min_qcov = 0 + anicluster_min_tcov = 85 + } workflow { """ input[0] = Channel.of( @@ -20,6 +28,9 @@ nextflow_workflow { file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) ] ) + input[1] = params.anicluster_min_ani + input[2] = params.anicluster_min_qcov + 
input[3] = params.anicluster_min_tcov """ } } diff --git a/subworkflows/local/fasta_cluster_blast/tests/main.nf.test.snap b/subworkflows/local/fasta_cluster_blast/tests/main.nf.test.snap new file mode 100644 index 00000000..b2372bda --- /dev/null +++ b/subworkflows/local/fasta_cluster_blast/tests/main.nf.test.snap @@ -0,0 +1,59 @@ +{ + "fasta.gz + 95 + 0 + 85": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_anicluster_representatives.fna.gz:md5,832eac6f8ccfb75cd524696eb1935141" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_clusters.tsv:md5,da359c01739cc9ddc70a435b7ddc1f3a" + ] + ], + "2": [ + "versions.yml:md5,305338928ed290d9497d0a5d6f8bf3bb", + "versions.yml:md5,98963f24e789a741d265a3da5e6f1870", + "versions.yml:md5,eb2640269b37949f9a3fcadfca898450", + "versions.yml:md5,eb2640269b37949f9a3fcadfca898450", + "versions.yml:md5,f52ada5f485f0efe322e88949ac1bbe0" + ], + "cluster_reps_fasta_gz": [ + [ + { + "id": "test" + }, + "test_anicluster_representatives.fna.gz:md5,832eac6f8ccfb75cd524696eb1935141" + ] + ], + "clusters_tsv": [ + [ + { + "id": "test" + }, + "test_clusters.tsv:md5,da359c01739cc9ddc70a435b7ddc1f3a" + ] + ], + "versions": [ + "versions.yml:md5,305338928ed290d9497d0a5d6f8bf3bb", + "versions.yml:md5,98963f24e789a741d265a3da5e6f1870", + "versions.yml:md5,eb2640269b37949f9a3fcadfca898450", + "versions.yml:md5,eb2640269b37949f9a3fcadfca898450", + "versions.yml:md5,f52ada5f485f0efe322e88949ac1bbe0" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T14:53:55.502121639" + } +} \ No newline at end of file diff --git a/subworkflows/local/fasta_all_v_all_blast/tests/tags.yml b/subworkflows/local/fasta_cluster_blast/tests/tags.yml similarity index 100% rename from subworkflows/local/fasta_all_v_all_blast/tests/tags.yml rename to subworkflows/local/fasta_cluster_blast/tests/tags.yml diff --git a/subworkflows/local/fastq_fasta_provirus_activity_propagate/main.nf 
b/subworkflows/local/fastq_fasta_provirus_activity_propagate/main.nf new file mode 100644 index 00000000..b6d0df8a --- /dev/null +++ b/subworkflows/local/fastq_fasta_provirus_activity_propagate/main.nf @@ -0,0 +1,133 @@ +// +// Predict provirus activity with Propagate +// +include { PROPAGATE_EXTRACTPROVIRUSES } from '../../../modules/local/propagate/extractproviruses/main' +include { CAT_CAT as CAT_FASTA } from '../../../modules/nf-core/cat/cat/main' +include { CAT_CAT as CAT_COORDS } from '../../../modules/nf-core/cat/cat/main' +include { GUNZIP } from '../../../modules/nf-core/gunzip/main' +include { FASTA_CLUSTER_BLAST as FASTA_GROUP_DEREPLICATE_BLAST } from '../fasta_cluster_blast/main' +include { PROPAGATE_DEREPCOORDINATES } from '../../../modules/local/propagate/derepcoordinates/main' +include { PROPAGATE_PROPAGATE } from '../../../modules/local/propagate/propagate/main' + +workflow FASTQ_FASTA_PROVIRUS_ACTIVITY_PROPAGATE { + take: + fastq_gz // [ [ meta ], reads_1.fastq.gz, reads_2.fastq.gz ] , reads (mandatory) + fasta_gz // [ [ meta ], assembly.fasta.gz ] , assemblies (mandatory) + ch_virus_summaries_tsv // [ [ meta ], virus_summary.tsv ] , genomad virus summary (mandatory) + ch_contamination_tsv // [ [ meta ], contamination.tsv ] , checkV contamination summary (mandatory) + min_ani // val [ 0 - 100 ] , minimum ANI for dereplication + min_qcov // val [ 0 - 100 ] , minimum query coverage when clustering + min_tcov // val [ 0 - 100 ] , minimum target coverage when clustering + + main: + ch_versions = Channel.empty() + + // + // MODULE: Identify integrated proviruses (and optionally assign to clusters) + // + ch_provirus_scaffolds_fasta_gz = PROPAGATE_EXTRACTPROVIRUSES ( fasta_gz, ch_virus_summaries_tsv, ch_contamination_tsv ).provirus_scaffolds + ch_provirus_coords_tsv = PROPAGATE_EXTRACTPROVIRUSES.out.provirus_coords + ch_versions = ch_versions.mix(PROPAGATE_EXTRACTPROVIRUSES.out.versions) + + // combine provirus assemblies by group + 
ch_grouped_proviruses_fasta_gz = ch_provirus_scaffolds_fasta_gz + .map { + meta, fasta -> + def meta_new = [:] + meta_new.id = meta.group + return [ meta_new, fasta ] + } + .groupTuple( sort: 'deep' ) + + // + // MODULE: Concatenate provirus assemblies within groups + // + ch_combined_proviruses_fasta_gz = CAT_FASTA ( ch_grouped_proviruses_fasta_gz ).file_out + ch_versions = ch_versions.mix(CAT_FASTA.out.versions) + + // + // SUBWORKFLOW: Dereplicate provirus-containing assemblies within groups + // + ch_derep_scaffolds_fasta_gz = FASTA_GROUP_DEREPLICATE_BLAST ( + ch_combined_proviruses_fasta_gz, + min_ani, + min_qcov, + min_tcov + ).cluster_reps_fasta_gz + ch_derep_clusters_tsv = FASTA_GROUP_DEREPLICATE_BLAST.out.clusters_tsv + ch_versions = ch_versions.mix ( FASTA_GROUP_DEREPLICATE_BLAST.out.versions ) + + // + // MODULE: Gunzip assemblies for input into propagate + // + ch_derep_scaffolds_fasta = GUNZIP ( ch_derep_scaffolds_fasta_gz ).gunzip + ch_versions = ch_versions.mix ( GUNZIP.out.versions ) + + // Combine coords files within groups + ch_grouped_coords_tsv = ch_provirus_coords_tsv + .map { + meta, coords -> + def meta_new = [:] + meta_new.id = meta.group + return [ meta_new, coords ] + } + .groupTuple( sort:'deep') + + // + // MODULE: Combine coords files within group into one coords file + // + ch_combined_coords_tsv = CAT_COORDS ( ch_grouped_coords_tsv ).file_out + ch_versions = ch_versions.mix ( CAT_COORDS.out.versions ) + + // combine coords and cluster files by group + ch_derep_coords_input = ch_combined_coords_tsv.join ( ch_derep_clusters_tsv ) + .multiMap { + meta, coords, clusters -> + coords: [ meta, coords ] + clusters: [ meta, clusters ] + } + + // + // MODULE: Identify provirus coordinates in dereplicated assemblies + // + ch_derep_provirus_coords_tsv = PROPAGATE_DEREPCOORDINATES ( ch_derep_coords_input.coords, ch_derep_coords_input.clusters ).derep_coords + ch_versions = ch_versions.mix ( PROPAGATE_DEREPCOORDINATES.out.versions ) + + // join 
reads with assemblies and provirus coords by group + ch_propagate_input = fastq_gz + .map { + meta, fastq -> + def meta_new = [:] + + meta_new.id = meta.group + + return [ meta_new, meta.id, fastq ] + } + .combine ( ch_derep_scaffolds_fasta, by:0 ) + .combine ( ch_derep_provirus_coords_tsv, by: 0 ) + .multiMap { + meta, id, fastq, fasta, coords -> + def meta_new = [:] + + meta_new.id = id + meta_new.group = meta.id + + reads: [ meta_new, fastq ] + assemblies: [ meta_new, fasta ] + coords: [ meta_new, coords ] + } + + // + // MODULE: Predict provirus activity + // + ch_propagate_results_tsv = PROPAGATE_PROPAGATE ( + ch_propagate_input.reads, + ch_propagate_input.assemblies, + ch_propagate_input.coords + ).results + ch_versions = ch_versions.mix(PROPAGATE_PROPAGATE.out.versions) + + emit: + propagate_results_tsv = ch_propagate_results_tsv // [ [ meta ], propagate_results.tsv ] , TSV file containing provirus activity predictions + versions = ch_versions // [ versions.yml ] +} diff --git a/subworkflows/local/fastq_fasta_provirus_activity_propagate/nextflow.config b/subworkflows/local/fastq_fasta_provirus_activity_propagate/nextflow.config new file mode 100644 index 00000000..a9a90138 --- /dev/null +++ b/subworkflows/local/fastq_fasta_provirus_activity_propagate/nextflow.config @@ -0,0 +1,13 @@ +includeConfig '../../../modules/nf-core/cat/cat/nextflow.config' +includeConfig '../../../modules/local/propagate/extractproviruses/nextflow.config' +includeConfig '../../../modules/nf-core/gunzip/nextflow.config' +includeConfig '../fasta_cluster_blast/nextflow.config' +includeConfig '../../../modules/nf-core/gunzip/nextflow.config' +includeConfig '../../../modules/nf-core/blast/makeblastdb/nextflow.config' +includeConfig '../../../modules/nf-core/blast/blastn/nextflow.config' +includeConfig '../../../modules/local/anicluster/anicalc/nextflow.config' +includeConfig '../../../modules/local/anicluster/aniclust/nextflow.config' +includeConfig 
'../../../modules/local/anicluster/extractreps/nextflow.config' +includeConfig '../../../modules/local/propagate/derepcoordinates/nextflow.config' +includeConfig '../../../modules/local/propagate/propagate/nextflow.config' + diff --git a/subworkflows/local/fastq_fasta_provirus_activity_propagate/tests/main.nf.test b/subworkflows/local/fastq_fasta_provirus_activity_propagate/tests/main.nf.test new file mode 100644 index 00000000..0181bd5a --- /dev/null +++ b/subworkflows/local/fastq_fasta_provirus_activity_propagate/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_workflow { + + name "Test Subworkflow: FASTQ_FASTA_PROVIRUS_ACTIVITY_PROPAGATE" + script "subworkflows/local/fastq_fasta_provirus_activity_propagate/main.nf" + workflow "FASTQ_FASTA_PROVIRUS_ACTIVITY_PROPAGATE" + + tag "subworkflows" + tag "subworkflows_local" + tag "fastq_fasta_provirus_activity_propagate" + tag "fastq_fasta_provirus_activity_propagate_default" + + + test("Parameters: default") { + + when { + params { + min_ani = 99 + min_qcov = 0 + min_tcov = 50 + } + workflow { + """ + input[0] = Channel.of( + [ + [ id: 'test', group:'test_group' ], + [ + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz", checkIfExists: true) + ] + ], + [ + [ id: 'test2', group:'test_group' ], + [ + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [ id:'test', group:'test_group'], + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz", checkIfExists: true) + ], + [ + [ id:'test2', 
group:'test_group' ], + file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz", checkIfExists: true) + ] + ) + input[2] = Channel.of( + [ + [ id:'test', group:'test_group' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_genomad_virus_summary.tsv", checkIfExists: true) + ], + [ + [ id:'test2', group:'test_group' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_genomad_virus_summary.tsv", checkIfExists: true) + ] + ) + input[3] = Channel.of( + [ + [ id:'test', group:'test_group' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_checkv_contamination.tsv", checkIfExists: true) + ], + [ + [ id:'test2', group:'test_group' ], + file(params.pipelines_testdata_base_path + "modules/local/propagate/b_fragilis_checkv_contamination.tsv", checkIfExists: true) + ] + ) + input[4] = params.min_ani + input[5] = params.min_qcov + input[6] = params.min_tcov + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/local/fastq_fasta_provirus_activity_propagate/tests/main.nf.test.snap b/subworkflows/local/fastq_fasta_provirus_activity_propagate/tests/main.nf.test.snap new file mode 100644 index 00000000..bdd25c45 --- /dev/null +++ b/subworkflows/local/fastq_fasta_provirus_activity_propagate/tests/main.nf.test.snap @@ -0,0 +1,59 @@ +{ + "Parameters: default": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "group": "test_group" + }, + "test_propagate.tsv:md5,c4aea1fd8b52b7c948032898441487db" + ] + ], + "1": [ + "versions.yml:md5,24ea9a1d779e345e2b0bfb1c98ba988f", + "versions.yml:md5,24ea9a1d779e345e2b0bfb1c98ba988f", + "versions.yml:md5,2c4ef24adda9d78886356f0c141eb520", + "versions.yml:md5,5091066e83e141629b28a757e640dccd", + "versions.yml:md5,5091066e83e141629b28a757e640dccd", + 
"versions.yml:md5,54ef0ec38df0a174aa7d4dc38d290aaa", + "versions.yml:md5,57550d14b8b1f00db2739d291a5efadc", + "versions.yml:md5,6200306290395b2494f07332c64a29b5", + "versions.yml:md5,86d513a8ab4f4ad42f4ab7b3a069da00", + "versions.yml:md5,9660592e07edcc398250af390c262b8c", + "versions.yml:md5,a6a07089c62d3bd613ba2c35069bed2c", + "versions.yml:md5,b108265366213ef9c79813d788e14d5b" + ], + "propagate_results_tsv": [ + [ + { + "id": "test", + "group": "test_group" + }, + "test_propagate.tsv:md5,c4aea1fd8b52b7c948032898441487db" + ] + ], + "versions": [ + "versions.yml:md5,24ea9a1d779e345e2b0bfb1c98ba988f", + "versions.yml:md5,24ea9a1d779e345e2b0bfb1c98ba988f", + "versions.yml:md5,2c4ef24adda9d78886356f0c141eb520", + "versions.yml:md5,5091066e83e141629b28a757e640dccd", + "versions.yml:md5,5091066e83e141629b28a757e640dccd", + "versions.yml:md5,54ef0ec38df0a174aa7d4dc38d290aaa", + "versions.yml:md5,57550d14b8b1f00db2739d291a5efadc", + "versions.yml:md5,6200306290395b2494f07332c64a29b5", + "versions.yml:md5,86d513a8ab4f4ad42f4ab7b3a069da00", + "versions.yml:md5,9660592e07edcc398250af390c262b8c", + "versions.yml:md5,a6a07089c62d3bd613ba2c35069bed2c", + "versions.yml:md5,b108265366213ef9c79813d788e14d5b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T10:18:05.086006574" + } +} \ No newline at end of file diff --git a/workflows/phageannotator/main.nf b/workflows/phageannotator/main.nf index d4febe50..9506f3fb 100644 --- a/workflows/phageannotator/main.nf +++ b/workflows/phageannotator/main.nf @@ -14,9 +14,6 @@ include { SEQKIT_SEQ } from '../../modules/local/ include { APPENDSCREENHITS } from '../../modules/local/appendscreenhits/main' include { EXTRACTVIRALASSEMBLIES } from '../../modules/local/extractviralassemblies/main' include { QUALITYFILTERVIRUSES } from '../../modules/local/qualityfilterviruses/main' -include { ANICLUSTER_ANICALC } from '../../modules/local/anicluster/anicalc/main' -include { ANICLUSTER_ANICLUST } 
from '../../modules/local/anicluster/aniclust/main' -include { ANICLUSTER_EXTRACTREPS } from '../../modules/local/anicluster/extractreps/main' include { COVERM_CONTIG } from '../../modules/local/coverm/contig/main' // TODO: Add to nf-core include { INSTRAIN_STB } from '../../modules/local/instrain/stb/main' @@ -28,11 +25,11 @@ include { FASTQ_FASTA_REFERENCE_CONTAINMENT_MASH } from '../../subworkflows/l include { FASTA_VIRUS_CLASSIFICATION_GENOMAD } from '../../subworkflows/local/fasta_virus_classification_genomad/main' // TODO: Add to nf-core; Add nf-tests to nf-core modules include { FASTQ_FASTA_CONTIG_EXTENSION_COBRA } from '../../subworkflows/local/fastq_fasta_contig_extension_cobra/main' // TODO: Add to nf-core; Add nf-tests to nf-core modules include { FASTA_VIRUS_QUALITY_CHECKV } from '../../subworkflows/local/fasta_virus_quality_checkv/main' // TODO: Add to nf-core; Add nf-tests to nf-core modules -include { FASTA_ALL_V_ALL_BLAST } from '../../subworkflows/local/fasta_all_v_all_blast/main' +include { FASTA_CLUSTER_BLAST } from '../../subworkflows/local/fasta_cluster_blast/main' include { FASTA_PHAGE_HOST_IPHOP } from '../../subworkflows/local/fasta_phage_host_iphop/main' // TODO: Add to nf-core; Add nf-tests to nf-core modules include { FASTA_PHAGE_FUNCTION_PHAROKKA } from '../../subworkflows/local/fasta_phage_function_pharokka/main' include { FASTA_MICRODIVERSITY_INSTRAIN } from '../../subworkflows/local/fasta_microdiversity_instrain/main' // TODO: Add to nf-core; Add nf-tests to nf-core modules - +include { FASTQ_FASTA_PROVIRUS_ACTIVITY_PROPAGATE } from '../../subworkflows/local/fastq_fasta_provirus_activity_propagate/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -74,12 +71,10 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Estimate viral enrichment in reads ------------------------------------------------------------------------------*/ - // 
if run_viromeqc == true, run subworkflow if ( params.run_viromeqc ) { ch_virus_enrichment_tsv = FASTQ_VIRUS_ENRICHMENT_VIROMEQC ( fastq_gz ).enrichment_tsv ch_versions = ch_versions.mix(FASTQ_VIRUS_ENRICHMENT_VIROMEQC.out.versions) } else { - // if run_viromeqc == false, skip subworkflow ch_virus_enrichment_tsv = Channel.empty() } @@ -94,7 +89,6 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Reference virus identification ------------------------------------------------------------------------------*/ - // if run_reference_containment == true, run subworkflow if ( params.run_reference_containment ) { // if reference based identification requested, a reference FASTA file must be included if ( !params.reference_virus_fasta ) { @@ -126,7 +120,6 @@ workflow PHAGEANNOTATOR { ch_assembly_w_references_fasta_gz = APPENDSCREENHITS ( ch_append_screen_hits_input, ch_reference_virus_fasta_gz.first() ).assembly_w_screen_hits ch_versions = ch_versions.mix(APPENDSCREENHITS.out.versions.first()) } else { - // if run_reference_containment == false, skip subworkflow and use input assemblies ch_assembly_w_references_fasta_gz = ch_filtered_input_fasta_gz } @@ -134,7 +127,6 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- De novo virus classification ------------------------------------------------------------------------------*/ - // if skip_genomad == false OR run_genomad_taxonomy == true, prepare genomad db if ( !params.skip_genomad || params.run_genomad_taxonomy ) { // // De-novo virus classification using assemblies @@ -146,7 +138,6 @@ workflow PHAGEANNOTATOR { ch_genomad_db = Channel.value( file( params.genomad_db, checkIfExists:true ) ) } - // if skip_genomad == false run genomad subworkflow // // SUBWORKFLOW: Classify and annotate sequences // @@ -162,7 +153,6 @@ workflow PHAGEANNOTATOR { ch_virus_summaries_tsv = Channel.empty() } } else { - // if skip_genomad 
== true use assemblies with references ch_viruses_fna_gz = ch_assembly_w_references_fasta_gz ch_virus_summaries_tsv = Channel.empty() } @@ -200,7 +190,6 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Assess virus quality and filter ------------------------------------------------------------------------------*/ - // if skip_checkv == false, run subworkflow if ( !params.skip_checkv ) { // create channel from params.checkv_db if ( !params.checkv_db ){ @@ -225,7 +214,6 @@ workflow PHAGEANNOTATOR { ch_filtered_viruses_fna_gz = QUALITYFILTERVIRUSES ( ch_quality_filter_viruses_input2 ).filtered_viruses ch_versions = ch_versions.mix(QUALITYFILTERVIRUSES.out.versions.first()) } else { - // if skip_checkv == false, use non-quality filtered viruses ch_filtered_viruses_fna_gz = ch_viruses_fna_gz ch_quality_summary_tsv = Channel.empty() } @@ -234,7 +222,7 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Cluster viruses using all-v-all BLAST approach ------------------------------------------------------------------------------*/ - // if skip_virus_clustering == false, run subworkflow + // TODO: Add option to cluster within groups rather than across all samples if ( !params.skip_virus_clustering ) { // create a channel for combining filtered viruses (sorted so output is the same for tests) ch_cat_viruses_input = ch_filtered_viruses_fna_gz @@ -247,36 +235,13 @@ workflow PHAGEANNOTATOR { ch_filtered_viruses_combined_fna_gz = CAT_VIRUSES ( ch_cat_viruses_input ).file_out // - // SUBWORKFLOW: Perform all-v-all BLAST - // - ch_blast_txt = FASTA_ALL_V_ALL_BLAST ( ch_filtered_viruses_combined_fna_gz ).blast_txt - ch_versions = ch_versions.mix( FASTA_ALL_V_ALL_BLAST.out.versions ) - - // - // MODULE: Calculate average nucleotide identity (ANI) and alignment fraction (AF) based on BLAST - // - ch_ani_tsv = ANICLUSTER_ANICALC ( ch_blast_txt ).ani - ch_versions = 
ch_versions.mix( ANICLUSTER_ANICALC.out.versions ) - - // create input for ANICLUSTER_ANICALC - ch_aniclust_input = ch_filtered_viruses_combined_fna_gz.join( ch_ani_tsv ) - - // - // MODULE: Cluster virus sequences based on ANI and AF + // SUBWORKFLOW: Perform BLAST-based clustering // - ch_clusters_tsv = ANICLUSTER_ANICLUST ( ch_aniclust_input ).clusters - ch_versions = ch_versions.mix( ANICLUSTER_ANICLUST.out.versions ) + ch_anicluster_reps_fasta_gz = FASTA_CLUSTER_BLAST ( ch_filtered_viruses_combined_fna_gz, params.anicluster_min_ani, params.anicluster_min_qcov, params.anicluster_min_tcov ).cluster_reps_fasta_gz + ch_clusters_tsv = FASTA_CLUSTER_BLAST.out.clusters_tsv + ch_versions = ch_versions.mix( FASTA_CLUSTER_BLAST.out.versions ) - // create input for extracting cluster representatives - ch_extractreps_input = ch_filtered_viruses_combined_fna_gz.join( ch_clusters_tsv ) - - // - // MODULE: Extract cluster representatives - // - ch_anicluster_reps_fasta_gz = ANICLUSTER_EXTRACTREPS ( ch_extractreps_input ).representatives - ch_versions = ch_versions.mix( ANICLUSTER_EXTRACTREPS.out.versions ) } else { - // if skip_virus_clustering == true, use unclustered viruses ch_anicluster_reps_fasta_gz = ch_filtered_viruses_fna_gz ch_clusters_tsv = Channel.empty() } @@ -285,7 +250,6 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Align reads to viruses ------------------------------------------------------------------------------*/ - // if skip_read_alignment == false OR run_instrain == true, run subworkflow if ( !params.skip_read_alignment || params.run_instrain ) { // // MODULE: Make bowtie2 index @@ -306,7 +270,6 @@ workflow PHAGEANNOTATOR { ch_alignment_results_tsv = COVERM_CONTIG ( ch_combined_bams ).alignment_results ch_versions = ch_versions.mix( COVERM_CONTIG.out.versions ) } else { - // if skip_read_alignment == true AND run_instrain == false, skip subworkflow ch_alignment_results_tsv = Channel.empty() if 
( !params.skip_instrain ) { error "[nf-core/phageannotator] ERROR: skip_read_alignment = true but skip_instrain = false; read alignment must take place for inStrain to run" @@ -317,7 +280,6 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Assign viral taxonomy ------------------------------------------------------------------------------*/ - // if run_genomad_taxonomy == true run subworkflow if ( params.run_genomad_taxonomy ) { // // SUBWORKFLOW: Assign taxonomy using ICTV taxa specific marker genes @@ -325,7 +287,6 @@ workflow PHAGEANNOTATOR { ch_marker_taxonomy_tsv = GENOMAD_TAXONOMY ( ch_anicluster_reps_fasta_gz, ch_genomad_db_dir ).taxonomy ch_versions = ch_versions.mix( GENOMAD_TAXONOMY.out.versions ) } else { - // if run_genomad_taxonomy == false, skip subworkflow ch_marker_taxonomy_tsv = Channel.empty() } @@ -337,7 +298,6 @@ workflow PHAGEANNOTATOR { ch_anicluster_reps_fasta = GUNZIP_CLUSTER_REPS ( ch_anicluster_reps_fasta_gz ).gunzip ch_versions = ch_versions.mix( GUNZIP_CLUSTER_REPS.out.versions ) - // if run_iphop == true, run subworkflow if ( params.run_iphop ){ // create channel from params.checkv_db if ( !params.iphop_db ){ @@ -352,7 +312,6 @@ workflow PHAGEANNOTATOR { ch_host_predictions_tsv = FASTA_PHAGE_HOST_IPHOP ( ch_anicluster_reps_fasta, ch_iphop_db ).host_predictions_tsv ch_versions = ch_versions.mix( FASTA_PHAGE_HOST_IPHOP.out.versions ) } else { - // if run_iphop == false, skip subworkflow ch_host_predictions_tsv = Channel.empty() } @@ -360,7 +319,6 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Predict virus lifestyle ------------------------------------------------------------------------------*/ - // if run_bacphlip == true, run subworkflow if ( params.run_bacphlip ) { // // MODULE: Predict phage lifestyle using lysogeny specific genes @@ -370,7 +328,6 @@ workflow PHAGEANNOTATOR { // TODO: Add ability to automatically add 
-meta flag to pharokka when multiple sequences are in input fasta } else { - // if run_bacphlip == false, skip subworkflow ch_bacphlip_lifestyle_tsv = Channel.empty() } @@ -378,7 +335,6 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Phage functional annotation ------------------------------------------------------------------------------*/ - // if run_pharokka == true OR run_instrain == true, run subworkflow if ( params.run_pharokka || params.run_instrain ) { // create channel from params.pharokka_db if ( !params.pharokka_db ){ @@ -416,7 +372,6 @@ workflow PHAGEANNOTATOR { return [ meta, gbk_mod] } } else { - // if run_pharokka == false AND run_instrain == false, skip subworkflow ch_pharokka_gbk_mod = [] ch_pharokka_output_tsv = Channel.empty() } @@ -425,7 +380,7 @@ workflow PHAGEANNOTATOR { /*---------------------------------------------------------------------------- Analyze phage microdiversity ------------------------------------------------------------------------------*/ - // if run_instrain == true, run subworkflow + // TODO: Add option to run instrain/compare within groups rather than across all samples if ( params.run_instrain ) { // // MODULE: Generate instrain scaffold to bin file @@ -439,10 +394,29 @@ workflow PHAGEANNOTATOR { ch_gene_info_tsv = FASTA_MICRODIVERSITY_INSTRAIN ( ch_cluster_rep_alignment_bam, ch_anicluster_reps_fasta, ch_pharokka_gbk_mod, ch_stb_file_tsv ).gene_info_tsv ch_versions = ch_versions = ch_versions.mix(FASTA_MICRODIVERSITY_INSTRAIN.out.versions) } else { - // if run_instrain == false, skip subworkflow ch_gene_info_tsv = Channel.empty() } + /*---------------------------------------------------------------------------- + Predict if proviruses are active + ------------------------------------------------------------------------------*/ + // if run_propagate == true; run subworkflow + if ( params.run_propagate ){ + ch_provirus_activity_tsv = 
FASTQ_FASTA_PROVIRUS_ACTIVITY_PROPAGATE ( + fastq_gz, + fasta_gz, + ch_virus_summaries_tsv, + ch_quality_summary_tsv, + ch_clusters_tsv, + params.propagate_min_ani, + params.propagate_min_qcov, + params.propagate_min_tcov ).propagate_results_tsv + ch_versions = ch_versions.mix ( FASTQ_FASTA_PROVIRUS_ACTIVITY_PROPAGATE.out.versions ) + } else { + ch_provirus_activity_tsv = Channel.empty() + } + + emit: virus_enrichment_tsv = ch_virus_enrichment_tsv @@ -457,6 +431,7 @@ workflow PHAGEANNOTATOR { // bacphlip_lifestyle_tsv = ch_bacphlip_lifestyle_tsv // Inconsistent hash pharokka_output_tsv = ch_pharokka_output_tsv instrain_gene_info = ch_gene_info_tsv + propagate_results_tsv = ch_provirus_activity_tsv versions = ch_versions } diff --git a/workflows/phageannotator/nextflow.config b/workflows/phageannotator/nextflow.config index 74ac1cb5..6df599b7 100644 --- a/workflows/phageannotator/nextflow.config +++ b/workflows/phageannotator/nextflow.config @@ -9,7 +9,7 @@ includeConfig '../../subworkflows/local/fastq_fasta_contig_extension_cobra/nextf includeConfig '../../subworkflows/local/fasta_virus_quality_checkv/nextflow.config' includeConfig '../../modules/local/qualityfilterviruses/nextflow.config' includeConfig '../../modules/nf-core/multiqc/nextflow.config' -includeConfig '../../subworkflows/local/fasta_all_v_all_blast/nextflow.config' +includeConfig '../../subworkflows/local/fasta_cluster_blast/nextflow.config' includeConfig '../../subworkflows/nf-core/bam_sort_stats_samtools/nextflow.config' includeConfig '../../subworkflows/nf-core/bam_stats_samtools/nextflow.config' includeConfig '../../subworkflows/nf-core/fastq_align_bowtie2/nextflow.config' diff --git a/workflows/phageannotator/tests/full_test.nf.test.snap b/workflows/phageannotator/tests/full_test.nf.test.snap index 1a49c7ee..247803d7 100644 --- a/workflows/phageannotator/tests/full_test.nf.test.snap +++ b/workflows/phageannotator/tests/full_test.nf.test.snap @@ -30,6 +30,47 @@ 
"test.fasta_w_screen_hits_virus_summary.tsv:md5,f5b59da727b9793461f8b85cf7c818df" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1097ac7d62d13e9e74916d9dc1fabb7b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,2b068b9678f63d6bb06a2a3652e92ba9", + "versions.yml:md5,2d1864a0523e2738f150eaf87d4472d5", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,3c8b8ac5a86d6d7fba86fc61f4d5fca9", + "versions.yml:md5,42ce3c1803e6f3030b60d1d4610b629d", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,478c8fdcebf1357f40c26d7991d85eb4", + "versions.yml:md5,5074b29ede738ba6d39ef056e1cde929", + "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,afc743a69ab04322fbc732b24e18d208", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", + "versions.yml:md5,c0598196e761dbcdc90ff95a2f623b9c", + 
"versions.yml:md5,d73a3ecea014f00c8d95513555c42d23", + "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", + "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", + "versions.yml:md5,de688faac0a9124c4f2db74ce1b2c795", + "versions.yml:md5,dfa7110695181c4e8daa47d384563588" + ], "2": [ [ { @@ -105,45 +146,7 @@ ] ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1097ac7d62d13e9e74916d9dc1fabb7b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,2b068b9678f63d6bb06a2a3652e92ba9", - "versions.yml:md5,2d1864a0523e2738f150eaf87d4472d5", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,3c8b8ac5a86d6d7fba86fc61f4d5fca9", - "versions.yml:md5,42ce3c1803e6f3030b60d1d4610b629d", - "versions.yml:md5,478c8fdcebf1357f40c26d7991d85eb4", - "versions.yml:md5,5074b29ede738ba6d39ef056e1cde929", - "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,afc743a69ab04322fbc732b24e18d208", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - 
"versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,c0598196e761dbcdc90ff95a2f623b9c", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d73a3ecea014f00c8d95513555c42d23", - "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", - "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", - "versions.yml:md5,de688faac0a9124c4f2db74ce1b2c795", - "versions.yml:md5,dfa7110695181c4e8daa47d384563588" + ], "anicluster_reps_fna_gz": [ [ @@ -204,11 +207,13 @@ }, "all_samples_cds_final_merged_output.tsv:md5,26c4d18972b491b05e844cff84fc63ad" ] + ], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", @@ -219,17 +224,19 @@ "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", "versions.yml:md5,3c8b8ac5a86d6d7fba86fc61f4d5fca9", "versions.yml:md5,42ce3c1803e6f3030b60d1d4610b629d", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,478c8fdcebf1357f40c26d7991d85eb4", "versions.yml:md5,5074b29ede738ba6d39ef056e1cde929", "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", 
"versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,afc743a69ab04322fbc732b24e18d208", @@ -237,9 +244,8 @@ "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", "versions.yml:md5,c0598196e761dbcdc90ff95a2f623b9c", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", "versions.yml:md5,d73a3ecea014f00c8d95513555c42d23", "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", @@ -294,6 +300,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-16T23:12:59.497752604" + "timestamp": "2024-04-22T14:16:49.688416588" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/main.nf.test.snap b/workflows/phageannotator/tests/main.nf.test.snap index 81e10352..7a96deb3 100644 --- a/workflows/phageannotator/tests/main.nf.test.snap +++ b/workflows/phageannotator/tests/main.nf.test.snap @@ -19,6 +19,31 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + 
"versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" + ], "2": [ [ { @@ -68,29 +93,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + ], "anicluster_reps_fna_gz": [ [ @@ -125,31 +128,34 @@ ], "pharokka_output_tsv": [ + ], + "propagate_results_tsv": [ + ], "versions": [ 
"versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" ], "virus_classification_tsv": [ [ @@ -188,6 +194,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T13:31:38.335678752" + "timestamp": "2024-04-22T13:49:13.168724286" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/run_bacphlip.nf.test.snap b/workflows/phageannotator/tests/run_bacphlip.nf.test.snap index 6cd5800b..5162ab5e 100644 --- a/workflows/phageannotator/tests/run_bacphlip.nf.test.snap +++ 
b/workflows/phageannotator/tests/run_bacphlip.nf.test.snap @@ -19,6 +19,32 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,42ce3c1803e6f3030b60d1d4610b629d", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" + ], "2": [ [ { @@ -68,30 +94,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - 
"versions.yml:md5,42ce3c1803e6f3030b60d1d4610b629d", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + ], "anicluster_reps_fna_gz": [ [ @@ -126,32 +129,35 @@ ], "pharokka_output_tsv": [ + ], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", "versions.yml:md5,42ce3c1803e6f3030b60d1d4610b629d", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + 
"versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" ], "virus_classification_tsv": [ [ @@ -190,6 +196,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T13:31:45.457067777" + "timestamp": "2024-04-22T13:50:41.685626178" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/run_genomad_taxonomy.nf.test.snap b/workflows/phageannotator/tests/run_genomad_taxonomy.nf.test.snap index 014c54a2..f175a187 100644 --- a/workflows/phageannotator/tests/run_genomad_taxonomy.nf.test.snap +++ b/workflows/phageannotator/tests/run_genomad_taxonomy.nf.test.snap @@ -19,6 +19,32 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", 
+ "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", + "versions.yml:md5,d73a3ecea014f00c8d95513555c42d23" + ], "2": [ [ { @@ -73,30 +99,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d73a3ecea014f00c8d95513555c42d23" + ], "anicluster_reps_fna_gz": [ [ @@ -136,31 +139,34 @@ ], "pharokka_output_tsv": [ + 
], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", "versions.yml:md5,d73a3ecea014f00c8d95513555c42d23" ], "virus_classification_tsv": [ @@ -200,6 +206,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T13:32:18.068075458" + "timestamp": "2024-04-22T13:51:36.742142002" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/run_instrain.nf.test.snap b/workflows/phageannotator/tests/run_instrain.nf.test.snap index 4842526c..c91a25e1 100644 --- 
a/workflows/phageannotator/tests/run_instrain.nf.test.snap +++ b/workflows/phageannotator/tests/run_instrain.nf.test.snap @@ -19,6 +19,38 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,2d1864a0523e2738f150eaf87d4472d5", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,3c8b8ac5a86d6d7fba86fc61f4d5fca9", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,afc743a69ab04322fbc732b24e18d208", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", + "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", + "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032" + ], "2": [ [ { @@ -84,36 +116,7 @@ ] ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - 
"versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,2d1864a0523e2738f150eaf87d4472d5", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,3c8b8ac5a86d6d7fba86fc61f4d5fca9", - "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,afc743a69ab04322fbc732b24e18d208", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", - "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032" + ], "anicluster_reps_fna_gz": [ [ @@ -164,11 +167,13 @@ }, "all_samples_cds_final_merged_output.tsv:md5,db5070088d1bea637525687595c03edb" ] + ], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", 
"versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", @@ -176,15 +181,17 @@ "versions.yml:md5,2d1864a0523e2738f150eaf87d4472d5", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", "versions.yml:md5,3c8b8ac5a86d6d7fba86fc61f4d5fca9", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,afc743a69ab04322fbc732b24e18d208", @@ -192,8 +199,7 @@ "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032", "versions.yml:md5,dc43c8404f9f95b46c1a9c8d51687032" ], @@ -234,6 +240,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-16T22:28:04.553758819" + "timestamp": "2024-04-22T14:14:00.873814771" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/run_iphop.nf.test.snap b/workflows/phageannotator/tests/run_iphop.nf.test.snap index e72ebc3f..bb157f28 100644 --- 
a/workflows/phageannotator/tests/run_iphop.nf.test.snap +++ b/workflows/phageannotator/tests/run_iphop.nf.test.snap @@ -19,6 +19,34 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1097ac7d62d13e9e74916d9dc1fabb7b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,478c8fdcebf1357f40c26d7991d85eb4", + "versions.yml:md5,5074b29ede738ba6d39ef056e1cde929", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" + ], "2": [ [ { @@ -73,32 +101,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - 
"versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1097ac7d62d13e9e74916d9dc1fabb7b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,478c8fdcebf1357f40c26d7991d85eb4", - "versions.yml:md5,5074b29ede738ba6d39ef056e1cde929", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + ], "anicluster_reps_fna_gz": [ [ @@ -138,34 +141,37 @@ ], "pharokka_output_tsv": [ + ], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1097ac7d62d13e9e74916d9dc1fabb7b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,478c8fdcebf1357f40c26d7991d85eb4", "versions.yml:md5,5074b29ede738ba6d39ef056e1cde929", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + 
"versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" ], "virus_classification_tsv": [ [ @@ -204,6 +210,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T13:32:41.699752723" + "timestamp": "2024-04-22T13:51:25.373241894" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/run_pharokka.nf.test.snap b/workflows/phageannotator/tests/run_pharokka.nf.test.snap index 117f6cf4..14bbd8d0 100644 --- a/workflows/phageannotator/tests/run_pharokka.nf.test.snap +++ b/workflows/phageannotator/tests/run_pharokka.nf.test.snap @@ -19,6 +19,34 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + 
"versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,afc743a69ab04322fbc732b24e18d208", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" + ], "2": [ [ { @@ -73,32 +101,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - 
"versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,afc743a69ab04322fbc732b24e18d208", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + ], "anicluster_reps_fna_gz": [ [ @@ -138,25 +141,29 @@ }, "all_samples_cds_final_merged_output.tsv:md5,db5070088d1bea637525687595c03edb" ] + ], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,5bf636608598d8b495d8b38fcee37dd0", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,716410d7e9dc048a64ada71202cbc23e", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", 
"versions.yml:md5,afc743a69ab04322fbc732b24e18d208", @@ -164,8 +171,7 @@ "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" ], "virus_classification_tsv": [ [ @@ -204,6 +210,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T14:00:35.46649296" + "timestamp": "2024-04-22T14:11:30.358986937" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/run_propagate.nf.test b/workflows/phageannotator/tests/run_propagate.nf.test new file mode 100644 index 00000000..b61a561a --- /dev/null +++ b/workflows/phageannotator/tests/run_propagate.nf.test @@ -0,0 +1,80 @@ +nextflow_workflow { + + name "Test workflow: PHAGEANNOTATOR" + script "workflows/phageannotator/main.nf" + workflow "PHAGEANNOTATOR" + + // Dependencies + tag "SEQKIT_SEQ" + tag "QUALITYFILTERVIRUSES" + tag "ANICLUSTER_ANICALC" + tag "ANICLUSTER_ANICLUST" + tag "ANICLUSTER_EXTRACTREPS" + tag "COVERM_CONTIG" + tag "FASTQ_VIRUS_ENRICHMENT_VIROMEQC" + tag "FASTA_VIRUS_CLASSIFICATION_GENOMAD" + tag "FASTA_VIRUS_QUALITY_CHECKV" + tag "FASTA_CLUSTER_BLAST" + tag "FASTQ_FASTA_PROVIRUS_ACTIVITY_PROPAGATE" + tag "CAT_CAT" + tag "BOWTIE2_BUILD" + tag "GENOMAD_ENDTOEND" + tag "GUNZIP" + tag "FASTQ_ALIGN_BOWTIE2" + + + test("Parameters: run_propagate = true") { + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz', checkIfExists:true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz', checkIfExists: true) + ] + ], + [ + [ id:'test2' ], + [ + file(params.modules_testdata_base_path + 
'genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz', checkIfExists:true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz', checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists:true) + ], + [ + [ id:'test2' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists:true) + ] + ) + """ + } + params { + outdir = "$outputDir" + run_viromeqc = true + // parameters to decrease sensitivity for test data + mash_screen_min_score = 0.01 + genomad_min_score = 0.01 + genomad_max_fdr = 1 + // speed up options since tools are fully tested in subworkflows + genomad_disable_nn = true + genomad_sensitivity = 0.1 + checkv_minimal_db = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() }, + { assert path("${outputDir}/VirusAbundance/coverm/contig/all_samples_alignment_results.tsv").readLines().size() == 4 } + ) + } + } +} diff --git a/workflows/phageannotator/tests/run_cobra.nf.test.snap b/workflows/phageannotator/tests/run_propagate.nf.test.snap similarity index 65% rename from workflows/phageannotator/tests/run_cobra.nf.test.snap rename to workflows/phageannotator/tests/run_propagate.nf.test.snap index 5b57a1c3..0b224a88 100644 --- a/workflows/phageannotator/tests/run_cobra.nf.test.snap +++ b/workflows/phageannotator/tests/run_propagate.nf.test.snap @@ -1,9 +1,20 @@ { - "Parameters: run_cobra = true": { + "Parameters: run_propagate = true": { "content": [ { "0": [ - + [ + { + "id": "test2" + }, + "test2.viromeqc.tsv:md5,728ab79cbb6c7afd0dc3f38c5ddfe642" + ], + [ + { + "id": "test" + }, + "test.viromeqc.tsv:md5,b55f2c77b63adc41da980568819d1aae" + ] ], "1": [ [ @@ -19,6 +30,33 @@ 
"test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", + "versions.yml:md5,de688faac0a9124c4f2db74ce1b2c795", + "versions.yml:md5,dfa7110695181c4e8daa47d384563588" + ], "2": [ [ { @@ -68,49 +106,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,23d3ff61622483893bde6c53b291d178", - "versions.yml:md5,23d3ff61622483893bde6c53b291d178", - 
"versions.yml:md5,273722cb429ff390ed5fbb09c270c12c", - "versions.yml:md5,29077f60120b69bdbdb108e3e0f0a2b4", - "versions.yml:md5,29077f60120b69bdbdb108e3e0f0a2b4", - "versions.yml:md5,29e8e088f5688e61c5d396a70da43042", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,40fb32b1e3b7113ced7faf5b7adfb957", - "versions.yml:md5,40fb32b1e3b7113ced7faf5b7adfb957", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,75fada105167dd56003ab6a45fcdaa7c", - "versions.yml:md5,75fada105167dd56003ab6a45fcdaa7c", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,770c6885cf39f659d9bfab2cbede934c", - "versions.yml:md5,770c6885cf39f659d9bfab2cbede934c", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,8cc447f908cab7a36464c3b49dc0c50f", - "versions.yml:md5,8cc447f908cab7a36464c3b49dc0c50f", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a1dc4f2e0f6749f3c726622dab1cf194", - "versions.yml:md5,a1dc4f2e0f6749f3c726622dab1cf194", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,c1343b9b208c51fbf077dc9ff8024bb9", - "versions.yml:md5,c1343b9b208c51fbf077dc9ff8024bb9", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,fb73c0d103eb42772876d3eeb571e077", - "versions.yml:md5,fb73c0d103eb42772876d3eeb571e077" + ], "anicluster_reps_fna_gz": [ [ @@ -145,51 +141,36 @@ ], "pharokka_output_tsv": [ + ], + "propagate_results_tsv": [ + ], 
"versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,23d3ff61622483893bde6c53b291d178", - "versions.yml:md5,23d3ff61622483893bde6c53b291d178", - "versions.yml:md5,273722cb429ff390ed5fbb09c270c12c", - "versions.yml:md5,29077f60120b69bdbdb108e3e0f0a2b4", - "versions.yml:md5,29077f60120b69bdbdb108e3e0f0a2b4", - "versions.yml:md5,29e8e088f5688e61c5d396a70da43042", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,40fb32b1e3b7113ced7faf5b7adfb957", - "versions.yml:md5,40fb32b1e3b7113ced7faf5b7adfb957", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,75fada105167dd56003ab6a45fcdaa7c", - "versions.yml:md5,75fada105167dd56003ab6a45fcdaa7c", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,770c6885cf39f659d9bfab2cbede934c", - "versions.yml:md5,770c6885cf39f659d9bfab2cbede934c", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,8cc447f908cab7a36464c3b49dc0c50f", - "versions.yml:md5,8cc447f908cab7a36464c3b49dc0c50f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a1dc4f2e0f6749f3c726622dab1cf194", - "versions.yml:md5,a1dc4f2e0f6749f3c726622dab1cf194", 
"versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,c1343b9b208c51fbf077dc9ff8024bb9", - "versions.yml:md5,c1343b9b208c51fbf077dc9ff8024bb9", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,fb73c0d103eb42772876d3eeb571e077", - "versions.yml:md5,fb73c0d103eb42772876d3eeb571e077" + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", + "versions.yml:md5,de688faac0a9124c4f2db74ce1b2c795", + "versions.yml:md5,dfa7110695181c4e8daa47d384563588" ], "virus_classification_tsv": [ [ @@ -206,7 +187,18 @@ ] ], "virus_enrichment_tsv": [ - + [ + { + "id": "test2" + }, + "test2.viromeqc.tsv:md5,728ab79cbb6c7afd0dc3f38c5ddfe642" + ], + [ + { + "id": "test" + }, + "test.viromeqc.tsv:md5,b55f2c77b63adc41da980568819d1aae" + ] ], "virus_quality_tsv": [ [ @@ -222,13 +214,12 @@ "quality_summary.tsv:md5,5f1b0ace5264c11b2f5e8b0decbf2f9e" ] ] - }, - "COBRA_joining_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + } ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-04-18T16:48:03.595326882" + "timestamp": "2024-04-22T13:53:10.21158494" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/run_reference.nf.test.snap b/workflows/phageannotator/tests/run_reference.nf.test.snap index 170cfa96..fc57218e 100644 --- a/workflows/phageannotator/tests/run_reference.nf.test.snap +++ b/workflows/phageannotator/tests/run_reference.nf.test.snap @@ -19,6 +19,33 @@ "test.fasta_w_screen_hits_virus_summary.tsv:md5,f5b59da727b9793461f8b85cf7c818df" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + 
"versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,2b068b9678f63d6bb06a2a3652e92ba9", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", + "versions.yml:md5,c0598196e761dbcdc90ff95a2f623b9c" + ], "2": [ [ { @@ -68,31 +95,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,2b068b9678f63d6bb06a2a3652e92ba9", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - 
"versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,c0598196e761dbcdc90ff95a2f623b9c", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + ], "anicluster_reps_fna_gz": [ [ @@ -127,33 +130,36 @@ ], "pharokka_output_tsv": [ + ], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", "versions.yml:md5,2b068b9678f63d6bb06a2a3652e92ba9", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", 
"versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,c0598196e761dbcdc90ff95a2f623b9c", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", + "versions.yml:md5,c0598196e761dbcdc90ff95a2f623b9c" ], "virus_classification_tsv": [ [ @@ -192,6 +198,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T13:33:22.171793475" + "timestamp": "2024-04-22T13:50:31.530836284" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/run_viromeqc.nf.test.snap b/workflows/phageannotator/tests/run_viromeqc.nf.test.snap index e77a501a..17b0536f 100644 --- a/workflows/phageannotator/tests/run_viromeqc.nf.test.snap +++ b/workflows/phageannotator/tests/run_viromeqc.nf.test.snap @@ -30,6 +30,33 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + 
"versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", + "versions.yml:md5,de688faac0a9124c4f2db74ce1b2c795", + "versions.yml:md5,dfa7110695181c4e8daa47d384563588" + ], "2": [ [ { @@ -79,31 +106,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,de688faac0a9124c4f2db74ce1b2c795", - "versions.yml:md5,dfa7110695181c4e8daa47d384563588" + ], "anicluster_reps_fna_gz": [ [ @@ -138,31 +141,34 @@ ], "pharokka_output_tsv": [ + 
], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8", "versions.yml:md5,de688faac0a9124c4f2db74ce1b2c795", "versions.yml:md5,dfa7110695181c4e8daa47d384563588" ], @@ -214,6 +220,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T13:38:44.56292737" + "timestamp": "2024-04-22T13:51:35.228333569" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/skip_checkv.nf.test.snap b/workflows/phageannotator/tests/skip_checkv.nf.test.snap index 
ed3c158b..f3d76b3d 100644 --- a/workflows/phageannotator/tests/skip_checkv.nf.test.snap +++ b/workflows/phageannotator/tests/skip_checkv.nf.test.snap @@ -19,6 +19,29 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" + ], "2": [ ], @@ -57,27 +80,7 @@ ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - 
"versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + ], "anicluster_reps_fna_gz": [ [ @@ -112,29 +115,32 @@ ], "pharokka_output_tsv": [ + ], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - 
"versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" ], "virus_classification_tsv": [ [ @@ -162,6 +168,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T13:35:17.957528153" + "timestamp": "2024-04-22T13:54:15.532612908" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/skip_genomad.nf.test.snap b/workflows/phageannotator/tests/skip_genomad.nf.test.snap index 66f6883f..df0bbf91 100644 --- a/workflows/phageannotator/tests/skip_genomad.nf.test.snap +++ b/workflows/phageannotator/tests/skip_genomad.nf.test.snap @@ -7,6 +7,30 @@ ], "1": [ + ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", + "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" ], "2": [ [ @@ -57,28 +81,7 @@ ], "9": [ - 
"versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + ], "anicluster_reps_fna_gz": [ [ @@ -113,30 +116,33 @@ ], "pharokka_output_tsv": [ + ], + "propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,04bf25e5231ce1f8077581bb7a2c07f4", "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", "versions.yml:md5,1439e948f383899aaccbf49506923b04", "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,4696eb66d0d16ad990a587f48da57ed5", + "versions.yml:md5,6ac6b51480977632d667d706054442c4", "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,769114e3bb009aceb4763a1a0fa80d5a", 
"versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,8aebba726df29aad131772449019ee78", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", + "versions.yml:md5,91b576228bdf87ee5c9c0164bf81aaa1", "versions.yml:md5,987d2e54a85311b917599dc41c90418c", "versions.yml:md5,a856eefde99260259440d942c1d56ff4", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072", - "versions.yml:md5,d1e2a23440cc3dbeedc1c628bf792072" + "versions.yml:md5,be6e7650865785770e04bf2bf448cec8" ], "virus_classification_tsv": [ @@ -164,6 +170,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-17T13:34:20.139289774" + "timestamp": "2024-04-22T13:51:50.01837491" } } \ No newline at end of file diff --git a/workflows/phageannotator/tests/skip_virus_clustering.nf.test.snap b/workflows/phageannotator/tests/skip_virus_clustering.nf.test.snap index 703875ab..c083586d 100644 --- a/workflows/phageannotator/tests/skip_virus_clustering.nf.test.snap +++ b/workflows/phageannotator/tests/skip_virus_clustering.nf.test.snap @@ -3,7 +3,7 @@ "content": [ { "0": [ - + ], "1": [ [ @@ -19,6 +19,28 @@ "test_virus_summary.tsv:md5,ae6d66b39f6ea43628db461e42012027" ] ], + "10": [ + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", + "versions.yml:md5,1439e948f383899aaccbf49506923b04", + "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", + "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", + 
"versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", + "versions.yml:md5,987d2e54a85311b917599dc41c90418c", + "versions.yml:md5,a856eefde99260259440d942c1d56ff4", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", + "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b" + ], "2": [ [ { @@ -62,38 +84,19 @@ ] ], "5": [ - + ], "6": [ - + ], "7": [ - + ], "8": [ - + ], "9": [ - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0c310e8a973612ff99e0df847fb656ea", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,0e57d76cb0b6149bbb32aded3595de9b", - "versions.yml:md5,1439e948f383899aaccbf49506923b04", - "versions.yml:md5,374c63bf7c31c6c34acbf8d74e6c7dcc", - "versions.yml:md5,6323b255a9c1b9ed3e1a41aea5bfa2bb", - "versions.yml:md5,74b2068d24b17a2f4a097a3063da3dae", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7cb8c67c6ff9cf01f799d6730d5ebd15", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,7e54b5f822df67b99faf67629e81963f", - "versions.yml:md5,987d2e54a85311b917599dc41c90418c", - "versions.yml:md5,a856eefde99260259440d942c1d56ff4", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,b6a0d868f5f2be1093be05fc0638008d", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b", - "versions.yml:md5,bcf62c8c784b9f46c8372e86654cae6b" + ], "anicluster_reps_fna_gz": [ [ @@ -124,16 +127,19 @@ ] ], "host_predictions_tsv": [ - + ], "instrain_gene_info": [ - + ], "marker_taxonomy_tsv": [ - + ], "pharokka_output_tsv": [ - + + ], + 
"propagate_results_tsv": [ + ], "versions": [ "versions.yml:md5,005aa3fca95c916b586f09b1c12aae6f", @@ -172,7 +178,7 @@ ] ], "virus_enrichment_tsv": [ - + ], "virus_quality_tsv": [ [ @@ -194,6 +200,6 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-29T14:58:48.608848512" + "timestamp": "2024-04-22T13:54:21.718782526" } -} +} \ No newline at end of file