diff --git a/phylogenetic/all-clades/Snakefile b/phylogenetic/all-clades/Snakefile new file mode 100644 index 00000000..2f19fec9 --- /dev/null +++ b/phylogenetic/all-clades/Snakefile @@ -0,0 +1,14 @@ +configfile: os.path.join(workflow.basedir, "../defaults/mpxv/config.yaml") + + +if os.path.exists("config.yaml"): + + configfile: "config.yaml" + + +include: "../rules/main.smk" + + +rule _all: + input: + rules.all.input, diff --git a/phylogenetic/clade-i/Snakefile b/phylogenetic/clade-i/Snakefile new file mode 100644 index 00000000..8ce60473 --- /dev/null +++ b/phylogenetic/clade-i/Snakefile @@ -0,0 +1,14 @@ +configfile: os.path.join(workflow.basedir, "../defaults/clade-i/config.yaml") + + +if os.path.exists("config.yaml"): + + configfile: "config.yaml" + + +include: "../rules/main.smk" + + +rule _all: + input: + rules.all.input, diff --git a/phylogenetic/clade-iib/Snakefile b/phylogenetic/clade-iib/Snakefile new file mode 100644 index 00000000..0d0fadf4 --- /dev/null +++ b/phylogenetic/clade-iib/Snakefile @@ -0,0 +1,14 @@ +configfile: os.path.join(workflow.basedir, "../defaults/hmpxv1/config.yaml") + + +if os.path.exists("config.yaml"): + + configfile: "config.yaml" + + +include: "../rules/main.smk" + + +rule _all: + input: + rules.all.input, diff --git a/phylogenetic/defaults/clade-i/config.yaml b/phylogenetic/defaults/clade-i/config.yaml index 82b6170c..68c32272 100644 --- a/phylogenetic/defaults/clade-i/config.yaml +++ b/phylogenetic/defaults/clade-i/config.yaml @@ -1,15 +1,15 @@ -reference: "defaults/clade-i/reference.fasta" -genome_annotation: "defaults/clade-i/genome_annotation.gff3" -genbank_reference: "defaults/clade-i/reference.gb" -include: "defaults/clade-i/include.txt" -exclude: "defaults/exclude.txt" -clades: "defaults/clades.tsv" -lat_longs: "defaults/lat_longs.tsv" -color_ordering: "defaults/color_ordering.tsv" -color_scheme: "defaults/color_schemes.tsv" -auspice_config: "defaults/clade-i/auspice_config.json" -description: "defaults/description.md" -tree_mask: "defaults/clade-i/tree_mask.tsv" +reference: "clade-i/reference.fasta" +genome_annotation: "clade-i/genome_annotation.gff3" +genbank_reference: "clade-i/reference.gb" +include: "clade-i/include.txt" +exclude: "exclude.txt" +clades: "clades.tsv" +lat_longs: "lat_longs.tsv" +color_ordering: "color_ordering.tsv" +color_scheme: "color_schemes.tsv" +auspice_config: "clade-i/auspice_config.json" +description: "description.md" +tree_mask: "clade-i/tree_mask.tsv" # Use `accession` as the ID column since `strain` currently contains duplicates¹. # ¹ https://github.com/nextstrain/mpox/issues/33 @@ -59,7 +59,7 @@ recency: true mask: from_beginning: 800 from_end: 6422 - maskfile: "defaults/clade-i/mask.bed" + maskfile: "clade-i/mask.bed" colors: ignore_categories: diff --git a/phylogenetic/defaults/hmpxv1/config.yaml b/phylogenetic/defaults/hmpxv1/config.yaml index 6f95b154..c388c913 100644 --- a/phylogenetic/defaults/hmpxv1/config.yaml +++ b/phylogenetic/defaults/hmpxv1/config.yaml @@ -1,15 +1,15 @@ -reference: "defaults/reference.fasta" -genome_annotation: "defaults/genome_annotation.gff3" -genbank_reference: "defaults/reference.gb" -include: "defaults/hmpxv1/include.txt" -exclude: "defaults/exclude.txt" -clades: "defaults/clades.tsv" -lat_longs: "defaults/lat_longs.tsv" -color_ordering: "defaults/color_ordering.tsv" -color_scheme: "defaults/color_schemes.tsv" -auspice_config: "defaults/hmpxv1/auspice_config.json" -description: "defaults/description.md" -tree_mask: "defaults/tree_mask.tsv" +reference: "reference.fasta" +genome_annotation: "genome_annotation.gff3" +genbank_reference: "reference.gb" +include: "hmpxv1/include.txt" +exclude: "exclude.txt" +clades: "clades.tsv" +lat_longs: "lat_longs.tsv" +color_ordering: "color_ordering.tsv" +color_scheme: "color_schemes.tsv" +auspice_config: "hmpxv1/auspice_config.json" +description: "description.md" +tree_mask: "tree_mask.tsv" # Use `accession` as the ID column since `strain` currently contains duplicates¹. # ¹ https://github.com/nextstrain/mpox/issues/33 @@ -101,4 +101,4 @@ recency: true mask: from_beginning: 800 from_end: 6422 - maskfile: "defaults/mask.bed" + maskfile: "mask.bed" diff --git a/phylogenetic/defaults/hmpxv1_big/config.yaml b/phylogenetic/defaults/hmpxv1_big/config.yaml index 0e8220dd..83afeeaa 100644 --- a/phylogenetic/defaults/hmpxv1_big/config.yaml +++ b/phylogenetic/defaults/hmpxv1_big/config.yaml @@ -1,15 +1,15 @@ -reference: "defaults/reference.fasta" -genome_annotation: "defaults/genome_annotation.gff3" -genbank_reference: "defaults/reference.gb" -include: "defaults/hmpxv1_big/include.txt" -exclude: "defaults/exclude.txt" -clades: "defaults/clades.tsv" -lat_longs: "defaults/lat_longs.tsv" -color_ordering: "defaults/color_ordering.tsv" -color_scheme: "defaults/color_schemes.tsv" -auspice_config: "defaults/hmpxv1_big/auspice_config.json" -description: "defaults/description.md" -tree_mask: "defaults/tree_mask.tsv" +reference: "reference.fasta" +genome_annotation: "genome_annotation.gff3" +genbank_reference: "reference.gb" +include: "hmpxv1_big/include.txt" +exclude: "exclude.txt" +clades: "clades.tsv" +lat_longs: "lat_longs.tsv" +color_ordering: "color_ordering.tsv" +color_scheme: "color_schemes.tsv" +auspice_config: "hmpxv1_big/auspice_config.json" +description: "description.md" +tree_mask: "tree_mask.tsv" # Use `accession` as the ID column since `strain` currently contains duplicates¹. # ¹ https://github.com/nextstrain/mpox/issues/33 @@ -64,4 +64,4 @@ recency: true mask: from_beginning: 800 from_end: 6422 - maskfile: "defaults/mask.bed" + maskfile: "mask.bed" diff --git a/phylogenetic/defaults/mpxv/config.yaml b/phylogenetic/defaults/mpxv/config.yaml index 7f39b283..7592b023 100644 --- a/phylogenetic/defaults/mpxv/config.yaml +++ b/phylogenetic/defaults/mpxv/config.yaml @@ -1,15 +1,15 @@ -auspice_config: "defaults/mpxv/auspice_config.json" -include: "defaults/mpxv/include.txt" -exclude: "defaults/exclude.txt" -reference: "defaults/reference.fasta" -genome_annotation: "defaults/genome_annotation.gff3" -genbank_reference: "defaults/reference.gb" -lat_longs: "defaults/lat_longs.tsv" -color_ordering: "defaults/color_ordering.tsv" -color_scheme: "defaults/color_schemes.tsv" -description: "defaults/description.md" -clades: "defaults/clades.tsv" -tree_mask: "defaults/tree_mask.tsv" +auspice_config: "mpxv/auspice_config.json" +include: "mpxv/include.txt" +exclude: "exclude.txt" +reference: "reference.fasta" +genome_annotation: "genome_annotation.gff3" +genbank_reference: "reference.gb" +lat_longs: "lat_longs.tsv" +color_ordering: "color_ordering.tsv" +color_scheme: "color_schemes.tsv" +description: "description.md" +clades: "clades.tsv" +tree_mask: "tree_mask.tsv" # Use `accession` as the ID column since `strain` currently contains duplicates¹. # ¹ https://github.com/nextstrain/mpox/issues/33 @@ -94,4 +94,4 @@ recency: true mask: from_beginning: 1350 from_end: 6422 - maskfile: "defaults/mask_overview.bed" + maskfile: "mask_overview.bed" diff --git a/phylogenetic/lineage-b.1/Snakefile b/phylogenetic/lineage-b.1/Snakefile new file mode 100644 index 00000000..8a3a1f76 --- /dev/null +++ b/phylogenetic/lineage-b.1/Snakefile @@ -0,0 +1,14 @@ +configfile: os.path.join(workflow.basedir, "../defaults/hmpxv1_big/config.yaml") + + +if os.path.exists("config.yaml"): + + configfile: "config.yaml" + + +include: "../rules/main.smk" + + +rule _all: + input: + rules.all.input, diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index a94433f3..67a76c07 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -60,7 +60,7 @@ rule translate: input: tree=build_dir + "/{build_name}/tree.nwk", node_data=build_dir + "/{build_name}/nt_muts.json", - genome_annotation=config["genome_annotation"], + genome_annotation=phylo_resolve_config_path(config["genome_annotation"]), output: node_data=build_dir + "/{build_name}/aa_muts.json", log: @@ -120,7 +120,7 @@ rule clades: tree=build_dir + "/{build_name}/tree.nwk", aa_muts=build_dir + "/{build_name}/aa_muts.json", nuc_muts=build_dir + "/{build_name}/nt_muts.json", - clades=config["clades"], + clades=phylo_resolve_config_path(config["clades"]), output: node_data=build_dir + "/{build_name}/clades_raw.json", log: @@ -154,7 +154,7 @@ rule rename_clades: r""" exec &> >(tee {log:q}) - python scripts/clades_renaming.py \ + python {workflow.basedir}/../scripts/clades_renaming.py \ --input-node-data {input:q} \ --output-node-data {output.node_data:q} """ @@ -180,7 +180,7 @@ rule assign_clades_via_metadata: r""" exec &> >(tee {log:q}) - python scripts/assign-clades-via-metadata.py \ + python {workflow.basedir}/../scripts/assign-clades-via-metadata.py \ --metadata {input.metadata:q} \ --tree {input.tree:q} \ --output-node-data {output.node_data:q} @@ -201,7 +201,7 @@ rule mutation_context: r""" exec &> >(tee {log:q}) - python3 scripts/mutation_context.py \ + python3 {workflow.basedir}/../scripts/mutation_context.py \ --tree {input.tree:q} \ --mutations {input.node_data:q} \ --output {output.node_data:q} @@ -226,7 +226,7 @@ rule recency: r""" exec &> >(tee {log:q}) - python3 scripts/construct-recency-from-submission-date.py \ + python3 {workflow.basedir}/../scripts/construct-recency-from-submission-date.py \ --metadata {input.metadata:q} \ --metadata-id-columns {params.strain_id:q} \ --output {output:q} 2>&1 diff --git a/phylogenetic/rules/config.smk b/phylogenetic/rules/config.smk index 0d94d6b4..92d64559 100644 --- a/phylogenetic/rules/config.smk +++ b/phylogenetic/rules/config.smk @@ -7,6 +7,23 @@ from textwrap import dedent, indent from typing import Union +include: "../../shared/vendored/snakemake/config.smk" + + +def phylo_resolve_config_path(path: str) -> Callable[[Wildcards], str]: + """ + Wrapper around the shared `resolve_config_path` to force the default directory + to be `phylogenetic/defaults`. This is necessary because the entry point for + each build is nested within phylogenetic (e.g. phylogenetic/clade-i/Snakefile). + """ + PHYLO_DEFAULTS_DIR = os.path.normpath(os.path.join(workflow.current_basedir, "../defaults")) + # Strip the `defaults/` prefix to be backwards compatible with older configs + # This is necessary in this wrapper because we are providing a custom defaults dir + # which skips the handling of the defaults/ prefix within resolve_config_path. + path = path.removeprefix("defaults/") + return resolve_config_path(path, PHYLO_DEFAULTS_DIR) + + def as_list(config_param: Union[list,str]) -> list: if isinstance(config_param, list): return config_param diff --git a/phylogenetic/rules/construct_phylogeny.smk b/phylogenetic/rules/construct_phylogeny.smk index f9609cff..96575901 100644 --- a/phylogenetic/rules/construct_phylogeny.smk +++ b/phylogenetic/rules/construct_phylogeny.smk @@ -21,7 +21,7 @@ rule tree: """ input: alignment=build_dir + "/{build_name}/masked.fasta", - tree_mask=config["tree_mask"], + tree_mask=phylo_resolve_config_path(config["tree_mask"]), output: tree=build_dir + "/{build_name}/tree_raw.nwk", threads: workflow.cores @@ -64,7 +64,7 @@ rule fix_tree: r""" exec &> >(tee {log:q}) - python3 scripts/fix_tree.py \ + python3 {workflow.basedir}/../scripts/fix_tree.py \ --alignment {input.alignment:q} \ --input-tree {input.tree:q} \ {params.root} \ diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index 01ed2699..b95ee89c 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -43,7 +43,7 @@ rule remove_time: r""" exec &> >(tee {log:q}) - python3 scripts/remove_timeinfo.py \ + python3 {workflow.basedir}/../scripts/remove_timeinfo.py \ --input-node-data {input:q} \ --output-node-data {output:q} """ @@ -51,8 +51,8 @@ rule remove_time: rule colors: input: - ordering=config["color_ordering"], - color_schemes=config["color_scheme"], + ordering=phylo_resolve_config_path(config["color_ordering"]), + color_schemes=phylo_resolve_config_path(config["color_scheme"]), metadata=build_dir + "/{build_name}/metadata.tsv", output: colors=build_dir + "/{build_name}/colors.tsv", @@ -66,7 +66,7 @@ rule colors: r""" exec &> >(tee {log:q}) - python3 scripts/assign-colors.py \ + python3 {workflow.basedir}/../scripts/assign-colors.py \ --ordering {input.ordering:q} \ --color-schemes {input.color_schemes:q} \ --output {output.colors:q} \ @@ -102,9 +102,9 @@ rule export: else [] ), colors=build_dir + "/{build_name}/colors.tsv", - lat_longs=config["lat_longs"], - description=config["description"], - auspice_config=config["auspice_config"], + lat_longs=phylo_resolve_config_path(config["lat_longs"]), + description=phylo_resolve_config_path(config["description"]), + auspice_config=phylo_resolve_config_path(config["auspice_config"]), output: auspice_json=build_dir + "/{build_name}/tree.json", root_sequence=build_dir + "/{build_name}/tree_root-sequence.json", diff --git a/phylogenetic/Snakefile b/phylogenetic/rules/main.smk similarity index 72% rename from phylogenetic/Snakefile rename to phylogenetic/rules/main.smk index f9a67e9d..87fabf6b 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/rules/main.smk @@ -10,10 +10,6 @@ if version.parse(augur_version) < version.parse(min_augur_version): ) sys.exit(1) -if not config: - - configfile: "defaults/hmpxv1/config.yaml" - build_dir = "results" auspice_dir = "auspice" @@ -38,18 +34,24 @@ rule all: """ -include: "rules/config.smk" -include: "rules/prepare_sequences.smk" -include: "rules/construct_phylogeny.smk" -include: "rules/annotate_phylogeny.smk" -include: "rules/export.smk" +include: "config.smk" +include: "prepare_sequences.smk" +include: "construct_phylogeny.smk" +include: "annotate_phylogeny.smk" +include: "export.smk" # Include custom rules defined in the config. if "custom_rules" in config: for rule_file in config["custom_rules"]: - include: rule_file + # Relative custom rule paths in the config are relative to the analysis + # directory (i.e. the current working directory, or workdir, usually + # given by --directory), but the "include" directive treats relative + # paths as relative to the workflow (e.g. workflow.current_basedir). + # Convert to an absolute path based on the analysis/current directory + # to avoid this mismatch of expectations. + include: os.path.join(os.getcwd(), rule_file) rule clean: diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 9b7abc02..70f627be 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -69,7 +69,7 @@ rule filter: input: sequences="data/sequences.fasta", metadata="data/metadata.tsv", - exclude=config["exclude"], + exclude=phylo_resolve_config_path(config["exclude"]), output: sequences=build_dir + "/{build_name}/good_sequences.fasta", metadata=build_dir + "/{build_name}/good_metadata.tsv", @@ -128,7 +128,7 @@ rule add_private_data: r""" exec &> >(tee {log:q}) - python3 scripts/combine_data_sources.py \ + python3 {workflow.basedir}/../scripts/combine_data_sources.py \ --metadata nextstrain={input.metadata:q} private={input.private_metadata:q} \ --sequences {input.sequences:q} {input.private_sequences:q} \ --output-metadata {output.metadata:q} \ @@ -182,7 +182,7 @@ rule combine_samples: if config.get("private_metadata", False) else build_dir + "/{build_name}/good_metadata.tsv" ), - include=config["include"], + include=phylo_resolve_config_path(config["include"]), output: sequences=build_dir + "/{build_name}/filtered.fasta", metadata=build_dir + "/{build_name}/metadata.tsv", @@ -221,7 +221,7 @@ rule reverse_reverse_complements: r""" exec &> >(tee {log:q}) - python3 scripts/reverse_reversed_sequences.py \ + python3 {workflow.basedir}/../scripts/reverse_reversed_sequences.py \ --metadata {input.metadata:q} \ --sequences {input.sequences:q} \ --output {output:q} @@ -234,8 +234,8 @@ rule align: """ input: sequences=build_dir + "/{build_name}/reversed.fasta", - reference=config["reference"], - genome_annotation=config["genome_annotation"], + reference=phylo_resolve_config_path(config["reference"]), + genome_annotation=phylo_resolve_config_path(config["genome_annotation"]), output: alignment=build_dir + "/{build_name}/aligned.fasta", params: @@ -279,7 +279,7 @@ rule mask: """ input: sequences=build_dir + "/{build_name}/aligned.fasta", - mask=config["mask"]["maskfile"], + mask=phylo_resolve_config_path(config["mask"]["maskfile"]), output: build_dir + "/{build_name}/masked.fasta", params: