Skip to content

Commit 0347a4e

Browse files
committed
Add missing files
1 parent 86c6ec1 commit 0347a4e

File tree

4 files changed

+160
-0
lines changed

4 files changed

+160
-0
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
2+
# - Only tested with Phred33 quality scores
3+
4+
samples: config/samples.tsv
5+
6+
units: config/units.tsv
7+
8+
9+
10+
#############
11+
### READS ###
12+
#############
13+
trim:
14+
trim:
15+
activate: true
16+
tool: adapterremoval
17+
params: "--trimns --maxns 10 --trimqualities --minlength 30 --mask-degenerate-bases"
18+
19+
# Ignored for single-end (SE) reads; collapsing only applies to paired-end (PE) data
20+
collapse:
21+
activate: true
22+
params: "--collapse-conservatively"
23+
24+
derep:
25+
extension:
26+
activate: false
27+
k: 16
28+
params: "ibb=t prefilter=0 el=100 er=100 ecc=f ecco=f ignorebadquality extendrollback=0"
29+
30+
derep:
31+
activate: true
32+
# Supported values for "tool": vsearch or seqkit
33+
tool: seqkit
34+
params: ""
35+
36+
low_complex:
37+
params: "entropy=0.7 entropywindow=30 entropyk=4"
38+
39+
40+
41+
#############
42+
### ALIGN ###
43+
#############
44+
prefilter:
45+
taxa: "Bacteria,Archaea,Viruses"
46+
47+
ref:
48+
prok:
49+
n_shards: 2
50+
path: "data/prok.{n_shard}-of-2.fas.gz"
51+
map:
52+
tool: bowtie2
53+
params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal -N 1"
54+
bt2l: False
55+
acc2taxid: "data/prok.acc2taxid.gz"
56+
virus:
57+
n_shards: 1
58+
path: "data/virus.1-of-1.fas.gz"
59+
map:
60+
tool: bowtie2
61+
params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
62+
bt2l: False
63+
acc2taxid: "data/virus.acc2taxid.gz"
64+
65+
filter:
66+
saturated_reads:
67+
activate: true
68+
n_alns: 10
69+
70+
bam_filter:
71+
reassign:
72+
activate: false
73+
params: "--iters 0 --min-read-ani 92 --min-read-count 3 --scale 0 --reference-lengths genomes.len.map"
74+
75+
filter:
76+
activate: false
77+
params: "--min-read-ani 92 --min-read-count 3 --min-normalized-entropy 0.6 --min-normalized-gini 0.4 --min-avg-read-ani 94 --reference-lengths genomes.len.map"
78+
79+
lca:
80+
activate: false
81+
params: "--lca-rank genus --reference-lengths genomes.len.map"
82+
83+
taxonomy:
84+
nodes: "data/taxdump/nodes.dmp"
85+
names: "data/taxdump/names.dmp"
86+
87+
metadmg:
88+
damage:
89+
params: "--print_length 15"
90+
91+
lca:
92+
params: "--fix_ncbi 0 --how_many 25 --weight_type 1 --edit_dist_max 10000 --lca_rank genus"
93+
94+
dfit:
95+
params: "--nopt 5 --showfits 2"
96+
97+
98+
euk:
99+
ref:
100+
mitoch:
101+
n_shards: 1
102+
path: "data/mitoch.1-of-1.fas.gz"
103+
map:
104+
tool: bowtie2
105+
params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
106+
bt2l: False
107+
acc2taxid: "data/mitoch.acc2taxid.gz"
108+
plastid:
109+
n_shards: 1
110+
path: "data/plastid.1-of-1.fas.gz"
111+
map:
112+
tool: bowtie2
113+
params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
114+
bt2l: False
115+
acc2taxid: "data/plastid.acc2taxid.gz"
116+
117+
filter:
118+
saturated_reads:
119+
activate: true
120+
n_alns: 10
121+
122+
bam_filter:
123+
reassign:
124+
activate: false
125+
params: "--iters 0 --min-read-ani 92 --min-read-count 3 --scale 0"
126+
127+
filter:
128+
activate: false
129+
params: "--min-read-ani 92 --min-read-count 3 --min-normalized-entropy 0.6 --min-normalized-gini 0.4 --min-avg-read-ani 92"
130+
131+
lca:
132+
activate: false
133+
params: "--lca-rank genus"
134+
135+
taxonomy:
136+
nodes: "data/taxdump/nodes.dmp"
137+
names: "data/taxdump/names.dmp"
138+
139+
metadmg:
140+
damage:
141+
params: "--print_length 15"
142+
143+
lca:
144+
params: "--fix_ncbi 0 --how_many 15 --sim_score_low 0.95 --weight_type 0 --lca_rank genus"
145+
146+
dfit:
147+
params: "--nopt 5 --showfits 2"
148+
149+
150+
############
151+
## REPORT ##
152+
############
153+
report:
154+
multiqc: "--verbose --cl-config 'custom_logo: data/KU_long.png' --cl-config 'custom_logo_title: CAEG - Center for Ancient Environmental Genomics' --cl-config 'custom_logo_url: https://globe.ku.dk/research/caeg/'"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
sample alias group condition
2+
Lib ancient
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
sample library flowcell lane seq_type library_type material data machine run_n sample_n date center platform adapters
2+
Lib LVsim1 BHXXXXXXXX L001 PE ds DNA data/test_R{Read}.001.fq.gz SIMULATED 0000 S1 2025-10-09 CAEG ILLUMINA AGATCGGAAGAGCACACGTCTGAACTCCAGTCA,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
3+
Lib LVsim1 BHXXXXXXXX L002 PE ds DNA data/test_R{Read}.002.fq.gz SIMULATED 0000 S2 2025-10-09 CAEG ILLUMINA AGATCGGAAGAGCACACGTCTGAACTCCAGTCA,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
4+
Lib LVsim2 BHXXXXXXXX L001 PE ds DNA data/test_R{Read}.003.fq.gz SIMULATED 0000 S3 2025-10-09 CAEG ILLUMINA AGATCGGAAGAGCACACGTCTGAACTCCAGTCA,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT

0 commit comments

Comments
 (0)