Calculate coverage statistics after mapping short-read sequenced isolates to a reference sequence
# Fetch the CoverageStats workflow repository.
git clone https://github.com/kylegontjes/CoverageStats.git
# Index the reference genome with the helper script. Replace the bracketed
# placeholder (here and below) with the path to your reference FASTA.
bash index_reference_genome.sh [reference_genome.fasta]
# NOTE(review): the commands below look like the manual equivalent of the
# helper script above -- confirm against index_reference_genome.sh.
module load Bioinformatics
module load bwa
# Build the BWA index for the reference.
bwa index [reference_genome.fasta]
module load samtools
# Build the FASTA index for the reference.
samtools faidx [reference_genome.fasta]
module load gatk
# Build the sequence dictionary for the reference.
gatk CreateSequenceDictionary -R [reference_genome.fasta]
# Directory containing the FASTQ files; edit to point at your own reads.
path="/nfs/esnitkin/Project_Penn_KPC/Sequence_data/fastq/Penn/SRA_submission/"
# Header written as the first line of the sample list.
sample_id="sample_id"
# Derive sample names from the forward-read files: keep entries containing
# _R1, drop everything from the first '.', then strip the _R1 tag.
sample_names=$(ls -1 "$path" | grep _R1 | cut -d. -f1 | sed 's/_R1//')
# Write the header followed by one sample name per line. printf replaces the
# original `echo -e\n ... | tr`, where the unquoted `-e\n` was parsed as the
# echo flags `-en` and left the file without a trailing newline.
# $sample_names is intentionally unquoted so each name becomes its own line.
# shellcheck disable=SC2086
printf '%s\n' "$sample_id" $sample_names > config/sample.tsv
3. Edit the config/cluster.json file with your account (account), email address (email), and desired walltime (walltime).
4. Update config/config.yaml or config/config_pretrimmed.yaml with the sample list (samples), the results folder name (prefix), the path to the input reads (input_reads), and the path to the reference genome (reference_genome).
# Load the container runtime and the workflow engine. These must be separate
# commands (or a single `module load singularity snakemake`): every word after
# `module load` is treated as a module name, so the fused form tried to load
# modules called "module" and "load".
module load singularity
module load snakemake
# Dry run: show the jobs that would be executed, printing their shell
# commands (-p), without running anything.
snakemake -s CoverageStats.smk --dryrun -p
# Submit the workflow as a Slurm batch job.
# NOTE(review): the two .sbat files look like alternatives (standard vs.
# pretrimmed reads) rather than steps to run back to back -- confirm.
sbatch CoverageStats.sbat
sbatch CoverageStats_pretrimmed.sbat
# Direct invocation of the workflow: each job is submitted through sbatch using
# the per-rule values from config/cluster.json, runs with Singularity
# containers, waits up to 30 s for output files to appear, and independent jobs
# keep running if one fails. Slurm logs are written to slurm_out/.
snakemake -s CoverageStats.smk --use-singularity -j 999 --cluster "sbatch -A {cluster.account} -p {cluster.partition} -N {cluster.nodes} -t {cluster.walltime} -c {cluster.procs} --mem-per-cpu {cluster.pmem} --output=slurm_out/slurm-%j.out" --cluster-config config/cluster.json --configfile config/config.yaml --latency-wait 30 --keep-going
# Same invocation using the pretrimmed Snakefile and config/config_pretrimmed.yaml.
snakemake -s CoverageStats_pretrimmed.smk --use-singularity -j 999 --cluster "sbatch -A {cluster.account} -p {cluster.partition} -N {cluster.nodes} -t {cluster.walltime} -c {cluster.procs} --mem-per-cpu {cluster.pmem} --output=slurm_out/slurm-%j.out" --cluster-config config/cluster.json --configfile config/config_pretrimmed.yaml --latency-wait 30 --keep-going