From 55d7e35c561a4ff3efe3290432a942af770fd7ee Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 28 Oct 2022 11:46:21 -0700 Subject: [PATCH 01/17] Fix shebangs on R scripts --- bin/proc_CP_output_dauer.R | 2 +- bin/proc_CP_output_toxin.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/proc_CP_output_dauer.R b/bin/proc_CP_output_dauer.R index 92ad0b1..a25647f 100644 --- a/bin/proc_CP_output_dauer.R +++ b/bin/proc_CP_output_dauer.R @@ -1,4 +1,4 @@ -# /usr/bin/Rscript +#!/usr/bin/env Rscript library(fs) library(dplyr) library(tidyr) diff --git a/bin/proc_CP_output_toxin.R b/bin/proc_CP_output_toxin.R index 92ad0b1..a25647f 100644 --- a/bin/proc_CP_output_toxin.R +++ b/bin/proc_CP_output_toxin.R @@ -1,4 +1,4 @@ -# /usr/bin/Rscript +#!/usr/bin/env Rscript library(fs) library(dplyr) library(tidyr) From cf3e4e62e859264ce3f63a7677a895d16967414e Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 28 Oct 2022 11:57:18 -0700 Subject: [PATCH 02/17] Move processes to modules --- main.nf | 173 ++++---------------------------------- modules/cellprofiler.nf | 23 +++++ modules/process_input.nf | 64 ++++++++++++++ modules/process_output.nf | 69 +++++++++++++++ 4 files changed, 171 insertions(+), 158 deletions(-) create mode 100644 modules/cellprofiler.nf create mode 100644 modules/process_input.nf create mode 100644 modules/process_output.nf diff --git a/main.nf b/main.nf index 6ad3fdc..950ae9f 100644 --- a/main.nf +++ b/main.nf @@ -1,7 +1,7 @@ #!/usr/bin/env nextflow -// Use DSL2 -nextflow.preview.dsl=2 +// Using DSL-2 +nextflow.enable.dsl=2 // QUEST nextflow version message if( !nextflow.version.matches('>20.0') ) { @@ -10,6 +10,19 @@ if( !nextflow.version.matches('>20.0') ) { exit 1 } +// INCLUDE modules +include { + config_CP_input_dauer + config_CP_input_toxin +} from './modules/process_input.nf' +include { + proc_CP_output_dauer + proc_CP_output_toxin +} from './modules/process_output.nf' +include { + runCP +} from './modules/cellprofiler.nf' + /* ~ ~ ~ > * PARAMETERS SETUP */ @@ -184,162 +197,6 @@ workflow { } } -/* -~ ~ ~ > * CONFIGURE FILES FOR CELLPROFILER -*/ - -process config_CP_input_dauer { - publishDir "${params.out}/pipeline", mode: 'copy', pattern: "*.cppipe" - publishDir "${params.out}/metadata", mode: 'copy', pattern: "metadata.csv" - publishDir "${params.out}/groups", mode: 'copy', pattern: "groups.tsv" - - input: - tuple file(raw_pipe), val(meta_dir), val(meta), val(model_dir), val(model1), val(model2), - file(config_script), val(project), val(mask), val(group), val(edited_pipe), val(out) - - output: - path "*.cppipe", emit: cp_pipeline_file - path "metadata.csv", emit: metadata_file - path "groups.tsv", emit: groups_file - - - """ - # Configure the raw pipeline for CellProfiler - awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ - awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ - awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ - awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ - awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' > pipeline.cppipe - - # Configure metadata and groups for CellProfiller with config_CP_input.R - Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} - - """ -} - -process config_CP_input_toxin { - publishDir "${params.out}/pipeline", mode: 'copy', pattern: "*.cppipe" - publishDir "${params.out}/metadata", mode: 'copy', pattern: "metadata.csv" - publishDir "${params.out}/groups", mode: 'copy', pattern: "groups.tsv" - - input: - tuple file(raw_pipe), val(meta_dir), val(meta), val(model_dir), val(model1), val(model2), val(model3), val(model4), - file(config_script), val(project), val(mask), val(group), val(edited_pipe), val(out) - - output: - path "*.cppipe", emit: cp_pipeline_file - path "metadata.csv", emit: metadata_file - path "groups.tsv", emit: groups_file - - - """ - # Configure the raw pipeline for CellProfiler - awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ - awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ - awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ - awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ - awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' | \\ - awk '{gsub(/MODEL3_XML_FILE/,"${model3}"); print}' | \\ - awk '{gsub(/MODEL4_XML_FILE/,"${model4}"); print}' > pipeline.cppipe - - # Configure metadata and groups for CellProfiller with config_CP_input.R - Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} - - """ -} - -/* -~ ~ ~ > * RUN CELLPROFILER -*/ - -process runCP { - - label "cellpro" - - input: - tuple val(group), file(pipeline), file(output) - - output: - stdout emit: cp_output //tuple file("*.csv"), file("*.png"), emit: cp_output - - """ - # Run cellprofiler headless - cellprofiler -c -r -p ${pipeline} \ - -g ${group} \ - -o ${output} - - """ -} - -/* -~ ~ ~ > * PROCESS CELLPROFILER OUTPUTS -*/ - -process proc_CP_output_dauer { - - //publishDir "${params.out}/processed_data", mode: 'copy', pattern: "*.RData" - - input: - tuple val(cp_output), val(out_dir), val(model_name1), val(model_name2), file(proc_CP_out_script) - - output: - //path "*.RData", emit: cp_out_dat - - """ - # remove exisitng directories if present and make fresh - if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi - mkdir ${out_dir}/processed_data - if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi - mkdir ${out_dir}/processed_images - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv - - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv - - # move all the output images to process_images directory END WITH /? - find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; - # Process the CellProfiler output with proc_CP_output.R - Rscript --vanilla ${proc_CP_out_script} ${out_dir} - - """ -} - -process proc_CP_output_toxin { - - //publishDir "${params.out}/processed_data", mode: 'copy', pattern: "*.RData" - - input: - tuple val(cp_output), val(out_dir), val(model_name1), val(model_name2), - val(model_name3), val(model_name4), file(proc_CP_out_script) - - output: - //path "*.RData", emit: cp_out_dat - - """ - # remove exisitng directories if present and make fresh - if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi - mkdir ${out_dir}/processed_data - if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi - mkdir ${out_dir}/processed_images - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv - - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name3}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name3}.csv - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name4}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name4}.csv - - # move all the output images to process_images directory END WITH /? - find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; - # Process the CellProfiler output with proc_CP_output.R - Rscript --vanilla ${proc_CP_out_script} ${out_dir} - - """ -} - /* ~ ~ ~ > * GENERATE REPORT */ diff --git a/modules/cellprofiler.nf b/modules/cellprofiler.nf new file mode 100644 index 0000000..a3d80ad --- /dev/null +++ b/modules/cellprofiler.nf @@ -0,0 +1,23 @@ +#!/usr/bin/env nextflow + +// Using DSL-2 +nextflow.enable.dsl=2 + +process runCP { + + label "cellpro" + + input: + tuple val(group), file(pipeline), file(output) + + output: + stdout emit: cp_output //tuple file("*.csv"), file("*.png"), emit: cp_output + + """ + # Run cellprofiler headless + cellprofiler -c -r -p ${pipeline} \ + -g ${group} \ + -o ${output} + + """ +} \ No newline at end of file diff --git a/modules/process_input.nf b/modules/process_input.nf new file mode 100644 index 0000000..d49e74a --- /dev/null +++ b/modules/process_input.nf @@ -0,0 +1,64 @@ +#!/usr/bin/env nextflow + +// Using DSL-2 +nextflow.enable.dsl=2 + +process config_CP_input_dauer { + publishDir "${params.out}/pipeline", mode: 'copy', pattern: "*.cppipe" + publishDir "${params.out}/metadata", mode: 'copy', pattern: "metadata.csv" + publishDir "${params.out}/groups", mode: 'copy', pattern: "groups.tsv" + + input: + tuple file(raw_pipe), val(meta_dir), val(meta), val(model_dir), val(model1), val(model2), + file(config_script), val(project), val(mask), val(group), val(edited_pipe), val(out) + + output: + path "*.cppipe", emit: cp_pipeline_file + path "metadata.csv", emit: metadata_file + path "groups.tsv", emit: groups_file + + + """ + # Configure the raw pipeline for CellProfiler + awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ + awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ + awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ + awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ + awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' > pipeline.cppipe + + # Configure metadata and groups for CellProfiller with config_CP_input.R + Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} + + """ +} + +process config_CP_input_toxin { + publishDir "${params.out}/pipeline", mode: 'copy', pattern: "*.cppipe" + publishDir "${params.out}/metadata", mode: 'copy', pattern: "metadata.csv" + publishDir "${params.out}/groups", mode: 'copy', pattern: "groups.tsv" + + input: + tuple file(raw_pipe), val(meta_dir), val(meta), val(model_dir), val(model1), val(model2), val(model3), val(model4), + file(config_script), val(project), val(mask), val(group), val(edited_pipe), val(out) + + output: + path "*.cppipe", emit: cp_pipeline_file + path "metadata.csv", emit: metadata_file + path "groups.tsv", emit: groups_file + + + """ + # Configure the raw pipeline for CellProfiler + awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ + awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ + awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ + awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ + awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' | \\ + awk '{gsub(/MODEL3_XML_FILE/,"${model3}"); print}' | \\ + awk '{gsub(/MODEL4_XML_FILE/,"${model4}"); print}' > pipeline.cppipe + + # Configure metadata and groups for CellProfiller with config_CP_input.R + Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} + + """ +} \ No newline at end of file diff --git a/modules/process_output.nf b/modules/process_output.nf new file mode 100644 index 0000000..b8f9aca --- /dev/null +++ b/modules/process_output.nf @@ -0,0 +1,69 @@ +#!/usr/bin/env nextflow + +// Using DSL-2 +nextflow.enable.dsl=2 + +process proc_CP_output_dauer { + + //publishDir "${params.out}/processed_data", mode: 'copy', pattern: "*.RData" + + input: + tuple val(cp_output), val(out_dir), val(model_name1), val(model_name2), file(proc_CP_out_script) + + output: + //path "*.RData", emit: cp_out_dat + + """ + # remove exisitng directories if present and make fresh + if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi + mkdir ${out_dir}/processed_data + if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi + mkdir ${out_dir}/processed_images + # find .csv files, concatenate them, and write new file + find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv + + # find .csv files, concatenate them, and write new file + find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv + + # move all the output images to process_images directory END WITH /? + find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; + # Process the CellProfiler output with proc_CP_output.R + Rscript --vanilla ${proc_CP_out_script} ${out_dir} + + """ +} + +process proc_CP_output_toxin { + + //publishDir "${params.out}/processed_data", mode: 'copy', pattern: "*.RData" + + input: + tuple val(cp_output), val(out_dir), val(model_name1), val(model_name2), + val(model_name3), val(model_name4), file(proc_CP_out_script) + + output: + //path "*.RData", emit: cp_out_dat + + """ + # remove exisitng directories if present and make fresh + if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi + mkdir ${out_dir}/processed_data + if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi + mkdir ${out_dir}/processed_images + # find .csv files, concatenate them, and write new file + find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv + + # find .csv files, concatenate them, and write new file + find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv + # find .csv files, concatenate them, and write new file + find ${out_dir}/CP_output -type f -name '${model_name3}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name3}.csv + # find .csv files, concatenate them, and write new file + find ${out_dir}/CP_output -type f -name '${model_name4}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name4}.csv + + # move all the output images to process_images directory END WITH /? + find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; + # Process the CellProfiler output with proc_CP_output.R + Rscript --vanilla ${proc_CP_out_script} ${out_dir} + + """ +} \ No newline at end of file From 691d9f18b5ae55e8fa6d740b5dda44a9dfe72646 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 28 Oct 2022 12:02:35 -0700 Subject: [PATCH 03/17] Reformat modules for readability --- modules/cellprofiler.nf | 7 +++-- modules/process_input.nf | 66 +++++++++++++++++++++++++++++---------- modules/process_output.nf | 15 +++++++-- 3 files changed, 67 insertions(+), 21 deletions(-) diff --git a/modules/cellprofiler.nf b/modules/cellprofiler.nf index a3d80ad..85b286d 100644 --- a/modules/cellprofiler.nf +++ b/modules/cellprofiler.nf @@ -8,10 +8,13 @@ process runCP { label "cellpro" input: - tuple val(group), file(pipeline), file(output) + tuple val(group), + file(pipeline), + file(output) output: - stdout emit: cp_output //tuple file("*.csv"), file("*.png"), emit: cp_output + stdout emit: cp_output + //tuple file("*.csv"), file("*.png"), emit: cp_output """ # Run cellprofiler headless diff --git a/modules/process_input.nf b/modules/process_input.nf index d49e74a..b6ee677 100644 --- a/modules/process_input.nf +++ b/modules/process_input.nf @@ -4,18 +4,34 @@ nextflow.enable.dsl=2 process config_CP_input_dauer { - publishDir "${params.out}/pipeline", mode: 'copy', pattern: "*.cppipe" - publishDir "${params.out}/metadata", mode: 'copy', pattern: "metadata.csv" - publishDir "${params.out}/groups", mode: 'copy', pattern: "groups.tsv" + publishDir "${params.out}/pipeline", + mode: 'copy', + pattern: "*.cppipe" + publishDir "${params.out}/metadata", + mode: 'copy', + pattern: "metadata.csv" + publishDir "${params.out}/groups", + mode: 'copy', + pattern: "groups.tsv" input: - tuple file(raw_pipe), val(meta_dir), val(meta), val(model_dir), val(model1), val(model2), - file(config_script), val(project), val(mask), val(group), val(edited_pipe), val(out) + tuple file(raw_pipe), + val(meta_dir), + val(meta), + val(model_dir), + val(model1), + val(model2), + file(config_script), + val(project), + val(mask), + val(group), + val(edited_pipe), + val(out) output: - path "*.cppipe", emit: cp_pipeline_file - path "metadata.csv", emit: metadata_file - path "groups.tsv", emit: groups_file + path "*.cppipe", emit: cp_pipeline_file + path "metadata.csv", emit: metadata_file + path "groups.tsv", emit: groups_file """ @@ -33,18 +49,36 @@ process config_CP_input_dauer { } process config_CP_input_toxin { - publishDir "${params.out}/pipeline", mode: 'copy', pattern: "*.cppipe" - publishDir "${params.out}/metadata", mode: 'copy', pattern: "metadata.csv" - publishDir "${params.out}/groups", mode: 'copy', pattern: "groups.tsv" + publishDir "${params.out}/pipeline", + mode: 'copy', + pattern: "*.cppipe" + publishDir "${params.out}/metadata", + mode: 'copy', + pattern: "metadata.csv" + publishDir "${params.out}/groups", + mode: 'copy', + pattern: "groups.tsv" input: - tuple file(raw_pipe), val(meta_dir), val(meta), val(model_dir), val(model1), val(model2), val(model3), val(model4), - file(config_script), val(project), val(mask), val(group), val(edited_pipe), val(out) + tuple file(raw_pipe), + val(meta_dir), + val(meta), + val(model_dir), + val(model1), + val(model2), + val(model3), + val(model4), + file(config_script), + val(project), + val(mask), + val(group), + val(edited_pipe), + val(out) output: - path "*.cppipe", emit: cp_pipeline_file - path "metadata.csv", emit: metadata_file - path "groups.tsv", emit: groups_file + path "*.cppipe", emit: cp_pipeline_file + path "metadata.csv", emit: metadata_file + path "groups.tsv", emit: groups_file """ diff --git a/modules/process_output.nf b/modules/process_output.nf index b8f9aca..c5ca01c 100644 --- a/modules/process_output.nf +++ b/modules/process_output.nf @@ -8,7 +8,11 @@ process proc_CP_output_dauer { //publishDir "${params.out}/processed_data", mode: 'copy', pattern: "*.RData" input: - tuple val(cp_output), val(out_dir), val(model_name1), val(model_name2), file(proc_CP_out_script) + tuple val(cp_output), + val(out_dir), + val(model_name1), + val(model_name2), + file(proc_CP_out_script) output: //path "*.RData", emit: cp_out_dat @@ -38,8 +42,13 @@ process proc_CP_output_toxin { //publishDir "${params.out}/processed_data", mode: 'copy', pattern: "*.RData" input: - tuple val(cp_output), val(out_dir), val(model_name1), val(model_name2), - val(model_name3), val(model_name4), file(proc_CP_out_script) + tuple val(cp_output), + val(out_dir), + val(model_name1), + val(model_name2), + val(model_name3), + val(model_name4), + file(proc_CP_out_script) output: //path "*.RData", emit: cp_out_dat From a1867ece1a7b97b633f6eece143da7cdbe45fd03 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 28 Oct 2022 12:06:22 -0700 Subject: [PATCH 04/17] Move scripts into templates --- modules/cellprofiler.nf | 9 ++---- modules/process_input.nf | 30 +++----------------- modules/process_output.nf | 45 ++++-------------------------- templates/config_CP_input_dauer.sh | 15 ++++++++++ templates/config_CP_input_toxin.sh | 17 +++++++++++ templates/proc_CP_output_dauer.sh | 21 ++++++++++++++ templates/proc_CP_output_toxin.sh | 25 +++++++++++++++++ templates/runCP.sh | 10 +++++++ 8 files changed, 99 insertions(+), 73 deletions(-) create mode 100644 templates/config_CP_input_dauer.sh create mode 100644 templates/config_CP_input_toxin.sh create mode 100644 templates/proc_CP_output_dauer.sh create mode 100644 templates/proc_CP_output_toxin.sh create mode 100644 templates/runCP.sh diff --git a/modules/cellprofiler.nf b/modules/cellprofiler.nf index 85b286d..78c1bdd 100644 --- a/modules/cellprofiler.nf +++ b/modules/cellprofiler.nf @@ -16,11 +16,6 @@ process runCP { stdout emit: cp_output //tuple file("*.csv"), file("*.png"), emit: cp_output - """ - # Run cellprofiler headless - cellprofiler -c -r -p ${pipeline} \ - -g ${group} \ - -o ${output} - - """ + script: + template 'runCP.sh' } \ No newline at end of file diff --git a/modules/process_input.nf b/modules/process_input.nf index b6ee677..bcc6ab5 100644 --- a/modules/process_input.nf +++ b/modules/process_input.nf @@ -34,18 +34,8 @@ process config_CP_input_dauer { path "groups.tsv", emit: groups_file - """ - # Configure the raw pipeline for CellProfiler - awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ - awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ - awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ - awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ - awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' > pipeline.cppipe - - # Configure metadata and groups for CellProfiller with config_CP_input.R - Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} - - """ + script: + template 'config_CP_input_dauer.sh' } process config_CP_input_toxin { @@ -81,18 +71,6 @@ process config_CP_input_toxin { path "groups.tsv", emit: groups_file - """ - # Configure the raw pipeline for CellProfiler - awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ - awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ - awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ - awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ - awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' | \\ - awk '{gsub(/MODEL3_XML_FILE/,"${model3}"); print}' | \\ - awk '{gsub(/MODEL4_XML_FILE/,"${model4}"); print}' > pipeline.cppipe - - # Configure metadata and groups for CellProfiller with config_CP_input.R - Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} - - """ + script: + template 'config_CP_input_toxin.sh' } \ No newline at end of file diff --git a/modules/process_output.nf b/modules/process_output.nf index c5ca01c..8104adc 100644 --- a/modules/process_output.nf +++ b/modules/process_output.nf @@ -17,24 +17,8 @@ process proc_CP_output_dauer { output: //path "*.RData", emit: cp_out_dat - """ - # remove exisitng directories if present and make fresh - if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi - mkdir ${out_dir}/processed_data - if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi - mkdir ${out_dir}/processed_images - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv - - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv - - # move all the output images to process_images directory END WITH /? - find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; - # Process the CellProfiler output with proc_CP_output.R - Rscript --vanilla ${proc_CP_out_script} ${out_dir} - - """ + script: + template 'proc_CP_output_dauer.sh' } process proc_CP_output_toxin { @@ -53,26 +37,7 @@ process proc_CP_output_toxin { output: //path "*.RData", emit: cp_out_dat - """ - # remove exisitng directories if present and make fresh - if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi - mkdir ${out_dir}/processed_data - if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi - mkdir ${out_dir}/processed_images - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv - - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name3}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name3}.csv - # find .csv files, concatenate them, and write new file - find ${out_dir}/CP_output -type f -name '${model_name4}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name4}.csv - - # move all the output images to process_images directory END WITH /? - find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; - # Process the CellProfiler output with proc_CP_output.R - Rscript --vanilla ${proc_CP_out_script} ${out_dir} - - """ + script: + template 'proc_CP_output_toxin.sh' +} } \ No newline at end of file diff --git a/templates/config_CP_input_dauer.sh b/templates/config_CP_input_dauer.sh new file mode 100644 index 0000000..86c99ee --- /dev/null +++ b/templates/config_CP_input_dauer.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +# Configure the raw pipeline for CellProfiler +awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ +awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ +awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ +awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ +awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' > pipeline.cppipe + +# Configure metadata and groups for CellProfiller with config_CP_input.R +Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} diff --git a/templates/config_CP_input_toxin.sh b/templates/config_CP_input_toxin.sh new file mode 100644 index 0000000..3795051 --- /dev/null +++ b/templates/config_CP_input_toxin.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +# Configure the raw pipeline for CellProfiler +awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ +awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ +awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ +awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ +awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' | \\ +awk '{gsub(/MODEL3_XML_FILE/,"${model3}"); print}' | \\ +awk '{gsub(/MODEL4_XML_FILE/,"${model4}"); print}' > pipeline.cppipe + +# Configure metadata and groups for CellProfiller with config_CP_input.R +Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} diff --git a/templates/proc_CP_output_dauer.sh b/templates/proc_CP_output_dauer.sh new file mode 100644 index 0000000..ea9d4d0 --- /dev/null +++ b/templates/proc_CP_output_dauer.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +# remove exisitng directories if present and make fresh +if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi +mkdir ${out_dir}/processed_data +if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi +mkdir ${out_dir}/processed_images +# find .csv files, concatenate them, and write new file +find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv + +# find .csv files, concatenate them, and write new file +find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv + +# move all the output images to process_images directory END WITH /? +find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; +# Process the CellProfiler output with proc_CP_output.R +Rscript --vanilla ${proc_CP_out_script} ${out_dir} \ No newline at end of file diff --git a/templates/proc_CP_output_toxin.sh b/templates/proc_CP_output_toxin.sh new file mode 100644 index 0000000..ac5a7aa --- /dev/null +++ b/templates/proc_CP_output_toxin.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +# remove exisitng directories if present and make fresh +if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi +mkdir ${out_dir}/processed_data +if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi +mkdir ${out_dir}/processed_images +# find .csv files, concatenate them, and write new file +find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv + +# find .csv files, concatenate them, and write new file +find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv +# find .csv files, concatenate them, and write new file +find ${out_dir}/CP_output -type f -name '${model_name3}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name3}.csv +# find .csv files, concatenate them, and write new file +find ${out_dir}/CP_output -type f -name '${model_name4}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name4}.csv + +# move all the output images to process_images directory END WITH /? +find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; +# Process the CellProfiler output with proc_CP_output.R +Rscript --vanilla ${proc_CP_out_script} ${out_dir} \ No newline at end of file diff --git a/templates/runCP.sh b/templates/runCP.sh new file mode 100644 index 0000000..6fe147f --- /dev/null +++ b/templates/runCP.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +# Run cellprofiler headless +cellprofiler -c -r -p ${pipeline} \ +-g ${group} \ +-o ${output} \ No newline at end of file From 68f0aba992e1b552f790bba493e92c76a3b43628 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 28 Oct 2022 14:03:15 -0700 Subject: [PATCH 05/17] Move dauer and toxin to subworkflows --- main.nf | 88 ++++------------------------------------ modules/dauerWorkflow.nf | 62 ++++++++++++++++++++++++++++ modules/toxinWorkflow.nf | 68 +++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 81 deletions(-) create mode 100644 modules/dauerWorkflow.nf create mode 100644 modules/toxinWorkflow.nf diff --git a/main.nf b/main.nf index 950ae9f..45891f2 100644 --- a/main.nf +++ b/main.nf @@ -12,16 +12,12 @@ if( !nextflow.version.matches('>20.0') ) { // INCLUDE modules include { - config_CP_input_dauer - config_CP_input_toxin -} from './modules/process_input.nf' + dauer_workflow +} from './modules/dauerWorkflow.nf' include { - proc_CP_output_dauer - proc_CP_output_toxin -} from './modules/process_output.nf' -include { - runCP -} from './modules/cellprofiler.nf' + toxin_workflow +} from './modules/toxinWorkflow.nf' + /* ~ ~ ~ > * PARAMETERS SETUP @@ -119,81 +115,11 @@ C E L L P R O F I L E R - N F P I P E L I N E workflow { if("${params.pipeline}" == "dauer") { - // configure inputs for CellProfiler ONLY FOR DAUER NOW NEED TO CHANGE MODELS IF OTHER - config_cp = Channel.fromPath("${params.raw_pipe}") - .combine(Channel.from("${params.metadata_dir}")) - .combine(Channel.from("${params.metadata}")) - .combine(Channel.from("${params.worm_model_dir}")) - .combine(Channel.from(worm_model1)) // edit here - .combine(Channel.from(worm_model2)) // edit here - .combine(Channel.fromPath("${params.bin_dir}/config_CP_input_dauer.R")) - .combine(Channel.from("${params.project}")) - .combine(Channel.from("${params.well_mask}")) - .combine(Channel.from("${params.groups}")) - .combine(Channel.from("${params.edited_pipe}")) - .combine(Channel.from("${params.out}")) | config_CP_input_dauer - //.view() - - // Run CellProfiler - groups = config_CP_input_dauer.out.groups_file - .splitCsv(header:true, sep: "\t") - .map { row -> - [row.group, file("${row.pipeline}"), file("${row.output}")] - } - //.view() - - runCP(groups) - - // Preprocess CellProfiler output files - proc_cp = runCP.out.cp_output - .last() // This ensures that all items are emitted from runCP - .combine(Channel.from("${params.out}")) - .combine(Channel.from(model_name1)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.from(model_name2)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.fromPath("${params.bin_dir}/proc_CP_output_dauer.R")) - //.view() - - proc_CP_output_dauer(proc_cp) + dauer_workflow() } if("${params.pipeline}" == "toxin") { - // configure inputs for CellProfiler ONLY FOR DAUER NOW NEED TO CHANGE MODELS IF OTHER - config_cp = Channel.fromPath("${params.raw_pipe}") - .combine(Channel.from("${params.metadata_dir}")) - .combine(Channel.from("${params.metadata}")) - .combine(Channel.from("${params.worm_model_dir}")) - .combine(Channel.from(worm_model1)) // edit here - .combine(Channel.from(worm_model2)) // edit here - .combine(Channel.from(worm_model3)) // edit here - .combine(Channel.from(worm_model4)) // edit here - .combine(Channel.fromPath("${params.bin_dir}/config_CP_input_toxin.R")) - .combine(Channel.from("${params.project}")) - .combine(Channel.from("${params.well_mask}")) - .combine(Channel.from("${params.groups}")) - .combine(Channel.from("${params.edited_pipe}")) - .combine(Channel.from("${params.out}")) | config_CP_input_toxin - //.view() - - // Run CellProfiler - groups = config_CP_input_toxin.out.groups_file - .splitCsv(header:true, sep: "\t") - .map { row -> - [row.group, file("${row.pipeline}"), file("${row.output}")] - } - //.view() - - runCP(groups) - - // Preprocess CellProfiler output files - proc_cp = runCP.out.cp_output - .last() // This ensures that all items are emitted from runCP - .combine(Channel.from("${params.out}")) - .combine(Channel.from(model_name1)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.from(model_name2)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.from(model_name3)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.from(model_name4)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.fromPath("${params.bin_dir}/proc_CP_output_toxin.R")) | proc_CP_output_toxin - //.view() + toxin_workflow() } } diff --git a/modules/dauerWorkflow.nf b/modules/dauerWorkflow.nf new file mode 100644 index 0000000..6fcf88f --- /dev/null +++ b/modules/dauerWorkflow.nf @@ -0,0 +1,62 @@ +#!/usr/bin/env nextflow + +// Using DSL-2 +nextflow.enable.dsl=2 + +// INCLUDE modules +include { + config_CP_input_dauer +} from './process_input.nf' +include { + proc_CP_output_dauer +} from './process_output.nf' +include { + runCP +} from './cellprofiler.nf' + +/* +~ ~ ~ > * PARAMETERS SETUP +*/ + +pipe = "dauer-nf" +worm_model1 = "dauerMod.xml" +worm_model2 = "nondauerMod.xml" +model_name1 = "dauerMod_NonOverlappingWorms" +model_name2 = "nondauerMod_NonOverlappingWorms" + +workflow dauer_workflow { + config_cp = Channel.fromPath("${params.raw_pipe}") + .combine(Channel.from("${params.metadata_dir}")) + .combine(Channel.from("${params.metadata}")) + .combine(Channel.from("${params.worm_model_dir}")) + .combine(Channel.from(worm_model1)) // edit here + .combine(Channel.from(worm_model2)) // edit here + .combine(Channel.fromPath("${params.bin_dir}/config_CP_input_dauer.R")) + .combine(Channel.from("${params.project}")) + .combine(Channel.from("${params.well_mask}")) + .combine(Channel.from("${params.groups}")) + .combine(Channel.from("${params.edited_pipe}")) + .combine(Channel.from("${params.out}")) | config_CP_input_dauer + //.view() + + // Run CellProfiler + groups = config_CP_input_dauer.out.groups_file + .splitCsv(header:true, sep: "\t") + .map { row -> + [row.group, file("${row.pipeline}"), file("${row.output}")] + } + //.view() + + runCP(groups) + + // Preprocess CellProfiler output files + proc_cp = runCP.out.cp_output + .last() // This ensures that all items are emitted from runCP + .combine(Channel.from("${params.out}")) + .combine(Channel.from(model_name1)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE + .combine(Channel.from(model_name2)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE + .combine(Channel.fromPath("${params.bin_dir}/proc_CP_output_dauer.R")) + //.view() + + proc_CP_output_dauer(proc_cp) +} \ No newline at end of file diff --git a/modules/toxinWorkflow.nf b/modules/toxinWorkflow.nf new file mode 100644 index 0000000..55dee3a --- /dev/null +++ b/modules/toxinWorkflow.nf @@ -0,0 +1,68 @@ +#!/usr/bin/env nextflow + +// Using DSL-2 +nextflow.enable.dsl=2 + +// INCLUDE modules +include { + config_CP_input_toxin +} from './process_input.nf' +include { + proc_CP_output_toxin +} from './process_output.nf' +include { + runCP +} from './cellprofiler.nf' + +/* +~ ~ ~ > * PARAMETERS SETUP +*/ + +pipe = "toxin-nf" +worm_model1 = "L4_N2_HB101_100w.xml" +worm_model2 = "L2L3_N2_HB101_100w.xml" +worm_model3 = "L1_N2_HB101_100w.xml" +worm_model4 = "MDHD.xml" +model_name1 = "L4_N2_HB101_100w_NonOverlappingWorms" +model_name2 = "L2L3_N2_HB101_100w_NonOverlappingWorms" +model_name3 = "L1_N2_HB101_100w_NonOverlappingWorms" +model_name4 = "MDHD_NonOverlappingWorms" + +workflow toxin_workflow { + config_cp = Channel.fromPath("${params.raw_pipe}") + .combine(Channel.from("${params.metadata_dir}")) + .combine(Channel.from("${params.metadata}")) + .combine(Channel.from("${params.worm_model_dir}")) + .combine(Channel.from(worm_model1)) // edit here + .combine(Channel.from(worm_model2)) // edit here + .combine(Channel.from(worm_model3)) // edit here + .combine(Channel.from(worm_model4)) // edit here + .combine(Channel.fromPath("${params.bin_dir}/config_CP_input_toxin.R")) + .combine(Channel.from("${params.project}")) + .combine(Channel.from("${params.well_mask}")) + .combine(Channel.from("${params.groups}")) + .combine(Channel.from("${params.edited_pipe}")) + .combine(Channel.from("${params.out}")) | config_CP_input_toxin + //.view() + + // Run CellProfiler + groups = config_CP_input_toxin.out.groups_file + .splitCsv(header:true, sep: "\t") + .map { row -> + [row.group, file("${row.pipeline}"), file("${row.output}")] + } + //.view() + + runCP(groups) + + // Preprocess CellProfiler output files + proc_cp = runCP.out.cp_output + .last() // This ensures that all items are emitted from runCP + .combine(Channel.from("${params.out}")) + .combine(Channel.from(model_name1)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE + .combine(Channel.from(model_name2)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE + .combine(Channel.from(model_name3)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE + .combine(Channel.from(model_name4)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE + .combine(Channel.fromPath("${params.bin_dir}/proc_CP_output_toxin.R")) | proc_CP_output_toxin + //.view() +} From 98ee9489b6f43a0cdd3c4d4e0e6c21cdc6bac3d3 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 28 Oct 2022 14:04:57 -0700 Subject: [PATCH 06/17] Clean up options --- main.nf | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/main.nf b/main.nf index 45891f2..e91bdd0 100644 --- a/main.nf +++ b/main.nf @@ -29,23 +29,7 @@ date = new Date().format('yyyyMMdd') // Setup pipeline parameter params.pipeline = null -if("${params.pipeline}" == "dauer") { - pipe = "dauer-nf" - worm_model1 = "dauerMod.xml" - worm_model2 = "nondauerMod.xml" - model_name1 = "dauerMod_NonOverlappingWorms" - model_name2 = "nondauerMod_NonOverlappingWorms" -} else if( "${params.pipeline}" == "toxin" ) { - pipe = "toxin-nf" - worm_model1 = "L4_N2_HB101_100w.xml" - worm_model2 = "L2L3_N2_HB101_100w.xml" - worm_model3 = "L1_N2_HB101_100w.xml" - worm_model4 = "MDHD.xml" - model_name1 = "L4_N2_HB101_100w_NonOverlappingWorms" - model_name2 = "L2L3_N2_HB101_100w_NonOverlappingWorms" - model_name3 = "L1_N2_HB101_100w_NonOverlappingWorms" - model_name4 = "MDHD_NonOverlappingWorms" -} else if(!params.pipeline) { +if(!params.pipeline) { println """ Error: pipeline parameter not specified. Please enter --pipeline dauer or --pipeline toxin in command. """ From 45e9d4fcda5246b2b7685ca627955117b28d2d7e Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 28 Oct 2022 14:10:20 -0700 Subject: [PATCH 07/17] Move fixed inputs out of sub-workflows --- main.nf | 10 +++++++--- modules/dauerWorkflow.nf | 9 +++++---- modules/toxinWorkflow.nf | 9 +++++---- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/main.nf b/main.nf index e91bdd0..3aaf055 100644 --- a/main.nf +++ b/main.nf @@ -97,13 +97,17 @@ C E L L P R O F I L E R - N F P I P E L I N E */ workflow { - + input_data = Channel.fromPath("${params.raw_pipe}") + .combine(Channel.from("${params.metadata_dir}")) + .combine(Channel.from("${params.metadata}")) + .combine(Channel.from("${params.worm_model_dir}")) + if("${params.pipeline}" == "dauer") { - dauer_workflow() + dauer_workflow(input_data) } if("${params.pipeline}" == "toxin") { - toxin_workflow() + toxin_workflow(input_data) } } diff --git a/modules/dauerWorkflow.nf b/modules/dauerWorkflow.nf index 6fcf88f..4762588 100644 --- a/modules/dauerWorkflow.nf +++ b/modules/dauerWorkflow.nf @@ -25,10 +25,11 @@ model_name1 = "dauerMod_NonOverlappingWorms" model_name2 = "nondauerMod_NonOverlappingWorms" workflow dauer_workflow { - config_cp = Channel.fromPath("${params.raw_pipe}") - .combine(Channel.from("${params.metadata_dir}")) - .combine(Channel.from("${params.metadata}")) - .combine(Channel.from("${params.worm_model_dir}")) + take: + input_data_channel + + main: + config_cp = input_data_channel .combine(Channel.from(worm_model1)) // edit here .combine(Channel.from(worm_model2)) // edit here .combine(Channel.fromPath("${params.bin_dir}/config_CP_input_dauer.R")) diff --git a/modules/toxinWorkflow.nf b/modules/toxinWorkflow.nf index 55dee3a..c935ef4 100644 --- a/modules/toxinWorkflow.nf +++ b/modules/toxinWorkflow.nf @@ -29,10 +29,11 @@ model_name3 = "L1_N2_HB101_100w_NonOverlappingWorms" model_name4 = "MDHD_NonOverlappingWorms" workflow toxin_workflow { - config_cp = Channel.fromPath("${params.raw_pipe}") - .combine(Channel.from("${params.metadata_dir}")) - .combine(Channel.from("${params.metadata}")) - .combine(Channel.from("${params.worm_model_dir}")) + take: + input_data_channel + + main: + config_cp = input_data_channel .combine(Channel.from(worm_model1)) // edit here .combine(Channel.from(worm_model2)) // edit here .combine(Channel.from(worm_model3)) // edit here From a790e35b62c36351d682bead22534bddab0cb3d5 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 28 Oct 2022 14:21:42 -0700 Subject: [PATCH 08/17] Finish moving common options out of subworkflows --- main.nf | 9 +++++++-- modules/dauerWorkflow.nf | 9 +++------ modules/process_input.nf | 21 +++++++++++---------- modules/toxinWorkflow.nf | 9 +++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/main.nf b/main.nf index 3aaf055..e5d75dc 100644 --- a/main.nf +++ b/main.nf @@ -101,9 +101,14 @@ workflow { .combine(Channel.from("${params.metadata_dir}")) .combine(Channel.from("${params.metadata}")) .combine(Channel.from("${params.worm_model_dir}")) - + .combine(Channel.from("${params.project}")) + .combine(Channel.from("${params.well_mask}")) + .combine(Channel.from("${params.groups}")) + .combine(Channel.from("${params.edited_pipe}")) + .combine(Channel.from("${params.out}")) + if("${params.pipeline}" == "dauer") { - dauer_workflow(input_data) + dauer_workflow(input_data) } if("${params.pipeline}" == "toxin") { diff --git a/modules/dauerWorkflow.nf b/modules/dauerWorkflow.nf index 4762588..a88756c 100644 --- a/modules/dauerWorkflow.nf +++ b/modules/dauerWorkflow.nf @@ -32,12 +32,9 @@ workflow dauer_workflow { config_cp = input_data_channel .combine(Channel.from(worm_model1)) // edit here .combine(Channel.from(worm_model2)) // edit here - .combine(Channel.fromPath("${params.bin_dir}/config_CP_input_dauer.R")) - .combine(Channel.from("${params.project}")) - .combine(Channel.from("${params.well_mask}")) - .combine(Channel.from("${params.groups}")) - .combine(Channel.from("${params.edited_pipe}")) - .combine(Channel.from("${params.out}")) | config_CP_input_dauer + .combine( + Channel.fromPath("${params.bin_dir}/config_CP_input_dauer.R") + ) | config_CP_input_dauer //.view() // Run CellProfiler diff --git a/modules/process_input.nf b/modules/process_input.nf index bcc6ab5..3914265 100644 --- a/modules/process_input.nf +++ b/modules/process_input.nf @@ -19,14 +19,15 @@ process config_CP_input_dauer { val(meta_dir), val(meta), val(model_dir), - val(model1), - val(model2), - file(config_script), val(project), val(mask), val(group), val(edited_pipe), - val(out) + val(out), + val(model1), + val(model2), + file(config_script) + output: path "*.cppipe", emit: cp_pipeline_file @@ -54,16 +55,16 @@ process config_CP_input_toxin { val(meta_dir), val(meta), val(model_dir), - val(model1), - val(model2), - val(model3), - val(model4), - file(config_script), val(project), val(mask), val(group), val(edited_pipe), - val(out) + val(out), + val(model1), + val(model2), + val(model3), + val(model4), + file(config_script) output: path "*.cppipe", emit: cp_pipeline_file diff --git a/modules/toxinWorkflow.nf b/modules/toxinWorkflow.nf index c935ef4..8b2c396 100644 --- a/modules/toxinWorkflow.nf +++ b/modules/toxinWorkflow.nf @@ -38,12 +38,9 @@ workflow toxin_workflow { .combine(Channel.from(worm_model2)) // edit here .combine(Channel.from(worm_model3)) // edit here .combine(Channel.from(worm_model4)) // edit here - .combine(Channel.fromPath("${params.bin_dir}/config_CP_input_toxin.R")) - .combine(Channel.from("${params.project}")) - .combine(Channel.from("${params.well_mask}")) - .combine(Channel.from("${params.groups}")) - .combine(Channel.from("${params.edited_pipe}")) - .combine(Channel.from("${params.out}")) | config_CP_input_toxin + .combine( + Channel.fromPath("${params.bin_dir}/config_CP_input_toxin.R") + ) | config_CP_input_toxin //.view() // Run CellProfiler From c0e660c2a829fb7bf34be5fc5f529daea9b453b0 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 4 Nov 2022 13:33:50 -0700 Subject: [PATCH 09/17] Give up on gradual modifications in favor of drastic rewrite --- bin/config_CP_input_dauer.R | 92 --------------- bin/config_CP_input_toxin.R | 80 ------------- bin/makeMetadata_dauer.R | 71 +++++++++++ bin/makeMetadata_toxin.R | 58 +++++++++ ...roc_CP_output_dauer.R => proc_CP_output.R} | 16 ++- bin/proc_CP_output_toxin.R | 45 ------- main.nf | 110 ++++++++++-------- modules/cellprofiler.nf | 15 ++- modules/dauerWorkflow.nf | 60 ---------- modules/process_input.nf | 76 +++--------- modules/process_output.nf | 39 ++----- modules/toxinWorkflow.nf | 66 ----------- templates/config_CP_input_dauer.sh | 15 --- templates/config_CP_input_toxin.sh | 17 --- templates/listFiles.sh | 7 ++ templates/makeMetadata.sh | 11 ++ templates/proc_CP_output_dauer.sh | 21 ---- templates/proc_CP_output_toxin.sh | 25 ---- templates/proc_output_dauer.sh | 30 +++++ templates/proc_output_toxin.sh | 30 +++++ templates/runCP.sh | 18 ++- 21 files changed, 325 insertions(+), 577 deletions(-) delete mode 100644 bin/config_CP_input_dauer.R delete mode 100644 bin/config_CP_input_toxin.R create mode 100644 bin/makeMetadata_dauer.R create mode 100644 bin/makeMetadata_toxin.R rename bin/{proc_CP_output_dauer.R => proc_CP_output.R} (74%) delete mode 100644 bin/proc_CP_output_toxin.R delete mode 100644 modules/dauerWorkflow.nf delete mode 100644 modules/toxinWorkflow.nf delete mode 100644 templates/config_CP_input_dauer.sh delete mode 100644 templates/config_CP_input_toxin.sh create mode 100644 templates/listFiles.sh create mode 100644 templates/makeMetadata.sh delete mode 100644 templates/proc_CP_output_dauer.sh delete mode 100644 templates/proc_CP_output_toxin.sh create mode 100644 templates/proc_output_dauer.sh create mode 100644 templates/proc_output_toxin.sh diff --git a/bin/config_CP_input_dauer.R b/bin/config_CP_input_dauer.R deleted file mode 100644 index 17e0690..0000000 --- a/bin/config_CP_input_dauer.R +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env Rscript -library(dplyr) -library(tidyr) -library(tibble) -library(stringr) -library(readr) -library(glue) -library(purrr) -library(data.table) - -#==============================================================================# -# Arguments -#==============================================================================# -# 1 - full path to project directory -# 2 - the path to the well_mask - HARDCODE NOW -# 3 - the group argument from main.nf - default is "plate,well" -# 4 - the edited pipeline path -# 5 - the out path -args <- commandArgs(trailingOnly = TRUE) - -#==============================================================================# -# Make Metadata NEEDS TO BE ADAPATBLE TO MULTIPLE WAVELENGTHS -#==============================================================================# -projDir <- args[1] -projName <- stringr::str_extract(projDir, pattern = "([^/]+$)") - -raw_imagesDir <- paste0(projDir, "/raw_images") - -# parse file names from directory - need wavelength in file name -meta1 <- tibble::tibble(file = list.files(path = raw_imagesDir), - file_path = list.files(path = raw_imagesDir, full.names = T)) %>% - dplyr::mutate(copy = file) %>% - tidyr::separate(col = copy, into = c("date","exp","plate","mag"), sep = "-") %>% - tidyr::separate(col = mag, into = c("mag","well", "wave"), sep = "_") %>% - tidyr::separate(col = wave, into = c("wave","TIF"), sep = "[.]") %>% - dplyr::select(-TIF) %>% - dplyr::mutate(row = stringr::str_extract(well, pattern = "[A-Z]"), - col = stringr::str_extract(well, pattern = "[0-9][0-9]"), - Image_PathName_wellmask_98.png = stringr::str_replace(args[2], pattern = "([^/]+$)", replacement = ""), - Image_FileName_wellmask_98.png = stringr::str_extract(args[2], pattern = "([^/]+$)")) - -# num of wavelengths - add logic for how to make metadata from multiple wavelengths -n_wave <- length(unique(meta1$wave)) - -# add group -groups <- stringr::str_split(args[3], pattern = ",")[[1]] -meta1$group <- apply( meta1[, groups], 1, paste, collapse = "_") - -# add image types and set metadata names - hardcode image names - needs to be flexible for multiple pipeline profiles -meta2 <- meta1 %>% - tidyr::pivot_wider(names_from = wave, values_from = c(file, file_path)) %>% - dplyr::rename(Image_FileName_RawBF = file_w1, - Image_PathName_RawBF = file_path_w1, - Image_FileName_RawRFP = file_w2, - Image_PathName_RawRFP = file_path_w2) %>% - dplyr::mutate(Image_PathName_RawRFP = stringr::str_replace(Image_PathName_RawRFP, pattern = "([^/]+$)", replacement = ""), - Image_PathName_RawBF = stringr::str_replace(Image_PathName_RawBF, pattern = "([^/]+$)", replacement = "")) %>% - dplyr::select(Metadata_Experiment = exp, - Metadata_Date = date, - Metadata_Plate = plate, - Metadata_Well = well, - #Metadata_Column = col, - #Metadata_Row = row, - Metadata_Group = group, - Metadata_Magnification = mag, - Image_FileName_RawBF, - Image_PathName_RawBF, - Image_FileName_RawRFP, - Image_PathName_RawRFP, - Image_FileName_wellmask_98.png, - Image_PathName_wellmask_98.png) - -write.table(meta2, file = glue::glue("metadata.csv"), quote=FALSE, sep=',', row.names = F) - -#==============================================================================# -# Make groups.tsv file for runCP -#==============================================================================# -gs <- meta2 %>% - dplyr::distinct(Metadata_Group, .keep_all=T) %>% - dplyr::mutate(group = paste0("Metadata_Group=", Metadata_Group), - pipeline = args[4], - output = paste0(args[5], "/CP_output/", Metadata_Group)) %>% - dplyr::select(group:output) - -write.table(gs, file = glue::glue("groups.tsv"), quote=FALSE, sep='\t', row.names = F) - -#==============================================================================# -# Make dirs for CP output -#==============================================================================# -for(i in unique(gs$output)){ - dir.create(i, recursive = T) -} diff --git a/bin/config_CP_input_toxin.R b/bin/config_CP_input_toxin.R deleted file mode 100644 index a169b0b..0000000 --- a/bin/config_CP_input_toxin.R +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env Rscript -library(dplyr) -library(tidyr) -library(tibble) -library(stringr) -library(readr) -library(glue) -library(purrr) -library(data.table) - -#==============================================================================# -# Arguments -#==============================================================================# -# 1 - full path to project directory -# 2 - the path to the well_mask - HARDCODE NOW -# 3 - the group argument from main.nf - default is "plate,well" -# 4 - the edited pipeline path -# 5 - the out path -# args <- c("/projects/b1059/projects/Tim/cellprofiler-nf/projects/20220128_GWA09", "/projects/b1059/projects/Tim/cellprofiler-nf/input_data/well_masks/wellmask_98.png", -# "plate,well", "/projects/b1059/projects/Tim/cellprofiler-nf/projects/20220128_GWA09/pipelines/pipeline.cppipe", "/projects/b1059/projects/Tim/cellprofiler-nf/projects/20220128_GWA09/CP_output") -args <- commandArgs(trailingOnly = TRUE) - -#==============================================================================# -# Make Metadata NEEDS TO BE ADAPATBLE TO MULTIPLE WAVELENGTHS -#==============================================================================# -projDir <- args[1] -projName <- stringr::str_extract(projDir, pattern = "([^/]+$)") - -raw_imagesDir <- paste0(projDir, "/raw_images") - -# parse file names from directory - need wavelength in file name -meta1 <- tibble::tibble(file = list.files(path = raw_imagesDir), - file_path = list.files(path = raw_imagesDir, full.names = T)) %>% - dplyr::mutate(copy = file) %>% - tidyr::separate(col = copy, into = c("date","exp","plate","mag"), sep = "-") %>% - tidyr::separate(col = mag, into = c("mag","well"), sep = "_") %>% - tidyr::separate(col = well, into = c("well","TIF"), sep = "[.]") %>% - dplyr::select(-TIF) %>% - dplyr::mutate(row = stringr::str_extract(well, pattern = "[A-Z]"), - col = stringr::str_extract(well, pattern = "[0-9][0-9]"), - Image_PathName_wellmask_98.png = stringr::str_replace(args[2], pattern = "([^/]+$)", replacement = ""), - Image_FileName_wellmask_98.png = stringr::str_extract(args[2], pattern = "([^/]+$)")) - -# add group -groups <- stringr::str_split(args[3], pattern = ",")[[1]] -meta1$group <- apply( meta1[, groups], 1, paste, collapse = "_") - -meta2 <- meta1 %>% - dplyr::mutate(Image_PathName_RawBF = stringr::str_replace(file_path, pattern = "([^/]+$)", replacement = "")) %>% - dplyr::select(Metadata_Experiment = exp, - Metadata_Date = date, - Metadata_Plate = plate, - Metadata_Well = well, - Metadata_Group = group, - Metadata_Magnification = mag, - Image_FileName_RawBF = file, - Image_PathName_RawBF, - Image_FileName_wellmask_98.png, - Image_PathName_wellmask_98.png) - -write.table(meta2, file = glue::glue("metadata.csv"), quote=FALSE, sep=',', row.names = F) - -#==============================================================================# -# Make groups.tsv file for runCP -#==============================================================================# -gs <- meta2 %>% - dplyr::distinct(Metadata_Group, .keep_all=T) %>% - dplyr::mutate(group = paste0("Metadata_Group=", Metadata_Group), - pipeline = args[4], - output = paste0(args[5], "/CP_output/", Metadata_Group)) %>% - dplyr::select(group:output) - -write.table(gs, file = glue::glue("groups.tsv"), quote=FALSE, sep='\t', row.names = F) - -#==============================================================================# -# Make dirs for CP output -#==============================================================================# -for(i in unique(gs$output)){ - dir.create(i, recursive = T) -} diff --git a/bin/makeMetadata_dauer.R b/bin/makeMetadata_dauer.R new file mode 100644 index 0000000..1b36d91 --- /dev/null +++ b/bin/makeMetadata_dauer.R @@ -0,0 +1,71 @@ +#!/usr/bin/env -S Rscript --vanilla +library(dplyr) +library(tidyr) +library(tibble) +library(stringr) +library(readr) +library(purrr) +library(data.table) + +#==============================================================================# +# Arguments +#==============================================================================# +# 1 - A list of input files +# 2 - the path to the well_mask - HARDCODE NOW +# 3 - the group argument from main.nf - default is "plate,well" +# 4 - the edited pipeline path +# 5 - the out path +args <- commandArgs(trailingOnly = TRUE) + +#==============================================================================# +# Make Metadata NEEDS TO BE ADAPATBLE TO MULTIPLE WAVELENGTHS +#==============================================================================# +# parse file names from directory - need wavelength in file name +meta1 <- read_delim( + args[1], + col_names = FALSE, + delim = "\t") %>% + select(file_path = X1) %>% + extract(file_path, into = "file", remove = FALSE, regex = ".*/(.*)$") %>% + extract(file, + remove = FALSE, + regex = "^(.*)-(.*)-(.*)-(.*)_(.*)_(.*_\\.(.*)$", + into = c("date","exp","plate","mag","well","wave","TIF")) %>% + select(-TIF) %>% + dplyr::mutate(row = stringr::str_extract(well, pattern = "[A-Z]"), + col = stringr::str_extract(well, pattern = "[0-9][0-9]"), + Image_PathName_wellmask_98.png = stringr::str_replace(args[2], pattern = "([^/]+$)", replacement = ""), + Image_FileName_wellmask_98.png = stringr::str_extract(args[2], pattern = "([^/]+$)")) + +# add group +groups <- stringr::str_split(args[3], pattern = ",")[[1]] +meta1$group <- apply( meta1[, groups], 1, paste, collapse = "_") + +# add image types and set metadata names - hardcode image names - needs to be flexible for multiple pipeline profiles +meta2 <- meta1 %>% +tidyr::pivot_wider(names_from = wave, values_from = c(file, file_path)) %>% + dplyr::rename(Image_FileName_RawBF = file_w1, + Image_PathName_RawBF = file_path_w1, + Image_FileName_RawRFP = file_w2, + Image_PathName_RawRFP = file_path_w2) %>% + dplyr::mutate( + Image_PathName_RawRFP = stringr::str_replace( + Image_PathName_RawRFP, pattern = "([^/]+$)", replacement = "" + ), + Image_PathName_RawBF = stringr::str_replace( + Image_PathName_RawBF, pattern = "([^/]+$)", replacement = "" + ) + ) %>% + dplyr::select( + Metadata_Experiment = exp, + Metadata_Date = date, + Metadata_Plate = plate, + Metadata_Well = well, + Metadata_Group = group, + Metadata_Magnification = mag, + Image_FileName_RawBF = file, + Image_PathName_RawBF, + Image_FileName_wellmask_98.png, + Image_PathName_wellmask_98.png) + +write.table(meta2, file = "metadata.csv", quote=FALSE, sep=',', row.names = F) \ No newline at end of file diff --git a/bin/makeMetadata_toxin.R b/bin/makeMetadata_toxin.R new file mode 100644 index 0000000..c4e70d0 --- /dev/null +++ b/bin/makeMetadata_toxin.R @@ -0,0 +1,58 @@ +#!/usr/bin/env -S Rscript --vanilla +library(dplyr) +library(tidyr) +library(tibble) +library(stringr) +library(readr) +library(purrr) +library(data.table) + +#==============================================================================# +# Arguments +#==============================================================================# +# 1 - A list of input files +# 2 - the path to the well_mask - HARDCODE NOW +# 3 - the group argument from main.nf - default is "plate,well" +# 4 - the edited pipeline path +# 5 - the out path +args <- commandArgs(trailingOnly = TRUE) + +#==============================================================================# +# Make Metadata NEEDS TO BE ADAPATBLE TO MULTIPLE WAVELENGTHS +#==============================================================================# +# parse file names from directory - need wavelength in file name +meta1 <- read_delim( + args[1], + col_names = FALSE, + delim = "\t") %>% + select(file_path = X1) %>% + extract(file_path, into = "file", remove = FALSE, regex = ".*/(.*)$") %>% + extract(file, + remove = FALSE, + regex = "^(.*)-(.*)-(.*)-(.*)_(.*)\\.(.*)$", + into = c("date","exp","plate","mag","well","TIF")) %>% + select(-TIF) %>% + dplyr::mutate(row = stringr::str_extract(well, pattern = "[A-Z]"), + col = stringr::str_extract(well, pattern = "[0-9][0-9]"), + Image_PathName_wellmask_98.png = stringr::str_replace(args[2], pattern = "([^/]+$)", replacement = ""), + Image_FileName_wellmask_98.png = stringr::str_extract(args[2], pattern = "([^/]+$)")) + +# add group +groups <- stringr::str_split(args[3], pattern = ",")[[1]] +meta1$group <- apply( meta1[, groups], 1, paste, collapse = "_") + +# add image types and set metadata names - hardcode image names - needs to be flexible for multiple pipeline profiles +meta2 <- meta1 %>% + dplyr::mutate(Image_PathName_RawBF = stringr::str_replace(file_path, pattern = "([^/]+$)", replacement = "")) %>% + dplyr::select(Metadata_Experiment = exp, + Metadata_Date = date, + Metadata_Plate = plate, + Metadata_Well = well, + Metadata_Group = group, + Metadata_Magnification = mag, + Image_FileName_RawBF = file, + Image_PathName_RawBF, + Image_FileName_wellmask_98.png, + Image_PathName_wellmask_98.png) + +write.table(meta2, file = "metadata.csv", quote=FALSE, sep=',', row.names = F) \ No newline at end of file diff --git a/bin/proc_CP_output_dauer.R b/bin/proc_CP_output.R similarity index 74% rename from bin/proc_CP_output_dauer.R rename to bin/proc_CP_output.R index a25647f..67ce906 100644 --- a/bin/proc_CP_output_dauer.R +++ b/bin/proc_CP_output.R @@ -1,24 +1,25 @@ -#!/usr/bin/env Rscript +#!/usr/bin/env -S Rscript --vanilla library(fs) library(dplyr) library(tidyr) library(tibble) library(stringr) library(readr) -library(glue) library(purrr) #==============================================================================# # Arguments #==============================================================================# # 1 - out directory path +# 2 - project name +# 3 - run stamp args <- commandArgs(trailingOnly = TRUE) #==============================================================================# # Read CP output data #==============================================================================# # get the output for each model -dir <- glue::glue("{args[1]}/processed_data") +dir <- "processed_data" # read in files and manipulate with model_df <- dir %>% @@ -36,10 +37,7 @@ model_df_list <- split.data.frame(model_df, model_df$model) lapply(seq_along(model_df_list), function(i) assign(names(model_df_list)[i], model_df_list[[i]], envir = .GlobalEnv)) # save as R.data -proj_name <- stringr::str_extract(args[1], pattern = "[^\\/]+(?=(?:\\/[^\\/]+){1}$)") -run_stamp <- stringr::str_extract(args[1], pattern = "([^/]+$)") +proj_name <- args[1] +run_stamp <- args[2] save(list = c(ls(pattern = "model.outputs")), - file = glue::glue("{args[1]}/processed_data/{proj_name}_{run_stamp}.RData")) - -# clean up extra CP_outputs for now -system(command = glue::glue("if [ -d {args[1]}/CP_output ]; then rm -Rf {args[1]}/CP_output; fi")) \ No newline at end of file + file = paste0("processed_data/", args[1], "_", args[2], ".RData")) \ No newline at end of file diff --git a/bin/proc_CP_output_toxin.R b/bin/proc_CP_output_toxin.R deleted file mode 100644 index a25647f..0000000 --- a/bin/proc_CP_output_toxin.R +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env Rscript -library(fs) -library(dplyr) -library(tidyr) -library(tibble) -library(stringr) -library(readr) -library(glue) -library(purrr) - -#==============================================================================# -# Arguments -#==============================================================================# -# 1 - out directory path -args <- commandArgs(trailingOnly = TRUE) - -#==============================================================================# -# Read CP output data -#==============================================================================# -# get the output for each model -dir <- glue::glue("{args[1]}/processed_data") - -# read in files and manipulate with -model_df <- dir %>% - fs::dir_ls(regexp = "\\.csv$") %>% # find paths to csvs in dir - purrr::map_dfr(readr::read_csv, .id = "model") %>% - dplyr::mutate(Metadata_Date = as.integer(Metadata_Date), - model = stringr::str_remove(fs::path_file(as.character(model)), pattern = ".csv"), - model = paste0(model, ".model.outputs")) %>% - dplyr::arrange(model, ImageNumber) - -# split to list -model_df_list <- split.data.frame(model_df, model_df$model) - -# export list items to global env -lapply(seq_along(model_df_list), function(i) assign(names(model_df_list)[i], model_df_list[[i]], envir = .GlobalEnv)) - -# save as R.data -proj_name <- stringr::str_extract(args[1], pattern = "[^\\/]+(?=(?:\\/[^\\/]+){1}$)") -run_stamp <- stringr::str_extract(args[1], pattern = "([^/]+$)") -save(list = c(ls(pattern = "model.outputs")), - file = glue::glue("{args[1]}/processed_data/{proj_name}_{run_stamp}.RData")) - -# clean up extra CP_outputs for now -system(command = glue::glue("if [ -d {args[1]}/CP_output ]; then rm -Rf {args[1]}/CP_output; fi")) \ No newline at end of file diff --git a/main.nf b/main.nf index e5d75dc..ed08ab4 100644 --- a/main.nf +++ b/main.nf @@ -11,13 +11,12 @@ if( !nextflow.version.matches('>20.0') ) { } // INCLUDE modules -include { - dauer_workflow -} from './modules/dauerWorkflow.nf' -include { - toxin_workflow -} from './modules/toxinWorkflow.nf' - +// include { +// dauer_workflow +// } from './modules/dauerWorkflow.nf' +// include { +// toxin_workflow +// } from './modules/toxinWorkflow.nf' /* ~ ~ ~ > * PARAMETERS SETUP @@ -25,7 +24,6 @@ include { // Variables date = new Date().format('yyyyMMdd') -// model_name = "NonOverlappingWorms" // JUST FOR NOW // Setup pipeline parameter params.pipeline = null @@ -34,32 +32,44 @@ if(!params.pipeline) { Error: pipeline parameter not specified. Please enter --pipeline dauer or --pipeline toxin in command. """ System.exit(1) -} else if("${params.pipeline}" != "toxin" || "${params.pipeline}" != "dauer" ) { +} else if(params.pipeline != "toxin" && params.pipeline != "dauer" ) { println """ Error: pipeline (${params.pipeline}) does not match expected value. Please enter either dauer or toxin. """ System.exit(1) } -// Configure other parameters +date = new Date().format('yyyyMMdd') + +// Help: params.help = null -params.debug = null +// project directory params.project = null +// Groups to use params.groups = "plate,well" +// directory with input data params.data_dir = "${workflow.projectDir}/input_data" // this is different for gcp -params.bin_dir = "${workflow.projectDir}/bin" // this is different for gcp -params.well_mask = "${params.data_dir}/well_masks/wellmask_98.png" +// mask for the well +params.well_mask = "input_data/well_masks/wellmask_98.png" +// location to put output files params.out = "${params.project}/Analysis-${date}" -params.raw_pipe_dir = "${params.data_dir}/CP_pipelines" -params.raw_pipe = "${params.raw_pipe_dir}/${pipe}.cppipe" -params.edited_pipe = "${params.out}/pipeline/pipeline.cppipe" -params.metadata_dir = "${params.out}/metadata" -params.metadata = "metadata.csv" -params.worm_model_dir = "${params.data_dir}/worm_models" -/* -~ ~ ~ > * LOG AND HELP MESSAGE SETUP -*/ +params.project_name = params.project.split("/").last() +params.project_tag = "Analysis-${date}" + +params.container__general = "docker://andersenlab/nemascan:20220411181933701519" +params.container__cellprofiler = "cellprofiler/cellprofiler:4.2.1" + +include { + listFiles + makeMetadata +} from './modules/process_input.nf' +include { + runCP +} from './modules/cellprofiler.nf' +include { + proc_CP_output +} from './modules/process_output.nf' if (!params.help) { log.info ''' @@ -67,7 +77,8 @@ C E L L P R O F I L E R - N F P I P E L I N E =============================================== ''' log.info "" - log.info "Project = ${params.project}" + log.info "Projcect = ${params.project_name}" + log.info "Project Dir = ${params.project}" log.info "CP pipeline = ${params.pipeline}" log.info "Groups = ${params.groups}" log.info "Output = ${params.out}" @@ -86,39 +97,37 @@ C E L L P R O F I L E R - N F P I P E L I N E log.info "--pipeline The CP pipeline to use: toxin, dauer" log.info "" log.info "Optional arguments:" - log.info "--groups comma separated metadata groupings for CellProfiler, default is plate,well" - log.info "--outdir Output directory to place files, default is project/Analysis-{current date}" + log.info "--project_name A name for the project. Default detects from the project directory." + log.info "--groups Comma separated metadata groupings for CellProfiler, default is plate,well" + log.info "--out Output directory to place files, default is project/Analysis-{current date}" log.info "--help This usage statement." exit 1 } -/* -~ ~ ~ > * WORKFLOW -*/ - workflow { - input_data = Channel.fromPath("${params.raw_pipe}") - .combine(Channel.from("${params.metadata_dir}")) - .combine(Channel.from("${params.metadata}")) - .combine(Channel.from("${params.worm_model_dir}")) - .combine(Channel.from("${params.project}")) - .combine(Channel.from("${params.well_mask}")) - .combine(Channel.from("${params.groups}")) - .combine(Channel.from("${params.edited_pipe}")) - .combine(Channel.from("${params.out}")) - - if("${params.pipeline}" == "dauer") { - dauer_workflow(input_data) - } - - if("${params.pipeline}" == "toxin") { - toxin_workflow(input_data) - } + in_data_dir = Channel.fromPath("${params.project}") + + listFiles(in_data_dir) + makeMetadata(listFiles.out) + groups = makeMetadata.out + .splitCsv(header: true) + .map { + row -> ["${row.Metadata_Group}", + "input_data/CP_pipelines/${params.pipeline}-nf.cppipe"] + } + runCP_input = groups + .combine(in_data_dir) + .combine( + Channel.fromPath( + "${workflow.projectDir}/input_data" + ) + ) + .combine(makeMetadata.out) + runCP(runCP_input) + concat_outputs = runCP.out.output_files.toList() + proc_CP_output(concat_outputs) } -/* -~ ~ ~ > * GENERATE REPORT -*/ workflow.onComplete { summary = """ @@ -133,7 +142,8 @@ workflow.onComplete { Git info: $workflow.repository - $workflow.revision [$workflow.commitId] { Parameters } --------------------------- - Project = ${params.project} + Project = ${params.project_name} + Project Dir = ${params.project} Pipeline Used = ${params.pipeline} Result Directory = ${params.out} """ diff --git a/modules/cellprofiler.nf b/modules/cellprofiler.nf index 78c1bdd..1f626ac 100644 --- a/modules/cellprofiler.nf +++ b/modules/cellprofiler.nf @@ -4,18 +4,25 @@ nextflow.enable.dsl=2 process runCP { - + container "${params.container__cellprofiler}" label "cellpro" input: tuple val(group), - file(pipeline), - file(output) + val(pipeline), + path(input_data), + path("input_data"), + path("metadata.csv") output: - stdout emit: cp_output + stdout emit: cp_output + path "${group}", emit: output_files //tuple file("*.csv"), file("*.png"), emit: cp_output + // publishDir "${params.out}/${params.project}/Analysis-${date}", + // mode: 'copy', + // pattern: "CP_output/${group}" + script: template 'runCP.sh' } \ No newline at end of file diff --git a/modules/dauerWorkflow.nf b/modules/dauerWorkflow.nf deleted file mode 100644 index a88756c..0000000 --- a/modules/dauerWorkflow.nf +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env nextflow - -// Using DSL-2 -nextflow.enable.dsl=2 - -// INCLUDE modules -include { - config_CP_input_dauer -} from './process_input.nf' -include { - proc_CP_output_dauer -} from './process_output.nf' -include { - runCP -} from './cellprofiler.nf' - -/* -~ ~ ~ > * PARAMETERS SETUP -*/ - -pipe = "dauer-nf" -worm_model1 = "dauerMod.xml" -worm_model2 = "nondauerMod.xml" -model_name1 = "dauerMod_NonOverlappingWorms" -model_name2 = "nondauerMod_NonOverlappingWorms" - -workflow dauer_workflow { - take: - input_data_channel - - main: - config_cp = input_data_channel - .combine(Channel.from(worm_model1)) // edit here - .combine(Channel.from(worm_model2)) // edit here - .combine( - Channel.fromPath("${params.bin_dir}/config_CP_input_dauer.R") - ) | config_CP_input_dauer - //.view() - - // Run CellProfiler - groups = config_CP_input_dauer.out.groups_file - .splitCsv(header:true, sep: "\t") - .map { row -> - [row.group, file("${row.pipeline}"), file("${row.output}")] - } - //.view() - - runCP(groups) - - // Preprocess CellProfiler output files - proc_cp = runCP.out.cp_output - .last() // This ensures that all items are emitted from runCP - .combine(Channel.from("${params.out}")) - .combine(Channel.from(model_name1)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.from(model_name2)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.fromPath("${params.bin_dir}/proc_CP_output_dauer.R")) - //.view() - - proc_CP_output_dauer(proc_cp) -} \ No newline at end of file diff --git a/modules/process_input.nf b/modules/process_input.nf index 3914265..350d57a 100644 --- a/modules/process_input.nf +++ b/modules/process_input.nf @@ -3,75 +3,31 @@ // Using DSL-2 nextflow.enable.dsl=2 -process config_CP_input_dauer { - publishDir "${params.out}/pipeline", - mode: 'copy', - pattern: "*.cppipe" - publishDir "${params.out}/metadata", - mode: 'copy', - pattern: "metadata.csv" - publishDir "${params.out}/groups", - mode: 'copy', - pattern: "groups.tsv" - - input: - tuple file(raw_pipe), - val(meta_dir), - val(meta), - val(model_dir), - val(project), - val(mask), - val(group), - val(edited_pipe), - val(out), - val(model1), - val(model2), - file(config_script) - +process listFiles { + container "${params.container__general}" + input: + path input_dir output: - path "*.cppipe", emit: cp_pipeline_file - path "metadata.csv", emit: metadata_file - path "groups.tsv", emit: groups_file - + path "fileList.txt" script: - template 'config_CP_input_dauer.sh' + template 'listFiles.sh' } -process config_CP_input_toxin { - publishDir "${params.out}/pipeline", - mode: 'copy', - pattern: "*.cppipe" - publishDir "${params.out}/metadata", - mode: 'copy', - pattern: "metadata.csv" - publishDir "${params.out}/groups", - mode: 'copy', - pattern: "groups.tsv" +process makeMetadata { + container "${params.container__general}" input: - tuple file(raw_pipe), - val(meta_dir), - val(meta), - val(model_dir), - val(project), - val(mask), - val(group), - val(edited_pipe), - val(out), - val(model1), - val(model2), - val(model3), - val(model4), - file(config_script) + path in_fileList output: - path "*.cppipe", emit: cp_pipeline_file - path "metadata.csv", emit: metadata_file - path "groups.tsv", emit: groups_file - + path "metadata.csv" + + publishDir "${params.out}/metadata", + mode: 'copy', + pattern: "metadata.csv" script: - template 'config_CP_input_toxin.sh' -} \ No newline at end of file + template 'makeMetadata.sh' +} diff --git a/modules/process_output.nf b/modules/process_output.nf index 8104adc..971e506 100644 --- a/modules/process_output.nf +++ b/modules/process_output.nf @@ -3,41 +3,20 @@ // Using DSL-2 nextflow.enable.dsl=2 -process proc_CP_output_dauer { +process proc_CP_output { + container "${params.container__general}" - //publishDir "${params.out}/processed_data", mode: 'copy', pattern: "*.RData" + publishDir "${params.out}", + mode: 'copy', + pattern: "processed_{data,images}" input: - tuple val(cp_output), - val(out_dir), - val(model_name1), - val(model_name2), - file(proc_CP_out_script) + path cp_output output: - //path "*.RData", emit: cp_out_dat + path "processed_data", emit: proc_dat + path "processed_images", emit: proc_img script: - template 'proc_CP_output_dauer.sh' -} - -process proc_CP_output_toxin { - - //publishDir "${params.out}/processed_data", mode: 'copy', pattern: "*.RData" - - input: - tuple val(cp_output), - val(out_dir), - val(model_name1), - val(model_name2), - val(model_name3), - val(model_name4), - file(proc_CP_out_script) - - output: - //path "*.RData", emit: cp_out_dat - - script: - template 'proc_CP_output_toxin.sh' -} + template "proc_output_${params.pipeline}.sh" } \ No newline at end of file diff --git a/modules/toxinWorkflow.nf b/modules/toxinWorkflow.nf deleted file mode 100644 index 8b2c396..0000000 --- a/modules/toxinWorkflow.nf +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env nextflow - -// Using DSL-2 -nextflow.enable.dsl=2 - -// INCLUDE modules -include { - config_CP_input_toxin -} from './process_input.nf' -include { - proc_CP_output_toxin -} from './process_output.nf' -include { - runCP -} from './cellprofiler.nf' - -/* -~ ~ ~ > * PARAMETERS SETUP -*/ - -pipe = "toxin-nf" -worm_model1 = "L4_N2_HB101_100w.xml" -worm_model2 = "L2L3_N2_HB101_100w.xml" -worm_model3 = "L1_N2_HB101_100w.xml" -worm_model4 = "MDHD.xml" -model_name1 = "L4_N2_HB101_100w_NonOverlappingWorms" -model_name2 = "L2L3_N2_HB101_100w_NonOverlappingWorms" -model_name3 = "L1_N2_HB101_100w_NonOverlappingWorms" -model_name4 = "MDHD_NonOverlappingWorms" - -workflow toxin_workflow { - take: - input_data_channel - - main: - config_cp = input_data_channel - .combine(Channel.from(worm_model1)) // edit here - .combine(Channel.from(worm_model2)) // edit here - .combine(Channel.from(worm_model3)) // edit here - .combine(Channel.from(worm_model4)) // edit here - .combine( - Channel.fromPath("${params.bin_dir}/config_CP_input_toxin.R") - ) | config_CP_input_toxin - //.view() - - // Run CellProfiler - groups = config_CP_input_toxin.out.groups_file - .splitCsv(header:true, sep: "\t") - .map { row -> - [row.group, file("${row.pipeline}"), file("${row.output}")] - } - //.view() - - runCP(groups) - - // Preprocess CellProfiler output files - proc_cp = runCP.out.cp_output - .last() // This ensures that all items are emitted from runCP - .combine(Channel.from("${params.out}")) - .combine(Channel.from(model_name1)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.from(model_name2)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.from(model_name3)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.from(model_name4)) // HARDCODE VARIABLE NOW MAKE DEPENDENT ON PROFILE - .combine(Channel.fromPath("${params.bin_dir}/proc_CP_output_toxin.R")) | proc_CP_output_toxin - //.view() -} diff --git a/templates/config_CP_input_dauer.sh b/templates/config_CP_input_dauer.sh deleted file mode 100644 index 86c99ee..0000000 --- a/templates/config_CP_input_dauer.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -e -set -o pipefail -set -x - -# Configure the raw pipeline for CellProfiler -awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ -awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ -awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ -awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ -awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' > pipeline.cppipe - -# Configure metadata and groups for CellProfiller with config_CP_input.R -Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} diff --git a/templates/config_CP_input_toxin.sh b/templates/config_CP_input_toxin.sh deleted file mode 100644 index 3795051..0000000 --- a/templates/config_CP_input_toxin.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -e -set -o pipefail -set -x - -# Configure the raw pipeline for CellProfiler -awk '{gsub(/METADATA_DIR/,"${meta_dir}"); print}' ${raw_pipe} | \\ -awk '{gsub(/METADATA_CSV_FILE/,"${meta}"); print}' | \\ -awk '{gsub(/WORM_MODEL_DIR/,"${model_dir}"); print}' | \\ -awk '{gsub(/MODEL1_XML_FILE/,"${model1}"); print}' | \\ -awk '{gsub(/MODEL2_XML_FILE/,"${model2}"); print}' | \\ -awk '{gsub(/MODEL3_XML_FILE/,"${model3}"); print}' | \\ -awk '{gsub(/MODEL4_XML_FILE/,"${model4}"); print}' > pipeline.cppipe - -# Configure metadata and groups for CellProfiller with config_CP_input.R -Rscript --vanilla ${config_script} ${project} ${mask} ${group} ${edited_pipe} ${out} diff --git a/templates/listFiles.sh b/templates/listFiles.sh new file mode 100644 index 0000000..b5eb3aa --- /dev/null +++ b/templates/listFiles.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +ls -1 ${input_dir}/raw_images/* > fileList.txt \ No newline at end of file diff --git a/templates/makeMetadata.sh b/templates/makeMetadata.sh new file mode 100644 index 0000000..7fe6758 --- /dev/null +++ b/templates/makeMetadata.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +makeMetadata_${params.pipeline}.R \\ +${in_fileList} \\ +${params.well_mask} \\ +${params.groups} +echo "done" \ No newline at end of file diff --git a/templates/proc_CP_output_dauer.sh b/templates/proc_CP_output_dauer.sh deleted file mode 100644 index ea9d4d0..0000000 --- a/templates/proc_CP_output_dauer.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -set -e -set -o pipefail -set -x - -# remove exisitng directories if present and make fresh -if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi -mkdir ${out_dir}/processed_data -if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi -mkdir ${out_dir}/processed_images -# find .csv files, concatenate them, and write new file -find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv - -# find .csv files, concatenate them, and write new file -find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv - -# move all the output images to process_images directory END WITH /? -find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; -# Process the CellProfiler output with proc_CP_output.R -Rscript --vanilla ${proc_CP_out_script} ${out_dir} \ No newline at end of file diff --git a/templates/proc_CP_output_toxin.sh b/templates/proc_CP_output_toxin.sh deleted file mode 100644 index ac5a7aa..0000000 --- a/templates/proc_CP_output_toxin.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -set -e -set -o pipefail -set -x - -# remove exisitng directories if present and make fresh -if [ -d ${out_dir}/processed_data ]; then rm -Rf ${out_dir}/processed_data; fi -mkdir ${out_dir}/processed_data -if [ -d ${out_dir}/processed_images ]; then rm -Rf ${out_dir}/processed_images; fi -mkdir ${out_dir}/processed_images -# find .csv files, concatenate them, and write new file -find ${out_dir}/CP_output -type f -name '${model_name1}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name1}.csv - -# find .csv files, concatenate them, and write new file -find ${out_dir}/CP_output -type f -name '${model_name2}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name2}.csv -# find .csv files, concatenate them, and write new file -find ${out_dir}/CP_output -type f -name '${model_name3}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name3}.csv -# find .csv files, concatenate them, and write new file -find ${out_dir}/CP_output -type f -name '${model_name4}.csv' -print0 | xargs -0 awk 'FNR>1 || NR==1 {print}' > ${out_dir}/processed_data/${model_name4}.csv - -# move all the output images to process_images directory END WITH /? -find ${out_dir}/CP_output -name '*.png' -exec mv {} ${out_dir}/processed_images \\; -# Process the CellProfiler output with proc_CP_output.R -Rscript --vanilla ${proc_CP_out_script} ${out_dir} \ No newline at end of file diff --git a/templates/proc_output_dauer.sh b/templates/proc_output_dauer.sh new file mode 100644 index 0000000..808a383 --- /dev/null +++ b/templates/proc_output_dauer.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +# remove exisitng directories if present and make fresh +mkdir processed_data + + +# find .csv files, concatenate them, and write new file +for model_name in dauerMod_NonOverlappingWorms nondauerMod_NonOverlappingWorms; do +first_line=TRUE +for fIter in \$(ls -1 */\${model_name}.csv); do +if [ "\${first_line}" = "TRUE" ]; then +cat \${fIter} > processed_data/\${model_name}.csv +else +tail -n +2 \${fIter} >> processed_data/\${model_name}.csv +fi +done +done + +mkdir processed_images +# move all the output images to process_images directory END WITH /? +cp */*.png processed_images/ + +# Process the CellProfiler output with proc_CP_output.R +proc_CP_output.R \\ +${params.project_name} \\ +${params.project_tag} \ No newline at end of file diff --git a/templates/proc_output_toxin.sh b/templates/proc_output_toxin.sh new file mode 100644 index 0000000..b4f7f43 --- /dev/null +++ b/templates/proc_output_toxin.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -e +set -o pipefail +set -x + +# remove exisitng directories if present and make fresh +mkdir processed_data + +# find .csv files, concatenate them, and write new file +for model_name in L4_N2_HB101_100w_NonOverlappingWorms L2L3_N2_HB101_100w_NonOverlappingWorms L1_N2_HB101_100w_NonOverlappingWorms MDHD_NonOverlappingWorms; do +first_line=TRUE +for fIter in \$(ls -1 */\${model_name}.csv); do +if [ "\${first_line}" = "TRUE" ]; then +cat \${fIter} > processed_data/\${model_name}.csv +else +tail -n +2 \${fIter} >> processed_data/\${model_name}.csv +fi +done +done + +mkdir processed_images + +# move all the output images to process_images directory END WITH /? +cp */*.png processed_images/ + +# Process the CellProfiler output with proc_CP_output.R +proc_CP_output.R \\ +${params.project_name} \\ +${params.project_tag} \ No newline at end of file diff --git a/templates/runCP.sh b/templates/runCP.sh index 6fe147f..bb73e03 100644 --- a/templates/runCP.sh +++ b/templates/runCP.sh @@ -4,7 +4,19 @@ set -e set -o pipefail set -x +pwd +ls -lh + +chmod a+x ${pipeline} +export MPLCONFIGDIR=cellProfiler_tmp + +mkdir cellProfiler_tmp +mkdir -p ${group} + # Run cellprofiler headless -cellprofiler -c -r -p ${pipeline} \ --g ${group} \ --o ${output} \ No newline at end of file +cellprofiler -c -r \\ +-i \$(pwd) \\ +-p ${pipeline} \\ +-g Metadata_Group=${group} \\ +-o ${group} \\ +-t cellProfiler_tmp \ No newline at end of file From 0a9dcf70c90742fba41cf6c052930ba1aeb7b3f9 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 4 Nov 2022 13:39:48 -0700 Subject: [PATCH 10/17] Update permissions for R scripts --- bin/makeMetadata_dauer.R | 0 bin/makeMetadata_toxin.R | 0 bin/proc_CP_output.R | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 bin/makeMetadata_dauer.R mode change 100644 => 100755 bin/makeMetadata_toxin.R mode change 100644 => 100755 bin/proc_CP_output.R diff --git a/bin/makeMetadata_dauer.R b/bin/makeMetadata_dauer.R old mode 100644 new mode 100755 diff --git a/bin/makeMetadata_toxin.R b/bin/makeMetadata_toxin.R old mode 100644 new mode 100755 diff --git a/bin/proc_CP_output.R b/bin/proc_CP_output.R old mode 100644 new mode 100755 From 54d60ebbcb2a9bcfe3f45576ca5ababe84398c6c Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 4 Nov 2022 13:45:44 -0700 Subject: [PATCH 11/17] Update .cppipe files --- input_data/CP_pipelines/dauer-nf.cppipe | 20 ++++++++++---------- input_data/CP_pipelines/toxin-nf.cppipe | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/input_data/CP_pipelines/dauer-nf.cppipe b/input_data/CP_pipelines/dauer-nf.cppipe index efc0392..0536a9c 100644 --- a/input_data/CP_pipelines/dauer-nf.cppipe +++ b/input_data/CP_pipelines/dauer-nf.cppipe @@ -6,8 +6,8 @@ ModuleCount:31 HasImagePlaneDetails:False LoadData:[module_num:1|svn_version:'Unknown'|variable_revision_number:6|show_window:True|notes:[]|batch_state:array([], dtype=uint8)|enabled:True|wants_pause:False] - Input data file location:Elsewhere...|METADATA_DIR - Name of the file:METADATA_CSV_FILE + Input data file location:Elsewhere...|. + Name of the file:metadata.csv Load images based on this data?:Yes Base image location:Elsewhere...| Process just a range of rows?:No @@ -167,8 +167,8 @@ UntangleWorms:[module_num:10|svn_version:'Unknown'|variable_revision_number:2|sh Overlap style:Both Name the output overlapping worm objects:dauerMod_OverlappingWorms Name the output non-overlapping worm objects:dauerMod_NonOverlappingWorms - Training set file location:Elsewhere...|WORM_MODEL_DIR - Training set file name:MODEL1_XML_FILE + Training set file location:Elsewhere...|input_data/worm_models + Training set file name:dauerMod.xml Use training set weights?:Yes Overlap weight:5.0 Leftover weight:10.0 @@ -199,8 +199,8 @@ UntangleWorms:[module_num:11|svn_version:'Unknown'|variable_revision_number:2|sh Overlap style:Both Name the output overlapping worm objects:nondauerMod_OverlappingWorms Name the output non-overlapping worm objects:nondauerMod_NonOverlappingWorms - Training set file location:Elsewhere...|WORM_MODEL_DIR - Training set file name:MODEL2_XML_FILE + Training set file location:Elsewhere...|input_data/worm_models + Training set file name:nondauerMod.xml Use training set weights?:Yes Overlap weight:5.0 Leftover weight:10.0 @@ -230,8 +230,8 @@ StraightenWorms:[module_num:12|svn_version:'Unknown'|variable_revision_number:3| Select the input untangled worm objects:dauerMod_NonOverlappingWorms Name the output straightened worm objects:dauerMod_StraightenedWorms Worm width:20 - Training set file location:Elsewhere...|WORM_MODEL_DIR - Training set file name:MODEL1_XML_FILE + Training set file location:Elsewhere...|input_data/worm_models + Training set file name:dauerMod.xml Image count:1 Measure intensity distribution?:Yes Number of transverse segments:1 @@ -245,8 +245,8 @@ StraightenWorms:[module_num:13|svn_version:'Unknown'|variable_revision_number:3| Select the input untangled worm objects:nondauerMod_NonOverlappingWorms Name the output straightened worm objects:nondauerMod_StraightenedWorms Worm width:20 - Training set file location:Elsewhere...|WORM_MODEL_DIR - Training set file name:MODEL2_XML_FILE + Training set file location:Elsewhere...|input_data/worm_models + Training set file name:nondauerMod.xml Image count:1 Measure intensity distribution?:Yes Number of transverse segments:1 diff --git a/input_data/CP_pipelines/toxin-nf.cppipe b/input_data/CP_pipelines/toxin-nf.cppipe index 070fb30..7e580b7 100644 --- a/input_data/CP_pipelines/toxin-nf.cppipe +++ b/input_data/CP_pipelines/toxin-nf.cppipe @@ -6,8 +6,8 @@ ModuleCount:22 HasImagePlaneDetails:False LoadData:[module_num:1|svn_version:'Unknown'|variable_revision_number:6|show_window:False|notes:[]|batch_state:array([], dtype=uint8)|enabled:True|wants_pause:False] - Input data file location:Elsewhere...|METADATA_DIR - Name of the file:METADATA_CSV_FILE + Input data file location:Elsewhere...|. + Name of the file:metadata.csv Load images based on this data?:Yes Base image location:Default Input Folder| Process just a range of rows?:No @@ -159,8 +159,8 @@ UntangleWorms:[module_num:9|svn_version:'Unknown'|variable_revision_number:2|sho Overlap style:Both Name the output overlapping worm objects:L4_N2_HB101_100w_OverlappingWorms Name the output non-overlapping worm objects:L4_N2_HB101_100w_NonOverlappingWorms - Training set file location:Elsewhere...|WORM_MODEL_DIR - Training set file name:MODEL1_XML_FILE + Training set file location:Elsewhere...|input_data/worm_models + Training set file name:L4_N2_HB101_100w.xml Use training set weights?:Yes Overlap weight:5.0 Leftover weight:10.0 @@ -191,8 +191,8 @@ UntangleWorms:[module_num:10|svn_version:'Unknown'|variable_revision_number:2|sh Overlap style:Both Name the output overlapping worm objects:L2L3_N2_HB101_100w_OverlappingWorms Name the output non-overlapping worm objects:L2L3_N2_HB101_100w_NonOverlappingWorms - Training set file location:Elsewhere...|WORM_MODEL_DIR - Training set file name:MODEL2_XML_FILE + Training set file location:Elsewhere...|input_data/worm_models + Training set file name:L2L3_N2_HB101_100w.xml Use training set weights?:Yes Overlap weight:5.0 Leftover weight:10.0 @@ -223,8 +223,8 @@ UntangleWorms:[module_num:11|svn_version:'Unknown'|variable_revision_number:2|sh Overlap style:Both Name the output overlapping worm objects:L1_N2_HB101_100w_OverlappingWorms Name the output non-overlapping worm objects:L1_N2_HB101_100w_NonOverlappingWorms - Training set file location:Elsewhere...|WORM_MODEL_DIR - Training set file name:MODEL3_XML_FILE + Training set file location:Elsewhere...|input_data/worm_models + Training set file name:L1_N2_HB101_100w.xml Use training set weights?:Yes Overlap weight:5.0 Leftover weight:10.0 @@ -255,8 +255,8 @@ UntangleWorms:[module_num:12|svn_version:'Unknown'|variable_revision_number:2|sh Overlap style:Both Name the output overlapping worm objects:MDHD_OverlappingWorms Name the output non-overlapping worm objects:MDHD_NonOverlappingWorms - Training set file location:Elsewhere...|WORM_MODEL_DIR - Training set file name:MODEL4_XML_FILE + Training set file location:Elsewhere...|input_data/worm_models + Training set file name:MDHD.xml Use training set weights?:Yes Overlap weight:5.0 Leftover weight:10.0 From ae52cccc1baddb85f564e1e0fa28611eb2a53e63 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 4 Nov 2022 14:04:04 -0700 Subject: [PATCH 12/17] Remove core-specific options from nextflow.config --- nextflow.config | 6 ------ 1 file changed, 6 deletions(-) diff --git a/nextflow.config b/nextflow.config index 4f46da7..b72e959 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,16 +35,10 @@ singularity { process { executor = 'slurm' - queue = 'genomicsguestA' - clusterOptions = '-A b1042 -t 04:00:00 -e errlog.txt' - container = 'andersenlab/nemascan:20220407173056db3227' // still need to make cp docker img errorStrategy='retry' maxRetries=3 withLabel: cellpro { - container = 'cellprofiler/cellprofiler:4.2.1' - queue = 'genomicsguestA' - clusterOptions = '-A b1042' memory = { 8.GB * task.attempt } time = { 15.min * task.attempt } errorStrategy='retry' From 820939684a9ccacb197dfe4d60ba000944002890 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 4 Nov 2022 14:04:42 -0700 Subject: [PATCH 13/17] Finish removing core-specific options from nextflow.config --- nextflow.config | 2 -- 1 file changed, 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index b72e959..e62760c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -29,8 +29,6 @@ singularity { pullTimeout = '45 min' enabled = true autoMounts = true - cacheDir = "/projects/b1059/singularity" // this is QUEST specific still - } process { From 20058b25755e479b49829125fab930e23eeb12fc Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Fri, 4 Nov 2022 14:08:47 -0700 Subject: [PATCH 14/17] Make slurm the executor --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index e62760c..634a662 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,6 +21,7 @@ timeline { } executor { + name='slurm' queueSize = 5000 submitRateLimit = 10 } From 3ff0a88779906ae68ce6355b15caa75671e5c119 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Tue, 8 Nov 2022 09:42:51 -0800 Subject: [PATCH 15/17] Fix regex for metadata extraction --- bin/makeMetadata_dauer.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/makeMetadata_dauer.R b/bin/makeMetadata_dauer.R index 1b36d91..c38c0ba 100755 --- a/bin/makeMetadata_dauer.R +++ b/bin/makeMetadata_dauer.R @@ -29,7 +29,7 @@ meta1 <- read_delim( extract(file_path, into = "file", remove = FALSE, regex = ".*/(.*)$") %>% extract(file, remove = FALSE, - regex = "^(.*)-(.*)-(.*)-(.*)_(.*)_(.*_\\.(.*)$", + regex = "^(.*)-(.*)-(.*)-(.*)_(.*)_(.*)_\\.(.*)$", into = c("date","exp","plate","mag","well","wave","TIF")) %>% select(-TIF) %>% dplyr::mutate(row = stringr::str_extract(well, pattern = "[A-Z]"), From 3278f45f8e130a8a31f5ac4b76c3d1d5078c7d55 Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Tue, 8 Nov 2022 10:00:08 -0800 Subject: [PATCH 16/17] Fix dauer metadata script --- bin/makeMetadata_dauer.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bin/makeMetadata_dauer.R b/bin/makeMetadata_dauer.R index c38c0ba..0cba482 100755 --- a/bin/makeMetadata_dauer.R +++ b/bin/makeMetadata_dauer.R @@ -16,7 +16,6 @@ library(data.table) # 4 - the edited pipeline path # 5 - the out path args <- commandArgs(trailingOnly = TRUE) - #==============================================================================# # Make Metadata NEEDS TO BE ADAPATBLE TO MULTIPLE WAVELENGTHS #==============================================================================# @@ -29,7 +28,7 @@ meta1 <- read_delim( extract(file_path, into = "file", remove = FALSE, regex = ".*/(.*)$") %>% extract(file, remove = FALSE, - regex = "^(.*)-(.*)-(.*)-(.*)_(.*)_(.*)_\\.(.*)$", + regex = "^(.*)-(.*)-(.*)-(.*)_(.*)_(.*)\\.(.*)$", into = c("date","exp","plate","mag","well","wave","TIF")) %>% select(-TIF) %>% dplyr::mutate(row = stringr::str_extract(well, pattern = "[A-Z]"), @@ -43,7 +42,7 @@ meta1$group <- apply( meta1[, groups], 1, paste, collapse = "_") # add image types and set metadata names - hardcode image names - needs to be flexible for multiple pipeline profiles meta2 <- meta1 %>% -tidyr::pivot_wider(names_from = wave, values_from = c(file, file_path)) %>% + tidyr::pivot_wider(names_from = wave, values_from = c(file, file_path), ) %>% dplyr::rename(Image_FileName_RawBF = file_w1, Image_PathName_RawBF = file_path_w1, Image_FileName_RawRFP = file_w2, @@ -63,8 +62,10 @@ tidyr::pivot_wider(names_from = wave, values_from = c(file, file_path)) %>% Metadata_Well = well, Metadata_Group = group, Metadata_Magnification = mag, - Image_FileName_RawBF = file, + Image_FileName_RawBF, Image_PathName_RawBF, + Image_FileName_RawRFP, + Image_PathName_RawRFP, Image_FileName_wellmask_98.png, Image_PathName_wellmask_98.png) From 5e48ea20871756ab44e40946acf0dae46619a85e Mon Sep 17 00:00:00 2001 From: Brendan Kohrn Date: Tue, 15 Nov 2022 10:34:53 -0800 Subject: [PATCH 17/17] Update permissions --- input_data/CP_pipelines/dauer-nf.cppipe | 0 input_data/CP_pipelines/toxin-nf.cppipe | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 input_data/CP_pipelines/dauer-nf.cppipe mode change 100644 => 100755 input_data/CP_pipelines/toxin-nf.cppipe diff --git a/input_data/CP_pipelines/dauer-nf.cppipe b/input_data/CP_pipelines/dauer-nf.cppipe old mode 100644 new mode 100755 diff --git a/input_data/CP_pipelines/toxin-nf.cppipe b/input_data/CP_pipelines/toxin-nf.cppipe old mode 100644 new mode 100755