sof202 · sof202 · Feb 11, 2025 · Feb 11, 2025
diff --git a/JobSubmission/Peaks_to_Binary/1_convert_MACS_to_binary.sh b/JobSubmission/Peaks_to_Binary/1_convert_MACS_to_binary.sh
@@ -39,48 +39,11 @@ move_log_files convert
 ##   BINARIZATION   ##
 ## ================ ##
 
-rm -rf "${BINARY_DIR}/${epigenetic_mark_name:?}"
-mkdir -p "${BINARY_DIR}/${epigenetic_mark_name}"
-
-conda activate ChromBinarize-R
-Rscript ${RSCRIPT_DIR}/create_blank_bed_files.R \
-  "${REPO_DIR}" \
-  "${chromosome_sizes}" \
-  "${BIN_SIZE}" \
-  "${BINARY_DIR}/${epigenetic_mark_name}"
-conda deactivate
-
-logs "${DEBUG_MODE:0}" \
-"Generating ChromHMM binary files..."
-
-for chromosome in {1..22} X; do
-  output_binary_file="${BINARY_DIR}/${epigenetic_mark_name}/${cell_type}_chr${chromosome}_binary.txt"
-  echo -e "${cell_type}\tchr${chromosome}" > "${output_binary_file}"
-  echo "${epigenetic_mark_name}" >> "${output_binary_file}"
-
-  conda activate ChromBinarize-bedtools
-  bedtools intersect \
-    -wa \
-    -c \
-    -a "${BINARY_DIR}/${epigenetic_mark_name}/chromosome${chromosome}.bed" \
-    -b "${input_MACS_file}" | \
-    awk '{OFS="\t"} {print ($4 > 0 ? 1 : 0)}' >> \
-    "${output_binary_file}"
-  conda deactivate
-
-  number_of_signatures=$(awk 'NR>2 && $1>0' "${output_binary_file}" | wc -l)
-
-logs 1 \
-"chromosome ${chromosome} has:
-${number_of_signatures} signatures."
-
-    if [[ "${number_of_signatures}" -eq 0 ]]; then
-errors "${chromosome}'s binary file has no true/1 entries. 
-Please check to see if your input MACS file is empty."
-    fi
-
-  gzip "${output_binary_file}"
-
-  rm "${BINARY_DIR}/${epigenetic_mark_name}/chromosome${chromosome}.bed"
+all_marks=$(\
+  find "${macs_directory}" -type f -name "*.bed" | \
+  xargs -n 1 basename | \
+  cut -d. -f1\
+)
+for mark in ${all_marks}; do
+  binarization_convertToChromHMM "$mark"
 done
-
diff --git a/config-setup.txt b/config-setup.txt
@@ -84,9 +84,9 @@ beta_threshold=0.001
 ## CHIP_SEQ BINARIZATION ##
 ## --------------------- ##
 
-# This will appear as the epigenetic mark name in the resultant binary file
-epigenetic_mark_name=
-input_MACS_file=
+# A directory that contains all of the previously peak called datasets.
+# This expects bed files named `mark.bed`
+macs_directory=
 
 
 ## ------------- ##

diff --git a/functions/binarization.sh b/functions/binarization.sh
@@ -133,3 +133,58 @@ or your 'binomial threshold' in the config file is too strict."
   done
   conda deactivate
 }
+
+binarization_convertToChromHMM() {
+  epigenetic_mark_name=$1
+
+  rm -rf "${BINARY_DIR}/${epigenetic_mark_name:?}"
+  mkdir -p "${BINARY_DIR}/${epigenetic_mark_name}"
+
+  conda activate ChromBinarize-R
+  Rscript ${RSCRIPT_DIR}/create_blank_bed_files.R \
+    "${REPO_DIR}" \
+    "${chromosome_sizes}" \
+    "${BIN_SIZE}" \
+    "${BINARY_DIR}/${epigenetic_mark_name}"
+  conda deactivate
+
+  logs "${DEBUG_MODE:0}" \
+  "Generating ChromHMM binary files..."
+
+  input_MACS_file=$(\
+    find "${macs_directory}" \
+    -type f \
+    -name "${epigenetic_mark_name}*.bed"\
+  )
+
+  for chromosome in {1..22} X; do
+    output_binary_file="${BINARY_DIR}/${epigenetic_mark_name}/${cell_type}_chr${chromosome}_binary.txt"
+    echo -e "${cell_type}\tchr${chromosome}" > "${output_binary_file}"
+    echo "${epigenetic_mark_name}" >> "${output_binary_file}"
+
+    conda activate ChromBinarize-bedtools
+    bedtools intersect \
+      -wa \
+      -c \
+      -a "${BINARY_DIR}/${epigenetic_mark_name}/chromosome${chromosome}.bed" \
+      -b "${input_MACS_file}" | \
+      awk '{OFS="\t"} {print ($4 > 0 ? 1 : 0)}' >> \
+      "${output_binary_file}"
+    conda deactivate
+
+    number_of_signatures=$(awk 'NR>2 && $1>0' "${output_binary_file}" | wc -l)
+
+  logs 1 \
+  "chromosome ${chromosome} has:
+  ${number_of_signatures} signatures."
+
+      if [[ "${number_of_signatures}" -eq 0 ]]; then
+  errors "${chromosome}'s binary file has no true/1 entries. 
+  Please check to see if your input MACS file is empty."
+      fi
+
+    gzip "${output_binary_file}"
+
+    rm "${BINARY_DIR}/${epigenetic_mark_name}/chromosome${chromosome}.bed"
+  done
+}