From 391f5854a8bd9f3ed3335cbce72d915f22395a7f Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Sun, 11 May 2025 16:45:20 +0800 Subject: [PATCH 01/11] Update Run_UniDoc_from_scratch_structure_afdb.py --- .../scripts/Run_UniDoc_from_scratch_structure_afdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py index d1bce3d..b832551 100644 --- a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py +++ b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py @@ -9,7 +9,7 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) BINDIR = os.path.join(SCRIPT_DIR, 'bin') -UNIDOC = os.path.join(BINDIR, 'UniDoc_structure') +UNIDOC = os.path.join(BINDIR, 'UniDoc_struct') STRIDE = os.path.join(BINDIR, 'stride') pdb_to_fasta = "pdb_tofasta" pdb_selres= "pdb_selres" From f9be1f7e3dcc1e3889aff4830cb44fec2309e5f4 Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Fri, 16 May 2025 15:28:58 +0800 Subject: [PATCH 02/11] Update Run_UniDoc_from_scratch_structure_afdb.py --- .../Run_UniDoc_from_scratch_structure_afdb.py | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py index b832551..4009f2a 100644 --- a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py +++ b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py @@ -74,29 +74,35 @@ def main(): if_exist = os.path.exists(pdb_path_chopped) if if_exist and not is_empty: pdb = pdb_path_chopped - else: - pdb_path_chopped = None - try: - # Run secondary structure calculation with STRIDE - subprocess.check_output(f"{STRIDE} {pdb} -r{args.chain} > {pdb_ss} 2> 
/dev/null", shell=True) + try: + # Run secondary structure calculation with STRIDE + subprocess.check_output(f"{STRIDE} {pdb} -r{args.chain} > {pdb_ss} 2> /dev/null", shell=True) - # Run UniDoc - output = subprocess.check_output(f"{UNIDOC} {pdb} {args.chain} {pdb_ss}", shell=True) + # Run UniDoc + output = subprocess.check_output(f"{UNIDOC} {pdb} {args.chain} {pdb_ss}", shell=True) - # Format the output - output = str(output, 'utf-8').replace('~','-').replace(',','_').replace('/',',').rstrip('\n') + # Format the output + output = str(output, 'utf-8').replace('~','-').replace(',','_').replace('/',',').rstrip('\n') - domains = output.split(',') - ndoms = len(domains) - chopping = ','.join(natsorted(domains)) + domains = output.split(',') + ndoms = len(domains) + chopping = ','.join(natsorted(domains)) - if chopping == '': - chopping = "NULL" - ndoms = 0 + if chopping == '': + chopping = "NULL" + ndoms = 0 - except: - chopping = 'NO_SS' + except: + pdb_ss = None + pdb_path_chopped = None + chopping = 'NO_SS' + ndoms = 0 + + else: + pdb_ss = None + pdb_path_chopped = None + chopping = 'NULL' ndoms = 0 # end_time = time.time() - start_time From 5143e37187a5019eeba91210e27c5d883888544f Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Fri, 16 May 2025 15:30:39 +0800 Subject: [PATCH 03/11] Update Run_UniDoc_from_scratch_structure_afdb.py --- .../scripts/Run_UniDoc_from_scratch_structure_afdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py index 4009f2a..0ec7e6f 100644 --- a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py +++ b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py @@ -118,10 +118,10 @@ def main(): )) # Cleanup - if os.path.exists(pdb_ss): + if pdb_ss is not None and os.path.exists(pdb_ss): os.remove(pdb_ss) - if 
os.path.exists(pdb_path_chopped): + if pdb_path_chopped is not None and os.path.exists(pdb_path_chopped): os.remove(pdb_path_chopped) if __name__ == "__main__": From e530ae6c008ddcf9d0166575bd370c2755e45d31 Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Thu, 29 May 2025 20:39:27 +0800 Subject: [PATCH 04/11] Delete ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py --- .../Run_UniDoc_from_scratch_structure_afdb.py | 128 ------------------ 1 file changed, 128 deletions(-) delete mode 100644 ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py diff --git a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py deleted file mode 100644 index 0ec7e6f..0000000 --- a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py +++ /dev/null @@ -1,128 +0,0 @@ -import os -import argparse -import subprocess -import hashlib -import re -import time - -from natsort import natsorted - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) -BINDIR = os.path.join(SCRIPT_DIR, 'bin') -UNIDOC = os.path.join(BINDIR, 'UniDoc_struct') -STRIDE = os.path.join(BINDIR, 'stride') -pdb_to_fasta = "pdb_tofasta" -pdb_selres= "pdb_selres" - -resndict = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', - 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', - 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', - 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V', - 'PAD': 'X'} - -def main(): - parser = argparse.ArgumentParser() - - group = parser.add_mutually_exclusive_group() - group.add_argument("-i", "--input", type=str, nargs="+", required=False, help="Pass a single file or list of filenames as -i 1ubq.pdb or *.pdb.") - group.add_argument("-l", "--list", type=str, required=False, help="Pass a file containing paths to files.") - - parser.add_argument("-c",dest='chain', required=False, 
type=str, default='A', help="the chain of parsed protein") - parser.add_argument("--out",dest='outfile', required=True, type=str, help="output file to write results to") - parser.add_argument("--inherit_chopping", type=str, required=True, default=None, help="Pass a file containing choppings from Merizo or Chainsaw. Rows should match targets in --input.") - - args = parser.parse_args() - - if args.input is not None: - files = args.input - - if args.list is not None: - with open(args.list, 'r') as fn: - files = [line.rstrip('\n') for line in fn] - - chopping_dict = {} - with open(args.inherit_chopping, 'r') as fn: - for line in fn: - line_split = line.rstrip('\n').split('\t') - chopping_dict[line_split[0]] = line_split[-2] - - with open(args.outfile, 'w') as fn: - for pdb_path in files: - start_time = time.time() - bn, ext = os.path.splitext(os.path.basename(pdb_path)) - chopping = chopping_dict[bn] - - pdb = os.path.realpath(pdb_path) - pdb_bn, pdb_ext = os.path.splitext(pdb) - pdb_ss = pdb + '.ss' - - fasta = str(subprocess.check_output(f"{pdb_to_fasta} {pdb}", shell=True), 'utf-8').split('\n')[1:-1] - seq = ''.join(fasta) - - md5 = hashlib.md5(seq.encode('utf-8')).hexdigest() - nres = len(seq) - - # pdb_selres.py -1:10,20:30 pdb.pdb - # If chopping is provided, then extract all domain residues from PDB using pdb_tools - # save as new file and point to the new file - if chopping not in ['NULL','NO_SS']: - pdb_path_chopped = pdb_bn + '_chopped.pdb' - resrng = chopping.replace('-',':').replace('_',',') # convert domain chopping into segments - subprocess.check_output(f"{pdb_selres} -{resrng} {pdb_path} > {pdb_path_chopped} 2> /dev/null", shell=True) - - is_empty = os.stat(pdb_path_chopped).st_size == 0 - if_exist = os.path.exists(pdb_path_chopped) - if if_exist and not is_empty: - pdb = pdb_path_chopped - - try: - # Run secondary structure calculation with STRIDE - subprocess.check_output(f"{STRIDE} {pdb} -r{args.chain} > {pdb_ss} 2> /dev/null", shell=True) - - # Run 
UniDoc - output = subprocess.check_output(f"{UNIDOC} {pdb} {args.chain} {pdb_ss}", shell=True) - - # Format the output - output = str(output, 'utf-8').replace('~','-').replace(',','_').replace('/',',').rstrip('\n') - - domains = output.split(',') - ndoms = len(domains) - chopping = ','.join(natsorted(domains)) - - if chopping == '': - chopping = "NULL" - ndoms = 0 - - except: - pdb_ss = None - pdb_path_chopped = None - chopping = 'NO_SS' - ndoms = 0 - - else: - pdb_ss = None - pdb_path_chopped = None - chopping = 'NULL' - ndoms = 0 - - # end_time = time.time() - start_time - - fn.write("{}\t{}\t{}\t{}\t{}\t{:.5f}\n".format( - bn, - md5, - nres, - ndoms, - chopping, - 1., - # end_time, - )) - - # Cleanup - if pdb_ss is not None and os.path.exists(pdb_ss): - os.remove(pdb_ss) - - if pdb_path_chopped is not None and os.path.exists(pdb_path_chopped): - os.remove(pdb_path_chopped) - -if __name__ == "__main__": - main() From eadc339c615875ecfc32f123352be7b449b737f7 Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Thu, 29 May 2025 20:40:10 +0800 Subject: [PATCH 05/11] update --- .../Run_UniDoc_from_scratch_structure_afdb.py | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py diff --git a/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py new file mode 100644 index 0000000..9b52f5c --- /dev/null +++ b/ted_consensus_1.0/scripts/Run_UniDoc_from_scratch_structure_afdb.py @@ -0,0 +1,151 @@ +import os +import argparse +import subprocess +import hashlib +import re +import time + +from natsort import natsorted + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +BINDIR = os.path.join(SCRIPT_DIR, 'bin') +UNIDOC = os.path.join(BINDIR, 'UniDoc_struct') +STRIDE = os.path.join(BINDIR, 'stride') +pdb_to_fasta = "pdb_tofasta" +pdb_selres= "pdb_selres" + 
+resndict = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', + 'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', + 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', + 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V', + 'PAD': 'X'} + +def main(): + parser = argparse.ArgumentParser() + + group = parser.add_mutually_exclusive_group() + group.add_argument("-i", "--input", type=str, nargs="+", required=False, help="Pass a single file or list of filenames as -i 1ubq.pdb or *.pdb.") + group.add_argument("-l", "--list", type=str, required=False, help="Pass a file containing paths to files.") + + parser.add_argument("-c",dest='chain', required=False, type=str, default='A', help="the chain of parsed protein") + parser.add_argument("--out",dest='outfile', required=True, type=str, help="output file to write results to") + parser.add_argument("--inherit_chopping", type=str, required=True, default=None, help="Pass a file containing choppings from Merizo or Chainsaw. 
Rows should match targets in --input.") + + args = parser.parse_args() + + if args.input is not None: + files = args.input + + if args.list is not None: + with open(args.list, 'r') as fn: + files = [line.rstrip('\n') for line in fn] + + chopping_dict = {} + with open(args.inherit_chopping, 'r') as fn: + for line in fn: + line_split = line.rstrip('\n').split('\t') + chopping_dict[line_split[0]] = line_split[-2] + + with open(args.outfile, 'w') as fn: + for pdb_path in files: + start_time = time.time() + bn, ext = os.path.splitext(os.path.basename(pdb_path)) + chopping = chopping_dict[bn] + + pdb = os.path.realpath(pdb_path) + pdb_bn, pdb_ext = os.path.splitext(pdb) + pdb_ss = pdb + '.ss' + + fasta = str(subprocess.check_output(f"{pdb_to_fasta} {pdb}", shell=True), 'utf-8').split('\n')[1:-1] + seq = ''.join(fasta) + + md5 = hashlib.md5(seq.encode('utf-8')).hexdigest() + nres = len(seq) + + # pdb_selres.py -1:10,20:30 pdb.pdb + # If chopping is provided, then extract all domain residues from PDB using pdb_tools + # save as new file and point to the new file + if chopping not in ['NULL','NO_SS']: + print(f"Processing chopping for {bn}: {chopping}") + pdb_path_chopped = pdb_bn + '_chopped.pdb' + resrng = chopping.replace('-',':').replace('_',',') # convert domain chopping into segments + print(f"Extracting residues {resrng} from {pdb_path}") + subprocess.check_output(f"{pdb_selres} -{resrng} {pdb_path} > {pdb_path_chopped} 2> /dev/null", shell=True) + + is_empty = os.stat(pdb_path_chopped).st_size == 0 + if_exist = os.path.exists(pdb_path_chopped) + if if_exist and not is_empty: + print(f"Successfully created chopped PDB file: {pdb_path_chopped}") + pdb = pdb_path_chopped + else: + print(f"Warning: Chopped PDB file is empty or does not exist: {pdb_path_chopped}") + + try: + # Run secondary structure calculation with STRIDE + print(f"Running STRIDE on {pdb} for chain {args.chain}") + subprocess.check_output(f"{STRIDE} {pdb} -r{args.chain} > {pdb_ss} 2> /dev/null", 
shell=True) + + # Run UniDoc + print(f"Running UniDoc on {pdb}") + # UniDoc_struct needs to be run from the directory containing it as it looks for ./stride + # Change to the directory containing UNIDOC + unidoc_dir = os.path.dirname(UNIDOC) + original_dir = os.getcwd() + os.chdir(unidoc_dir) + + # Run UniDoc from its directory + output = subprocess.check_output(f"./UniDoc_struct {pdb} {args.chain} {pdb_ss}", shell=True) + + # Change back to original directory + os.chdir(original_dir) + # Format the output + output = str(output, 'utf-8').replace('~','-').replace(',','_').replace('/',',').rstrip('\n') + print(f"UniDoc output: {output}") + + domains = output.split(',') + ndoms = len(domains) + chopping = ','.join(natsorted(domains)) + print(f"Found {ndoms} domains: {chopping}") + + if chopping == '': + print("No domains found, setting chopping to NULL") + chopping = "NULL" + ndoms = 0 + + except Exception as e: + print(f"Error processing {pdb}: {str(e)}") + chopping = 'NO_SS' + ndoms = 0 + else: + print(f"Skipping processing for {bn} - chopping is {chopping}") + pdb_path_chopped = None + ndoms = 0 + + # end_time = time.time() - start_time + + fn.write("{}\t{}\t{}\t{}\t{}\t{:.5f}\n".format( + bn, + md5, + nres, + ndoms, + chopping, + 1., + # end_time, + )) + + # Cleanup temporary files + try: + if os.path.exists(pdb_ss): + os.remove(pdb_ss) + print(f"Cleaned up temporary file: {pdb_ss}") + + if pdb_path_chopped and os.path.exists(pdb_path_chopped): + os.remove(pdb_path_chopped) + print(f"Cleaned up temporary file: {pdb_path_chopped}") + except Exception as e: + print(f"Warning: Error during cleanup: {str(e)}") + + print("--------------------------------------------------------------") + +if __name__ == "__main__": + main() From 2e1c83674745b609024893dccb66a1fcc4480c66 Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Thu, 29 May 2025 20:44:06 +0800 Subject: [PATCH 06/11] Delete ted_consensus_1.0/run_segmentation.sh --- 
ted_consensus_1.0/run_segmentation.sh | 104 -------------------------- 1 file changed, 104 deletions(-) delete mode 100755 ted_consensus_1.0/run_segmentation.sh diff --git a/ted_consensus_1.0/run_segmentation.sh b/ted_consensus_1.0/run_segmentation.sh deleted file mode 100755 index 5e76dab..0000000 --- a/ted_consensus_1.0/run_segmentation.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash - -# This file is a part of TED: The Encyclopedia of Domains. If you utilize or reference any content from this file, -# please cite the following paper: - -# Lau et al., 2024. Exploring structural diversity across the protein universe with The Encyclopedia of Domains. - -# Function to display usage message -usage() { - echo "Usage: $0 -i -o " - exit 1 -} - -# Check that the environment exists and activate it -VENV_DIR="ted_consensus" -if [ -d "$VENV_DIR" ]; then - source $VENV_DIR/bin/activate -else - echo "Virtual environment 'ted_consensus' does not exist." - echo "Please run 'bash setup.sh' to create and set up the virtual environment." - exit 1 -fi - -# Parse command-line arguments -while getopts "i:o:" opt; do - case $opt in - i) INPUT_DIR="$OPTARG" ;; - o) OUTPUT_DIR="$OPTARG" ;; - *) usage ;; - esac -done - -# Check if both input and output directories are provided -if [ -z "$INPUT_DIR" ] || [ -z "$OUTPUT_DIR" ]; then - usage -fi - -# Check if the input directory exists -if [ ! -d "$INPUT_DIR" ]; then - echo "Error: $INPUT_DIR is not a directory" - exit 1 -fi - -# Create the output directory if it doesn't exist -if [ ! 
-d "$OUTPUT_DIR" ]; then - mkdir -p "$OUTPUT_DIR" -fi - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -PY=$(which python) - -SEGMENT="${SCRIPT_DIR}/scripts/segment.sh" -CONSENSUS="${SCRIPT_DIR}/scripts/get_consensus.py" -FILTER_DOMAINS="${SCRIPT_DIR}/scripts/filter_domains_consensus.py" - -# Run Merizo on the input directory -out_merizo="${OUTPUT_DIR}/chopping_merizo.txt" -log_merizo="${OUTPUT_DIR}/chopping_merizo.log" -bash "${SEGMENT}" -i "${INPUT_DIR}" -m merizo -o "${OUTPUT_DIR}" > "${log_merizo}" 2>&1 - -if test ! -f "${out_merizo}" || test ! -s "${out_merizo}"; then - echo "Expected to find chopping file for Merizo at ${out_merizo}!" - exit 1 -fi - -# Run UniDoc on the Merizo output -out_unidoc="${OUTPUT_DIR}/chopping_unidoc.txt" -log_unidoc="${OUTPUT_DIR}/chopping_unidoc.log" -bash "${SEGMENT}" -i "${INPUT_DIR}" -m unidoc -o "${OUTPUT_DIR}" -c "${out_merizo}" > "${log_unidoc}" 2>&1 - -if test ! -f "${out_unidoc}" || test ! -s "${out_unidoc}"; then - echo "Expected to find chopping file for UniDoc at ${out_unidoc}!" - exit 1 -fi - -# Run Chainsaw on the input directory -out_chainsaw="${OUTPUT_DIR}/chopping_chainsaw.txt" -log_chainsaw="${OUTPUT_DIR}/chopping_chainsaw.log" -bash "${SEGMENT}" -i "${INPUT_DIR}" -m chainsaw -o "${OUTPUT_DIR}" > "${log_chainsaw}" 2>&1 - -if test ! -f "${out_chainsaw}" || test ! -s "${out_chainsaw}"; then - echo "Expected to find chopping file for Chainsaw at ${out_chainsaw}!" - exit 1 -fi - -echo "Calculating consensus domains from Merizo, UniDoc and Chainsaw outputs.. " - -# Calculate consensus from each of the outputs -out_consensus="${OUTPUT_DIR}/consensus.tsv" -log_consensus="${OUTPUT_DIR}/consensus.log" -"${PY}" "${CONSENSUS}" -c "${out_merizo}" "${out_chainsaw}" "${out_unidoc}" -o "${out_consensus}" > "${log_consensus}" 2>&1 - -if test -f "${out_consensus}"; then - "${PY}" "${FILTER_DOMAINS}" "${out_consensus}" -o "${out_consensus}.tmp" - - if [ $? 
== 0 ]; then - mv "${out_consensus}.tmp" "${out_consensus}" - fi -else - echo "Expected to find consensus domain file at ${out_consensus}" - exit 1 -fi - -echo "Consensus domain file saved at ${out_consensus}" \ No newline at end of file From 461ec09ce00d5dd3e68d84c501ef36fb15379777 Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Thu, 29 May 2025 20:44:56 +0800 Subject: [PATCH 07/11] Add files via upload change --- ted_consensus_1.0/run_segmentation.sh | 147 ++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 ted_consensus_1.0/run_segmentation.sh diff --git a/ted_consensus_1.0/run_segmentation.sh b/ted_consensus_1.0/run_segmentation.sh new file mode 100644 index 0000000..ac28301 --- /dev/null +++ b/ted_consensus_1.0/run_segmentation.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +# This file is a part of TED: The Encyclopedia of Domains. If you utilize or reference any content from this file, +# please cite the following paper: + +# Lau et al., 2024. Exploring structural diversity across the protein universe with The Encyclopedia of Domains. + +# Function to display usage message +usage() { + echo "Usage: $0 -i -o " + exit 1 +} + +# Check that the environment exists and activate it +BASE_DIR="/root/data/ted-tools-main/ted_consensus_1.0" +VENV_DIR="${BASE_DIR}/ted_consensus" +if [ -d "$VENV_DIR" ]; then + source $VENV_DIR/bin/activate +else + echo "Virtual environment 'ted_consensus' does not exist." + echo "Please run 'bash setup.sh' to create and set up the virtual environment." + exit 1 +fi + +# Parse command-line arguments +while getopts "i:o:" opt; do + case $opt in + i) INPUT_DIR="$OPTARG" ;; + o) OUTPUT_DIR="$OPTARG" ;; + *) usage ;; + esac +done + +# Check if both input and output directories are provided +if [ -z "$INPUT_DIR" ] || [ -z "$OUTPUT_DIR" ]; then + usage +fi + +# Check if the input directory exists +if [ ! 
-d "$INPUT_DIR" ]; then + echo "Error: $INPUT_DIR is not a directory" + exit 1 +fi + +# Create the output directory if it doesn't exist +if [ ! -d "$OUTPUT_DIR" ]; then + mkdir -p "$OUTPUT_DIR" +fi + +SCRIPT_DIR="/root/data/ted-tools-main/ted_consensus_1.0" +PY=$(which python) + +SEGMENT="${SCRIPT_DIR}/scripts/segment.sh" +CONSENSUS="${SCRIPT_DIR}/scripts/get_consensus.py" +FILTER_DOMAINS="${SCRIPT_DIR}/scripts/filter_domains_consensus.py" + +# Calculate input data +input_pdb_count=$(find "${INPUT_DIR}" -maxdepth 1 -name '*.pdb' | wc -l) + +# Run Merizo on the input directory +out_merizo="${OUTPUT_DIR}/chopping_merizo.txt" +log_merizo="${OUTPUT_DIR}/chopping_merizo.log" +if test -f "${out_merizo}"; then + # Output file exists, check the line count + merizo_count=$(wc -l < "${out_merizo}") + # Result count is equal, skip execution + if [ "${merizo_count}" -eq "${input_pdb_count}" ]; then + echo "${out_merizo} already exists" + # Result count is not equal, delete the output file and log file, and execute again + else + rm -f "${out_merizo}" "${log_merizo}" + bash "${SEGMENT}" -i "${INPUT_DIR}" -m merizo -o "${OUTPUT_DIR}" > "${log_merizo}" 2>&1 + fi +# Output file does not exist, execute directly +else + rm -f "${out_merizo}" "${log_merizo}" + bash "${SEGMENT}" -i "${INPUT_DIR}" -m merizo -o "${OUTPUT_DIR}" > "${log_merizo}" 2>&1 +fi + +if test ! -f "${out_merizo}" || test ! -s "${out_merizo}"; then + echo "Expected to find chopping file for Merizo at ${out_merizo}!" 
+ exit 1 +fi + +# Run UniDoc on the Merizo output +out_unidoc="${OUTPUT_DIR}/chopping_unidoc.txt" +log_unidoc="${OUTPUT_DIR}/chopping_unidoc.log" +if test -f "${out_unidoc}"; then + # Output file exists, check the line count + unidoc_count=$(wc -l < "${out_unidoc}") + if [ "${unidoc_count}" -eq "${input_pdb_count}" ]; then + echo "${out_unidoc} already exists" + else + rm -f "${out_unidoc}" "${log_unidoc}" + bash "${SEGMENT}" -i "${INPUT_DIR}" -m unidoc -o "${OUTPUT_DIR}" -c "${out_merizo}" > "${log_unidoc}" 2>&1 + fi +else + rm -f "${out_unidoc}" "${log_unidoc}" + bash "${SEGMENT}" -i "${INPUT_DIR}" -m unidoc -o "${OUTPUT_DIR}" -c "${out_merizo}" > "${log_unidoc}" 2>&1 +fi + +if test ! -f "${out_unidoc}" || test ! -s "${out_unidoc}"; then + echo "Expected to find chopping file for UniDoc at ${out_unidoc}!" + exit 1 +fi + +# Run Chainsaw on the input directory +out_chainsaw="${OUTPUT_DIR}/chopping_chainsaw.txt" +log_chainsaw="${OUTPUT_DIR}/chopping_chainsaw.log" +if test -f "${out_chainsaw}"; then + # Output file exists, check the line count + chainsaw_count=$(wc -l < "${out_chainsaw}") + if [ "${chainsaw_count}" -eq "${input_pdb_count}" ]; then + echo "${out_chainsaw} already exists" + else + rm -f "${out_chainsaw}" "${log_chainsaw}" + bash "${SEGMENT}" -i "${INPUT_DIR}" -m chainsaw -o "${OUTPUT_DIR}" > "${log_chainsaw}" 2>&1 + fi +else + rm -f "${out_chainsaw}" "${log_chainsaw}" + bash "${SEGMENT}" -i "${INPUT_DIR}" -m chainsaw -o "${OUTPUT_DIR}" > "${log_chainsaw}" 2>&1 +fi + +if test ! -f "${out_chainsaw}" || test ! -s "${out_chainsaw}"; then + echo "Expected to find chopping file for Chainsaw at ${out_chainsaw}!" + exit 1 +fi + + +echo "Calculating consensus domains from Merizo, UniDoc and Chainsaw outputs.. 
" +# Calculate consensus from each of the outputs +out_consensus="${OUTPUT_DIR}/consensus.tsv" +log_consensus="${OUTPUT_DIR}/consensus.log" +"${PY}" "${CONSENSUS}" -c "${out_merizo}" "${out_chainsaw}" "${out_unidoc}" -o "${out_consensus}" > "${log_consensus}" 2>&1 + +if test -f "${out_consensus}"; then + "${PY}" "${FILTER_DOMAINS}" "${out_consensus}" -o "${out_consensus}.tmp" + + if [ $? == 0 ]; then + mv "${out_consensus}.tmp" "${out_consensus}" + fi +else + echo "Expected to find consensus domain file at ${out_consensus}" + exit 1 +fi + +echo "Consensus domain file saved at ${out_consensus}" From fd0ca1b0920d6234413cf3a20164d747ba4db561 Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Thu, 29 May 2025 20:50:00 +0800 Subject: [PATCH 08/11] Delete ted_consensus_1.0/scripts/segment.sh --- ted_consensus_1.0/scripts/segment.sh | 101 --------------------------- 1 file changed, 101 deletions(-) delete mode 100644 ted_consensus_1.0/scripts/segment.sh diff --git a/ted_consensus_1.0/scripts/segment.sh b/ted_consensus_1.0/scripts/segment.sh deleted file mode 100644 index 6cbfabd..0000000 --- a/ted_consensus_1.0/scripts/segment.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash - -# This file is a part of TED: The Encyclopedia of Domains. If you utilize or reference any content from this file, -# please cite the following paper: - -# Lau et al., 2024. Exploring structural diversity across the protein universe with The Encyclopedia of Domains. - -# Script for running segmentation methods given a directory of structures. 
- -# Usage: -# bash run_segment_afdb.sh -i -m - -set -eu - -# Directories and paths -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -PROG_DIR="${SCRIPT_DIR}/../programs" - -py=$(which python) -custom_chopping='' - -FILTER_DOMAINS="${SCRIPT_DIR}/filter_domains.py" - -while getopts ":i:m:o:c:" opt; do - case $opt in - i) inputs=$(readlink -f "$OPTARG") ;; - m) method=$OPTARG ;; - o) output=$(readlink -f "$OPTARG") ;; - c) custom_chopping=${OPTARG} ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - exit 1 - ;; - :) - echo "Option -$OPTARG requires an argument." >&2 - exit 1 - ;; - esac -done - -# Check if both options are provided -if [[ -z "${inputs}" || -z "${method}" || -z "${output}" ]]; then - echo "Usage: run_segment_afdb.sh -i -m -o [-c ]" - exit 1 -fi - -case $method in - "merizo") - RUN_SCRIPT="${PROG_DIR}/merizo/predict_afdb.py" - OFFSET_RESI=0 - ;; - "unidoc") - RUN_SCRIPT="${PROG_DIR}/unidoc/Run_UniDoc_from_scratch_structure_afdb.py" - OFFSET_RESI=0 - ;; - "chainsaw") - RUN_SCRIPT="${PROG_DIR}/chainsaw/get_predictions.py" - OFFSET_RESI=1 - ;; - *) - echo "Invalid method: ${method}. Allowed options are 'merizo', 'unidoc', or 'chainsaw'." 
- exit 1 - ;; -esac - -# Run method -output_file="${output%/}/chopping_${method}.txt" - -echo "Running ${method} on targets in ${inputs}" - -# Each method will take the list containing the paths to the targets -if [ "${method}" = "merizo" ] || [ "${method}" = "unidoc" ]; then - target_list="${output%/}targets.txt" - readlink -f "${inputs}/"*.pdb > "${target_list}" - - if [[ ${custom_chopping} == '' ]]; then - ${py} "${RUN_SCRIPT}" -l "${target_list}" --out "${output_file}" - else - ${py} "${RUN_SCRIPT}" -l "${target_list}" --out "${output_file}" --inherit_chopping "${custom_chopping}" - fi - - # Cleanup - if test -f "${target_list}"; then - rm "${target_list}" - fi - -elif [ "${method}" = "chainsaw" ]; then - ${py} "${RUN_SCRIPT}" --structure_directory "${inputs}" -o "${output_file}" --append -fi - -# Filter choppings to remove small segments and single-residue domains -if test -f "${output_file}"; then - "${py}" "${FILTER_DOMAINS}" "${output_file}" -o "${output_file}.tmp" --offset_resi "${OFFSET_RESI}" - - if [ $? == 0 ]; then - mv "${output_file}.tmp" "${output_file}" - fi -else - echo "Expected to find output file at ${output_file}" - exit 1 -fi From c5a1a043cb82587f43eb956a2a0091574334cecc Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Thu, 29 May 2025 20:50:24 +0800 Subject: [PATCH 09/11] Add files via upload --- ted_consensus_1.0/scripts/segment.sh | 101 +++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 ted_consensus_1.0/scripts/segment.sh diff --git a/ted_consensus_1.0/scripts/segment.sh b/ted_consensus_1.0/scripts/segment.sh new file mode 100644 index 0000000..dfc5302 --- /dev/null +++ b/ted_consensus_1.0/scripts/segment.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +# This file is a part of TED: The Encyclopedia of Domains. If you utilize or reference any content from this file, +# please cite the following paper: + +# Lau et al., 2024. 
Exploring structural diversity across the protein universe with The Encyclopedia of Domains. + +# Script for running segmentation methods given a directory of structures. + +# Usage: +# bash run_segment_afdb.sh -i -m + +set -eu + +# Directories and paths +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +PROG_DIR="${SCRIPT_DIR}/../programs" + +py=$(which python) +custom_chopping='' + +FILTER_DOMAINS="${SCRIPT_DIR}/filter_domains.py" + +while getopts ":i:m:o:c:" opt; do + case $opt in + i) inputs=$(readlink -f "$OPTARG") ;; + m) method=$OPTARG ;; + o) output=$(readlink -f "$OPTARG") ;; + c) custom_chopping=${OPTARG} ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + exit 1 + ;; + esac +done + +# Check if both options are provided +if [[ -z "${inputs}" || -z "${method}" || -z "${output}" ]]; then + echo "Usage: run_segment_afdb.sh -i -m -o [-c ]" + exit 1 +fi + +case $method in + "merizo") + RUN_SCRIPT="${PROG_DIR}/merizo/predict_afdb.py" + OFFSET_RESI=0 + ;; + "unidoc") + RUN_SCRIPT="${PROG_DIR}/unidoc/Run_UniDoc_from_scratch_structure_afdb.py" + OFFSET_RESI=0 + ;; + "chainsaw") + RUN_SCRIPT="${PROG_DIR}/chainsaw/get_predictions.py" + OFFSET_RESI=1 + ;; + *) + echo "Invalid method: ${method}. Allowed options are 'merizo', 'unidoc', or 'chainsaw'." 
+ exit 1 + ;; +esac + +# Run method +output_file="${output%/}/chopping_${method}.txt" + +echo "Running ${method} on targets in ${inputs}" + +# Each method will take the list containing the paths to the targets +if [ "${method}" = "merizo" ] || [ "${method}" = "unidoc" ]; then + target_list="${output%/}targets.txt" + find "${inputs}/" -type f -name "*.pdb" -printf "%p\n" > "${target_list}" + + if [[ ${custom_chopping} == '' ]]; then + ${py} "${RUN_SCRIPT}" -l "${target_list}" --out "${output_file}" + else + ${py} "${RUN_SCRIPT}" -l "${target_list}" --out "${output_file}" --inherit_chopping "${custom_chopping}" + fi + + # Cleanup + if test -f "${target_list}"; then + rm "${target_list}" + fi + +elif [ "${method}" = "chainsaw" ]; then + ${py} "${RUN_SCRIPT}" --structure_directory "${inputs}" -o "${output_file}" --append +fi + +# Filter choppings to remove small segments and single-residue domains +if test -f "${output_file}"; then + "${py}" "${FILTER_DOMAINS}" "${output_file}" -o "${output_file}.tmp" --offset_resi "${OFFSET_RESI}" + + if [ $? == 0 ]; then + mv "${output_file}.tmp" "${output_file}" + fi +else + echo "Expected to find output file at ${output_file}" + exit 1 +fi From 830225603cb621d832447c3a506abbe508bd7b21 Mon Sep 17 00:00:00 2001 From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com> Date: Thu, 29 May 2025 21:03:02 +0800 Subject: [PATCH 10/11] Delete ted_consensus_1.0/run_segmentation.sh --- ted_consensus_1.0/run_segmentation.sh | 147 -------------------------- 1 file changed, 147 deletions(-) delete mode 100644 ted_consensus_1.0/run_segmentation.sh diff --git a/ted_consensus_1.0/run_segmentation.sh b/ted_consensus_1.0/run_segmentation.sh deleted file mode 100644 index ac28301..0000000 --- a/ted_consensus_1.0/run_segmentation.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash - -# This file is a part of TED: The Encyclopedia of Domains. 
If you utilize or reference any content from this file, -# please cite the following paper: - -# Lau et al., 2024. Exploring structural diversity across the protein universe with The Encyclopedia of Domains. - -# Function to display usage message -usage() { - echo "Usage: $0 -i -o " - exit 1 -} - -# Check that the environment exists and activate it -BASE_DIR="/root/data/ted-tools-main/ted_consensus_1.0" -VENV_DIR="${BASE_DIR}/ted_consensus" -if [ -d "$VENV_DIR" ]; then - source $VENV_DIR/bin/activate -else - echo "Virtual environment 'ted_consensus' does not exist." - echo "Please run 'bash setup.sh' to create and set up the virtual environment." - exit 1 -fi - -# Parse command-line arguments -while getopts "i:o:" opt; do - case $opt in - i) INPUT_DIR="$OPTARG" ;; - o) OUTPUT_DIR="$OPTARG" ;; - *) usage ;; - esac -done - -# Check if both input and output directories are provided -if [ -z "$INPUT_DIR" ] || [ -z "$OUTPUT_DIR" ]; then - usage -fi - -# Check if the input directory exists -if [ ! -d "$INPUT_DIR" ]; then - echo "Error: $INPUT_DIR is not a directory" - exit 1 -fi - -# Create the output directory if it doesn't exist -if [ ! 
-d "$OUTPUT_DIR" ]; then - mkdir -p "$OUTPUT_DIR" -fi - -SCRIPT_DIR="/root/data/ted-tools-main/ted_consensus_1.0" -PY=$(which python) - -SEGMENT="${SCRIPT_DIR}/scripts/segment.sh" -CONSENSUS="${SCRIPT_DIR}/scripts/get_consensus.py" -FILTER_DOMAINS="${SCRIPT_DIR}/scripts/filter_domains_consensus.py" - -# Calculate input data -input_pdb_count=$(find "${INPUT_DIR}" -maxdepth 1 -name '*.pdb' | wc -l) - -# Run Merizo on the input directory -out_merizo="${OUTPUT_DIR}/chopping_merizo.txt" -log_merizo="${OUTPUT_DIR}/chopping_merizo.log" -if test -f "${out_merizo}"; then - # Output file exists, check the line count - merizo_count=$(wc -l < "${out_merizo}") - # Result count is equal, skip execution - if [ "${merizo_count}" -eq "${input_pdb_count}" ]; then - echo "${out_merizo} already exists" - # Result count is not equal, delete the output file and log file, and execute again - else - rm -f "${out_merizo}" "${log_merizo}" - bash "${SEGMENT}" -i "${INPUT_DIR}" -m merizo -o "${OUTPUT_DIR}" > "${log_merizo}" 2>&1 - fi -# Output file does not exist, execute directly -else - rm -f "${out_merizo}" "${log_merizo}" - bash "${SEGMENT}" -i "${INPUT_DIR}" -m merizo -o "${OUTPUT_DIR}" > "${log_merizo}" 2>&1 -fi - -if test ! -f "${out_merizo}" || test ! -s "${out_merizo}"; then - echo "Expected to find chopping file for Merizo at ${out_merizo}!" 
-    exit 1
-fi
-
-# Run UniDoc on the Merizo output
-out_unidoc="${OUTPUT_DIR}/chopping_unidoc.txt"
-log_unidoc="${OUTPUT_DIR}/chopping_unidoc.log"
-if test -f "${out_unidoc}"; then
-    # Output file exists, check the line count
-    unidoc_count=$(wc -l < "${out_unidoc}")
-    if [ "${unidoc_count}" -eq "${input_pdb_count}" ]; then
-        echo "${out_unidoc} already exists"
-    else
-        rm -f "${out_unidoc}" "${log_unidoc}"
-        bash "${SEGMENT}" -i "${INPUT_DIR}" -m unidoc -o "${OUTPUT_DIR}" -c "${out_merizo}" > "${log_unidoc}" 2>&1
-    fi
-else
-    rm -f "${out_unidoc}" "${log_unidoc}"
-    bash "${SEGMENT}" -i "${INPUT_DIR}" -m unidoc -o "${OUTPUT_DIR}" -c "${out_merizo}" > "${log_unidoc}" 2>&1
-fi
-
-if test ! -f "${out_unidoc}" || test ! -s "${out_unidoc}"; then
-    echo "Expected to find chopping file for UniDoc at ${out_unidoc}!"
-    exit 1
-fi
-
-# Run Chainsaw on the input directory
-out_chainsaw="${OUTPUT_DIR}/chopping_chainsaw.txt"
-log_chainsaw="${OUTPUT_DIR}/chopping_chainsaw.log"
-if test -f "${out_chainsaw}"; then
-    # Output file exists, check the line count
-    chainsaw_count=$(wc -l < "${out_chainsaw}")
-    if [ "${chainsaw_count}" -eq "${input_pdb_count}" ]; then
-        echo "${out_chainsaw} already exists"
-    else
-        rm -f "${out_chainsaw}" "${log_chainsaw}"
-        bash "${SEGMENT}" -i "${INPUT_DIR}" -m chainsaw -o "${OUTPUT_DIR}" > "${log_chainsaw}" 2>&1
-    fi
-else
-    rm -f "${out_chainsaw}" "${log_chainsaw}"
-    bash "${SEGMENT}" -i "${INPUT_DIR}" -m chainsaw -o "${OUTPUT_DIR}" > "${log_chainsaw}" 2>&1
-fi
-
-if test ! -f "${out_chainsaw}" || test ! -s "${out_chainsaw}"; then
-    echo "Expected to find chopping file for Chainsaw at ${out_chainsaw}!"
-    exit 1
-fi
-
-
-echo "Calculating consensus domains from Merizo, UniDoc and Chainsaw outputs.. "
-# Calculate consensus from each of the outputs
-out_consensus="${OUTPUT_DIR}/consensus.tsv"
-log_consensus="${OUTPUT_DIR}/consensus.log"
-"${PY}" "${CONSENSUS}" -c "${out_merizo}" "${out_chainsaw}" "${out_unidoc}" -o "${out_consensus}" > "${log_consensus}" 2>&1
-
-if test -f "${out_consensus}"; then
-    "${PY}" "${FILTER_DOMAINS}" "${out_consensus}" -o "${out_consensus}.tmp"
-
-    if [ $? == 0 ]; then
-        mv "${out_consensus}.tmp" "${out_consensus}"
-    fi
-else
-    echo "Expected to find consensus domain file at ${out_consensus}"
-    exit 1
-fi
-
-echo "Consensus domain file saved at ${out_consensus}"

From dd26f36848b0d3bd2ce92f97eb1dbbd8b83f52df Mon Sep 17 00:00:00 2001
From: Weiyin Wu <59970244+TigerWindWood@users.noreply.github.com>
Date: Thu, 29 May 2025 21:04:31 +0800
Subject: [PATCH 11/11] Add files via upload

---
 ted_consensus_1.0/run_segmentation.sh | 118 ++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 ted_consensus_1.0/run_segmentation.sh

diff --git a/ted_consensus_1.0/run_segmentation.sh b/ted_consensus_1.0/run_segmentation.sh
new file mode 100644
index 0000000..5e76dab
--- /dev/null
+++ b/ted_consensus_1.0/run_segmentation.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+
+# This file is a part of TED: The Encyclopedia of Domains. If you utilize or reference any content from this file,
+# please cite the following paper:
+
+# Lau et al., 2024. Exploring structural diversity across the protein universe with The Encyclopedia of Domains.
+
+# Runs scripts/segment.sh once per method (merizo, unidoc, chainsaw) on the input
+# directory, then scripts/get_consensus.py and scripts/filter_domains_consensus.py.
+# Choppings, logs and consensus.tsv are all written to the output directory.
+
+# Print the usage message to stderr and abort
+usage() {
+    echo "Usage: $0 -i <input_directory> -o <output_directory>" >&2
+    exit 1
+}
+
+# Directory holding this script; helper scripts are resolved against it
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+# Check that the environment exists and activate it. An environment in the
+# current directory is used when present; otherwise look next to this script
+# so that the pipeline can be started from any working directory.
+VENV_DIR="ted_consensus"
+if [ ! -d "$VENV_DIR" ]; then
+    VENV_DIR="${SCRIPT_DIR}/ted_consensus"
+fi
+if [ -d "$VENV_DIR" ]; then
+    source "$VENV_DIR/bin/activate"
+else
+    echo "Virtual environment 'ted_consensus' does not exist."
+    echo "Please run 'bash setup.sh' to create and set up the virtual environment."
+    exit 1
+fi
+
+# Parse command-line arguments
+while getopts "i:o:" opt; do
+    case $opt in
+        i) INPUT_DIR="$OPTARG" ;;
+        o) OUTPUT_DIR="$OPTARG" ;;
+        *) usage ;;
+    esac
+done
+
+# Check if both input and output directories are provided
+if [ -z "$INPUT_DIR" ] || [ -z "$OUTPUT_DIR" ]; then
+    usage
+fi
+
+# Check if the input directory exists
+if [ ! -d "$INPUT_DIR" ]; then
+    echo "Error: $INPUT_DIR is not a directory"
+    exit 1
+fi
+
+# Create the output directory if it doesn't exist
+if [ ! -d "$OUTPUT_DIR" ]; then
+    mkdir -p "$OUTPUT_DIR"
+fi
+
+# Python found on PATH after activation (command -v is a shell builtin, unlike which)
+PY=$(command -v python)
+
+SEGMENT="${SCRIPT_DIR}/scripts/segment.sh"
+CONSENSUS="${SCRIPT_DIR}/scripts/get_consensus.py"
+FILTER_DOMAINS="${SCRIPT_DIR}/scripts/filter_domains_consensus.py"
+
+# Run Merizo on the input directory
+out_merizo="${OUTPUT_DIR}/chopping_merizo.txt"
+log_merizo="${OUTPUT_DIR}/chopping_merizo.log"
+bash "${SEGMENT}" -i "${INPUT_DIR}" -m merizo -o "${OUTPUT_DIR}" > "${log_merizo}" 2>&1
+
+if test ! -f "${out_merizo}" || test ! -s "${out_merizo}"; then
+    echo "Expected to find chopping file for Merizo at ${out_merizo}!"
+    exit 1
+fi
+
+# Run UniDoc on the Merizo output (chopping file passed via -c)
+out_unidoc="${OUTPUT_DIR}/chopping_unidoc.txt"
+log_unidoc="${OUTPUT_DIR}/chopping_unidoc.log"
+bash "${SEGMENT}" -i "${INPUT_DIR}" -m unidoc -o "${OUTPUT_DIR}" -c "${out_merizo}" > "${log_unidoc}" 2>&1
+
+if test ! -f "${out_unidoc}" || test ! -s "${out_unidoc}"; then
+    echo "Expected to find chopping file for UniDoc at ${out_unidoc}!"
+    exit 1
+fi
+
+# Run Chainsaw on the input directory
+out_chainsaw="${OUTPUT_DIR}/chopping_chainsaw.txt"
+log_chainsaw="${OUTPUT_DIR}/chopping_chainsaw.log"
+bash "${SEGMENT}" -i "${INPUT_DIR}" -m chainsaw -o "${OUTPUT_DIR}" > "${log_chainsaw}" 2>&1
+
+if test ! -f "${out_chainsaw}" || test ! -s "${out_chainsaw}"; then
+    echo "Expected to find chopping file for Chainsaw at ${out_chainsaw}!"
+    exit 1
+fi
+
+echo "Calculating consensus domains from Merizo, UniDoc and Chainsaw outputs.. "
+
+# Calculate consensus from each of the outputs
+out_consensus="${OUTPUT_DIR}/consensus.tsv"
+log_consensus="${OUTPUT_DIR}/consensus.log"
+"${PY}" "${CONSENSUS}" -c "${out_merizo}" "${out_chainsaw}" "${out_unidoc}" -o "${out_consensus}" > "${log_consensus}" 2>&1
+
+if test -f "${out_consensus}"; then
+    # Replace the consensus file only when filtering succeeds; a failed filter
+    # leaves the unfiltered file in place and is reported on stderr.
+    if "${PY}" "${FILTER_DOMAINS}" "${out_consensus}" -o "${out_consensus}.tmp"; then
+        mv "${out_consensus}.tmp" "${out_consensus}"
+    else
+        echo "Warning: filtering failed, ${out_consensus} is left unfiltered" >&2
+    fi
+else
+    echo "Expected to find consensus domain file at ${out_consensus}"
+    exit 1
+fi
+
+echo "Consensus domain file saved at ${out_consensus}"