"""Figure S4: number of distinct Foldseek clusters vs. resampling frequency.

Compares "full" resampling (subsample every cluster member, PDB + synthetic)
against "partial" resampling (keep all PDB members fixed and subsample only
the synthetic BackboneRef members, whose names start with a 2024 date).
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm
import colorcet  # noqa: F401 -- imported for its registered colormaps

# Global plot styling shared across the figure scripts.
sns.set(font_scale=1.7)
sns.set_style("whitegrid")
plt.rcParams["axes.grid"] = False
plt.rc("axes", edgecolor="black")
plt.rc(
    "text.latex",
    preamble=r"\usepackage{newpxtext}\usepackage{newpxmath}\usepackage{commath}\usepackage{mathtools}",
)
plt.rc("font", family="serif", size=16.0, weight="medium")
plt.rc("savefig", dpi=500)
plt.rc("legend", loc="best", fontsize="medium", fancybox=True, framealpha=0.5)
plt.rc("lines", linewidth=2.5, markersize=10, markeredgewidth=2.5)
plt.rc("axes", titlepad=10)


# Foldseek cluster assignments: one row per structure ("member") together
# with the cluster representative it was assigned to.
both = pd.read_csv('pdb_plus_240k/scRMSD_best_240k_plus_pdbFirst_aln0_cluster.tsv', sep='\t',
                   names=['representative', 'member'])

RESAMPLE_FREQS = (0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
N_ITER = 5  # resampling repeats per frequency

# --- Full resampling: subsample all members (PDB + synthetic) together. ---
name = 'PDB + syn'  # FIX: `name` was referenced below but never assigned (NameError at runtime)
output_rows = []
for resample_freq in tqdm.tqdm(RESAMPLE_FREQS):
    for iter_n in range(N_ITER):
        resampled = both.sample(frac=resample_freq, random_state=iter_n, replace=False)
        n_uniq_clust = resampled['representative'].nunique()
        output_rows.append(dict(
            freq=resample_freq,
            iteration=iter_n,
            num_uniq_clust=n_uniq_clust,
            comparison=name,
        ))

# FIX: the freq=1 / freq=0 anchor points were appended inside the frequency
# loop (once per frequency, 10 duplicates each); they are single endpoints and
# belong outside it, mirroring the partial-resampling analysis below.
output_rows.append(dict(freq=1, iteration=0, comparison=name,
                        num_uniq_clust=both.representative.nunique()))
output_rows.append(dict(freq=0, iteration=0, comparison=name, num_uniq_clust=0))

df_full_resamp = pd.DataFrame(output_rows)

# --- Partial resampling: fix the PDB members, subsample only synthetic ones. ---
output_rows = []
for full_df, name in zip((both,), ('PDB + syn',)):
    # Names of synthetic samples start with their generation date ("2024...").
    # Hoisted out of the frequency loop: these splits do not depend on the
    # resampling frequency.
    rest_df = full_df[~full_df['member'].str.startswith("2024")]
    df = full_df[full_df['member'].str.startswith("2024")]
    rest_u = rest_df.representative.nunique()  # clusters contributed by PDB alone

    for resample_freq in tqdm.tqdm(RESAMPLE_FREQS):
        for iter_n in range(N_ITER):
            resampled = df.sample(frac=resample_freq, random_state=iter_n, replace=False)
            n_uniq_clust = resampled['representative'].nunique() + rest_u
            output_rows.append(dict(
                freq=resample_freq,
                iteration=iter_n,
                num_uniq_clust=n_uniq_clust,
                comparison=name,
            ))

    output_rows.append(dict(freq=1, iteration=0, comparison=name,
                            num_uniq_clust=full_df.representative.nunique()))
    output_rows.append(dict(freq=0, iteration=0, comparison=name, num_uniq_clust=0))

df_partial_resamp = pd.DataFrame(output_rows)

plt.figure()
sns.lineplot(data=df_partial_resamp,
             x='freq',
             y='num_uniq_clust',
             label='Partial resampling',
             color='#0D96C9',)

# Full-resampling curve in dark gray for contrast.
# (FIX: the old comment said "in blue"; the color is #404040, a dark gray.)
sns.lineplot(data=df_full_resamp,
             x='freq',
             y='num_uniq_clust',
             label='Full resampling',
             color='#404040',
             )

plt.xlabel("Proportion of data points \nsampled")
plt.ylabel("No. of clusters")
plt.xticks(np.arange(0, 1.1, 0.1), rotation=75)
plt.legend(title=None, loc='best', frameon=False)
plt.title("Number of distinct clusters by\n resampling frequency")

# Remove top and right spines.
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

plt.tight_layout()
#!/usr/bin/env python
# coding: utf-8
"""Figure S5: ProteinMPNN sampling-temperature grid search.

Plots the percentage of BackboneRef backbones that are designable
(per-backbone mean scRMSD <= 2 A over their designed sequences) at each
ProteinMPNN sampling temperature.
"""
# FIX: `pathlib` and `pandas` were each imported twice; duplicates removed.
import pathlib  # noqa: F401 -- notebook leftover, kept to preserve the import surface
import re  # noqa: F401 -- notebook leftover
import pandas as pd
import joblib  # noqa: F401 -- notebook leftover
import numpy as np  # noqa: F401 -- notebook leftover
import seaborn as sns
import matplotlib.pyplot as plt

# Global plot styling.
sns.set(font_scale=1.7)
sns.set_style("whitegrid")
plt.rcParams['axes.grid'] = False
plt.rc('axes', edgecolor='black')

plt.rc("text", usetex=False)
# NOTE(review): this LaTeX preamble has no effect while usetex=False above --
# kept for parity with the sibling figure scripts; confirm before removing.
plt.rc(
    "text.latex",
    preamble=r"\usepackage{newpxtext}\usepackage{newpxmath}\usepackage{commath}\usepackage{mathtools}",
)
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
# plt.rc("font", family="serif", size=16.0, weight="medium")
plt.rc("savefig", dpi=500)
plt.rc("legend", loc="best", fontsize="medium", fancybox=True, framealpha=0.5)
plt.rc("lines", linewidth=2.5, markersize=10, markeredgewidth=2.5)
plt.rc("axes", titlepad=10)

colors = ["#BBBBBB", "#33BBEE", "#EE3377", "#009988", "#CC3311", "#0077BB"]
colors = list(reversed(colors))
sns.set_palette(sns.color_palette(colors))
# set mpl palette
plt.rcParams["axes.prop_cycle"] = plt.cycler(color=colors)


# One row per (temperature, backbone, designed sequence) -- presumably; the
# groupby-mean below collapses the per-sequence rows. TODO confirm schema.
df = pd.read_parquet('temperature_scRMSD_gridsearch_results.parquet')


# Average metrics over the designed sequences of each backbone at each temperature.
averages = df.groupby(["temperature",
                       'backbone_pdb'])[
    ["scRMSD", "TM", "aa_length"]
].mean()
averages = averages.reset_index()

averages["pass"] = averages["scRMSD"] <= 2   # designability criterion
averages["pass_tm"] = averages["TM"] >= 0.5  # TM-based criterion (not plotted here)
# FIX: removed unused `bins = np.arange(0, 1100, 100)` leftover.


plt.figure()
# Boolean -> 0/100 so lineplot's mean over backbones is a percentage.
averages['pass_pct'] = averages['pass'] * 100
sns.lineplot(data=averages,
             x='temperature',
             y='pass_pct',
             marker='o',
             color='#0D96C9',
             errorbar=None)

plt.xlabel('Temperature')
plt.ylabel('% backbones with\n(scRMSD < 2Å)')


# Force x-axis tick labels to show at each sampled temperature value.
plt.gca().set_xticks(averages['temperature'].unique())
plt.gca().set_xticklabels(averages['temperature'].unique())

# Remove top and right spines.
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# FIX: set the title before tight_layout() so the layout accounts for it
# (previously tight_layout ran first and the title could be clipped).
plt.title('BackboneRef sample designability\nby temperature')
plt.tight_layout()
"""Figures 2b and 2d: novelty and designability of BackboneRef (BR) backbones.

2d: 2-D histogram of backbone length vs. best TM-score against AFDB/UniProt,
    annotated with the Pearson correlation.
2b: ECDF of the per-backbone average scRMSD, with the 2 A designability
    threshold marked.
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# Global plot styling.
sns.set(font_scale=1.7)
sns.set_style("whitegrid")
plt.rcParams['axes.grid'] = False
plt.rc('axes', edgecolor='black')

plt.rc("text", usetex=False)
plt.rc(
    "text.latex",
    preamble=r"\usepackage{newpxtext}\usepackage{newpxmath}\usepackage{commath}\usepackage{mathtools}",
)
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
plt.rc("savefig", dpi=500)
plt.rc("legend", loc="best", fontsize="medium", fancybox=True, framealpha=0.5)
plt.rc("lines", linewidth=2.5, markersize=10, markeredgewidth=2.5)
plt.rc("axes", titlepad=10)
colors = ["#BBBBBB", "#33BBEE", "#EE3377", "#009988", "#CC3311", "#0077BB"]
colors = list(reversed(colors))
sns.set_palette(sns.color_palette(colors))
# set mpl palette
plt.rcParams["axes.prop_cycle"] = plt.cycler(color=colors)

# this should be on the zenodo
df = pd.read_parquet('backbone_novelty_quality_statistics.parquet')

# figure 2d: backbone length vs. max TM-score to any known structure
sns.histplot(df,
             x='length',
             y='max_search_tm')
plt.ylabel('TM-score (AFDB/UniProt)')
plt.xlabel('Backbone length (AA)')
plt.xlim(40, 512)
_ = plt.xticks([100, 200, 300, 400, 500, ])
plt.ylim(0, 1.0)
plt.title('Max. TM-score of BR structures')

# Annotate with the Pearson correlation between length and max TM-score.
# FIX: the old `corr_df.dropna(inplace=True)` mutated a slice of `df`
# (SettingWithCopyWarning; the drop is not guaranteed to take effect).
# Build the cleaned frame in a single expression instead.
corr_df = df[['length', 'max_search_tm']].dropna()
res = stats.pearsonr(corr_df['length'], corr_df['max_search_tm'])
# FIX: `statistic` is the canonical attribute of the pearsonr result object.
r = float(res.statistic)
_ = plt.text(350, 0.85, f'R = {r:.2f}')


# figure 2b: ECDF of per-backbone average scRMSD
plt.figure()
sns.ecdfplot(df['avg_scrmsd'], stat='proportion',
             complementary=False, linewidth=2, color='#0D96C9')
plt.xlabel('Average scRMSD')
plt.ylabel('Percentile')
plt.title('Designability of BR backbones')
# Dashed guide line at the scRMSD = 2 A designability threshold.
plt.axvline(x=2, linestyle='--', color='black', zorder=-1, alpha=0.5)
"""Figure 2c: composition of clusters after co-clustering PDB and BackboneRef (BBR).

For every Foldseek cluster, counts how many members come from the PDB
(member names prefixed "pdb") vs. from BBR, then plots total cluster size
against the number of BBR members, highlighting BBR-only clusters.
"""
import pathlib  # noqa: F401 -- notebook leftover, kept to preserve the import surface
import pandas as pd
import joblib  # noqa: F401 -- notebook leftover
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D  # moved up from mid-file

# Global plot styling.
sns.set(font_scale=1.7)
sns.set_style("whitegrid")
plt.rcParams["axes.grid"] = False
plt.rc("axes", edgecolor="black")
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
plt.rc("savefig", dpi=500)
plt.rc("legend", loc="best", fontsize="medium", fancybox=True, framealpha=0.5)
plt.rc("lines", linewidth=2.5, markersize=10, markeredgewidth=2.5)
plt.rc("axes", titlepad=10)

colors = ["#BBBBBB", "#33BBEE", "#EE3377", "#009988", "#CC3311", "#0077BB"]
colors = list(reversed(colors))
sns.set_palette(sns.color_palette(colors))
plt.rcParams["axes.prop_cycle"] = plt.cycler(color=colors)

together_cluster = pd.read_csv(
    "pdb_plus_240k/scRMSD_best_240k_plus_pdbFirst_default__cluster.tsv",
    sep="\t",
    header=None,
    names=["representative", "member"],
)


# Per-cluster composition. PDB member names look like "pdbXXXX_chain", so
# unique PDB *entries* are counted by stripping the "_chain" suffix.
# FIX: removed the unused `member` accumulator and the dead f/xmin/xmax
# x-limit computation that was never applied to the axes; the parallel lists
# are replaced by a list of row dicts producing the same DataFrame.
rows = []
for representative, members in together_cluster.groupby("representative"):
    is_pdb = members["member"].str.startswith("pdb")

    pdb_names = members["member"][is_pdb].tolist()
    unique_pdb = {s.split("_")[0] for s in pdb_names}

    unique_non_pdb = set(members["member"][~is_pdb].tolist())

    # Unique entries with PDB chains collapsed to one entry each.
    effective_size = len(unique_pdb | unique_non_pdb)

    rows.append(dict(
        representative=representative,
        pdb_count=len(unique_pdb),
        percentage=len(unique_pdb) / effective_size,
        size=len(members),
        effective_size=effective_size,
    ))

cluster_pdb = pd.DataFrame(
    rows,
    columns=["representative", "pdb_count", "percentage", "size", "effective_size"],
)
cluster_pdb["pdb_log"] = np.log(cluster_pdb["pdb_count"] + 1)
cluster_pdb["size_log"] = np.log(cluster_pdb["size"])
# BBR member count = total members minus unique PDB entries.
cluster_pdb["size_syn"] = cluster_pdb["size"] - cluster_pdb["pdb_count"]

cluster_pdb['is_synthetic'] = cluster_pdb['size_syn'] > 0                  # has >= 1 BBR member
cluster_pdb['syn_only'] = cluster_pdb['size_syn'] == cluster_pdb['size']   # all members are BBR


fig, axs = plt.subplots(1, 1)
# Sort so BBR-only clusters are drawn last and sit on top of the others.
cluster_pdb_sorted = cluster_pdb.sort_values('syn_only', ascending=True)

g = sns.scatterplot(
    data=cluster_pdb_sorted,
    x="size",
    y="size_syn",
    alpha=1,
    ax=axs,
    s=20,
    hue='syn_only',
    palette=['#7f7f7f', '#0D96C9'],
    # FIX: the old comment said "Turn off automatic legend" while passing
    # True; the automatic legend is kept here and replaced with custom
    # handles below.
    legend=True,
)

# Log-log axes: cluster sizes span orders of magnitude.
axs.set_xscale("log")
axs.set_yscale('log')

axs.set_xlabel("Cluster size")
axs.set_ylabel("# BBR members")
axs.set_title("Cluster size by num. BBR members")

# Replace the automatic hue legend (True/False labels) with named handles.
legend_elements = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor='#7f7f7f',
           label='PDB + BBRef samples', markersize=6),
    Line2D([0], [0], marker='o', color='w', markerfacecolor='#0D96C9',
           label='BBRef samples only', markersize=6),
]
axs.legend(handles=legend_elements, frameon=False, markerscale=2.5)

# Remove top and right spines.
axs.spines['top'].set_visible(False)
axs.spines['right'].set_visible(False)

plt.tight_layout()
BBR members") + +#Create custom legend handles +from matplotlib.lines import Line2D +legend_elements = [ + Line2D([0], [0], marker='o', color='w', markerfacecolor='#7f7f7f', + label='PDB + BBRef samples', markersize=6), + Line2D([0], [0], marker='o', color='w', markerfacecolor='#0D96C9', + label='BBRef samples only', markersize=6) +] +# PDB + BBRef samples" & "BBRef samples only" ? +# also please change "PDB + synthetic samples" to "PDB + BBRef samples" + +# Add custom legend +axs.legend(handles=legend_elements, frameon=False, markerscale=2.5) + +# Remove frame +axs.spines['top'].set_visible(False) +axs.spines['right'].set_visible(False) + +plt.tight_layout() + + + + From 7e45ebdc07eda4add35ba4e1a22c95026aa76c41 Mon Sep 17 00:00:00 2001 From: Alex Lee Date: Tue, 15 Jul 2025 21:51:43 -0700 Subject: [PATCH 2/3] add backboneref amulet files --- analysis/br/foldseek/clust_synalone.yaml | 32 ++ analysis/br/foldseek/clust_together.yaml | 32 ++ analysis/br/foldseek/cluster_pdb_alone.yaml | 34 ++ analysis/br/foldseek/parallell_search.yaml | 47 +++ analysis/br/omegafold/Dockerfile | 15 + analysis/br/omegafold/parallel.yaml | 40 +++ analysis/br/omegafold/single.yaml | 41 +++ analysis/br/pmpnn/Dockerfile | 8 + analysis/br/pmpnn/base_parallel.yaml | 44 +++ analysis/br/pydssp/Dockerfile | 4 + analysis/br/pydssp/pydssp_parallel.yaml | 46 +++ analysis/br/rfdiffusion/Dockerfile | 22 ++ .../br/rfdiffusion/amulet/Dockerfile.amulet | 30 ++ analysis/br/rfdiffusion/amulet/job.yaml | 36 ++ analysis/br/rfdiffusion/amulet/parallel.yaml | 55 +++ analysis/br/rfdiffusion/gen_rfdiff.py | 333 ++++++++++++++++++ 16 files changed, 819 insertions(+) create mode 100644 analysis/br/foldseek/clust_synalone.yaml create mode 100644 analysis/br/foldseek/clust_together.yaml create mode 100644 analysis/br/foldseek/cluster_pdb_alone.yaml create mode 100644 analysis/br/foldseek/parallell_search.yaml create mode 100644 analysis/br/omegafold/Dockerfile create mode 100644 analysis/br/omegafold/parallel.yaml 
create mode 100644 analysis/br/omegafold/single.yaml create mode 100644 analysis/br/pmpnn/Dockerfile create mode 100644 analysis/br/pmpnn/base_parallel.yaml create mode 100644 analysis/br/pydssp/Dockerfile create mode 100644 analysis/br/pydssp/pydssp_parallel.yaml create mode 100644 analysis/br/rfdiffusion/Dockerfile create mode 100644 analysis/br/rfdiffusion/amulet/Dockerfile.amulet create mode 100644 analysis/br/rfdiffusion/amulet/job.yaml create mode 100644 analysis/br/rfdiffusion/amulet/parallel.yaml create mode 100644 analysis/br/rfdiffusion/gen_rfdiff.py diff --git a/analysis/br/foldseek/clust_synalone.yaml b/analysis/br/foldseek/clust_synalone.yaml new file mode 100644 index 0000000..f084f68 --- /dev/null +++ b/analysis/br/foldseek/clust_synalone.yaml @@ -0,0 +1,32 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + +environment: + image: alexjlee/fs:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + +storage: + data: + storage_account_name: alexleecold + container_name: amulet + mount_dir: /mnt/data/ + fseek: + storage_account_name: alexleecold + container_name: foldseek + mount_dir: /mnt/foldseekdb/ + +jobs: +- name: "cluster_together" + #sku: 8C60 # 32 gb 16 cores + #sku: 8C30 + sku: 8C60 + #sku: 10C3 + priority: high + process_count_per_node: 1 + command: + - /usr/local/bin/entrypoint easy-cluster /mnt/data/alexleecold/pdbs/foldseek_best_scRMSD /mnt/data/alexleecold/clustering_experiments/syn_alone_w_seqreplace/syn240k_alone_default_ /tmp diff --git a/analysis/br/foldseek/clust_together.yaml b/analysis/br/foldseek/clust_together.yaml new file mode 100644 index 0000000..d2a0d27 --- /dev/null +++ b/analysis/br/foldseek/clust_together.yaml @@ -0,0 +1,32 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + name: msrresrchvc + 
workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + +environment: + image: alexjlee/fs:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + +storage: + data: + storage_account_name: alexleecold + container_name: amulet + mount_dir: /mnt/data/ + fseek: + storage_account_name: alexleecold + container_name: foldseek + mount_dir: /mnt/foldseekdb/ + +jobs: +- name: "cluster_pdb" + #sku: 8C60 # 32 gb 16 cores + #sku: 8C30 + sku: 8C60 + #sku: 10C3 + priority: high + process_count_per_node: 1 + command: + - /usr/local/bin/entrypoint easy-cluster /mnt/data/alexleecold/pdbs/scRMSD_best_240k_plus_pdbFirst /mnt/data/alexleecold/clustering_experiments/pdb_plus_240k/scRMSD_best_240k_plus_pdbFirst_default_ /tmp diff --git a/analysis/br/foldseek/cluster_pdb_alone.yaml b/analysis/br/foldseek/cluster_pdb_alone.yaml new file mode 100644 index 0000000..2777839 --- /dev/null +++ b/analysis/br/foldseek/cluster_pdb_alone.yaml @@ -0,0 +1,34 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + +environment: + image: alexjlee/fs:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + image_setup: + - echo "Setup!" 
+ +storage: + data: + storage_account_name: alexleecold + container_name: amulet + mount_dir: /mnt/data/ + fseek: + storage_account_name: alexleecold + container_name: foldseek + mount_dir: /mnt/foldseekdb/ + +jobs: +- name: "cluster_synthetic" + #sku: 8C60 # 32 gb 16 cores + #sku: 8C30 + sku: 8C60 + #sku: 10C3 + priority: high + process_count_per_node: 1 + command: + - /usr/local/bin/entrypoint easy-cluster /mnt/data/alexleecold/pdbs/pdb_flat_first /mnt/data/alexleecold/clustering_experiments/pdbalone/pdbAlone_flat_first_default_ /tmp diff --git a/analysis/br/foldseek/parallell_search.yaml b/analysis/br/foldseek/parallell_search.yaml new file mode 100644 index 0000000..b526636 --- /dev/null +++ b/analysis/br/foldseek/parallell_search.yaml @@ -0,0 +1,47 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + +environment: + image: alexjlee/fs:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + image_setup: + - echo "Setup!" + - echo "Doing somehting!" + # - . 
setup.sh + +# code: +# # $CONFIG_DIR is expanded to the directory of this config file +# local_dir: /home/t-leea/project/ +# ignore: +# - prodata/scripts/tools/foldseek/* +# - plm +# - RFdiffusion + +# data: +# local_dir: /data/uniref50_202401 +# remote_dir: uniref50_202401 + +storage: + data: + storage_account_name: alexleecold + container_name: amulet + mount_dir: /mnt/data/ + fseek: + storage_account_name: alexleecold + container_name: foldseek + mount_dir: /mnt/foldseekdb/ + +jobs: +- name: foldseek + sku: 8C60 + priority: high + process_count_per_node: 1 + command: + #- /usr/local/bin/entrypoint easy-search /mnt/data/omegafold/best_syn_rmsd/0.1 /mnt/foldseekdb/afdb50/afdb50 /mnt/data/alexleecold/omegafold/best-fit-seqs_search.m8 /tmp --format-output "query,target,alntmscore,qtmscore,ttmscore,lddt,prob,evalue,bits" + - /usr/local/bin/entrypoint easy-search /mnt/data/alexleecold/pdbs/foldseek_140316_tosearch /mnt/foldseekdb/afdb50/afdb50 /mnt/data/alexleecold/omegafold/best-fit-seqs_search_140k.m8 /tmp --format-output "query,target,alntmscore,qtmscore,ttmscore,lddt,prob,evalue,bits" + - sleep 1m diff --git a/analysis/br/omegafold/Dockerfile b/analysis/br/omegafold/Dockerfile new file mode 100644 index 0000000..d8049ca --- /dev/null +++ b/analysis/br/omegafold/Dockerfile @@ -0,0 +1,15 @@ +ARG PYTORCH_TAG=2.3.1-cuda12.1-cudnn8-devel +FROM pytorch/pytorch:${PYTORCH_TAG} + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + build-essential \ + git \ + wget \ + curl \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get autoremove -y \ + && apt-get clean + +RUN pip install biopython && pip install --no-deps git+https://github.com/alexj-lee/OmegaFold.git && \ + mkdir -p /root/.cache/omegafold_ckpt && wget https://helixon.s3.amazonaws.com/release1.pt -O /root/.cache/omegafold_ckpt/model.pt diff --git a/analysis/br/omegafold/parallel.yaml b/analysis/br/omegafold/parallel.yaml new file mode 100644 index 0000000..789d448 --- /dev/null +++ 
b/analysis/br/omegafold/parallel.yaml @@ -0,0 +1,40 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + +environment: + image: alexjlee/omegafold:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + +code: + # $CONFIG_DIR is expanded to the directory of this config file + local_dir: /home/t-leea/project/protprune/scripts + +storage: + data: + storage_account_name: alexleecold + container_name: amulet + mount_dir: /mnt/data/ + +search: + job_template: + name: "{experiment_name:s}_{auto:3s}" + sku: G1-A100 # 32 gb 16 cores + #sku: 10C3 + priority: high + process_count_per_node: 1 + command: + - mkdir /mnt/data/alexleecold/omegafold/0dot1/{dirname} + - omegafold --subbatch_size 448 /mnt/data/alexleecold/pmpnn_results/0dot1/{dirname}.fasta /mnt/data/alexleecold/omegafold/0dot1/{dirname} + submit_args: + env: + SHARED_MEMORY_PERCENT: 0.1 + type: grid + max_trials: 5000 + params: + - name: dirname + values: ['202407262019_xdNQ', '202407201930_ft4g', '202407201950_YEeQ'] diff --git a/analysis/br/omegafold/single.yaml b/analysis/br/omegafold/single.yaml new file mode 100644 index 0000000..a13d133 --- /dev/null +++ b/analysis/br/omegafold/single.yaml @@ -0,0 +1,41 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + + # name: msrresrchlab + # workspace_name: biomlinterns2024 + # resource_group: gcr-singularity-lab + + +environment: + image: alexjlee/ofold:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + image_setup: + - echo "bye" + +code: + # $CONFIG_DIR is expanded to the directory of this config file + local_dir: /home/t-leea/project/protprune/scripts + +storage: + data: + storage_account_name: alexleecold + 
container_name: amulet + mount_dir: /mnt/data/ + +job: + name: "{experiment_name:s}_{auto:3s}" + sku: G1-A100 # 32 gb 16 cores + #sku: 10C3 + priority: high + process_count_per_node: 1 + command: + #- echo {dirname} >> /mnt/data/alexleecold/foldseek_finished_backbones.txt + #- python pmpnn_cdesign.py --directory {dirname} --output_fasta /mnt/data/alexleecold/pmpnn_results/0dot1/{dirname}.fasta --temperature 0.1 --num_seqs 10 + #- touch /mnt/data/alexleecold/pmpnn_results/0dot1/done/{dirname} + - mkdir /mnt/data/alexleecold/omegafold/0dot1/{dirname} + - omegafold --subbatch_size 224 /mnt/data/alexleecold/pmpnn_results/0dot1/{dirname}.fasta /mnt/data/alexleecold/omegafold/0dot1/{dirname} diff --git a/analysis/br/pmpnn/Dockerfile b/analysis/br/pmpnn/Dockerfile new file mode 100644 index 0000000..bf3df7e --- /dev/null +++ b/analysis/br/pmpnn/Dockerfile @@ -0,0 +1,8 @@ +FROM nvcr.io/nvidia/pytorch:23.09-py3 + + +RUN pip install --upgrade pip && \ + pip install "jax[cuda]" \ + -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html +RUN pip install git+https://github.com/sokrypton/ColabDesign + diff --git a/analysis/br/pmpnn/base_parallel.yaml b/analysis/br/pmpnn/base_parallel.yaml new file mode 100644 index 0000000..7fd67e5 --- /dev/null +++ b/analysis/br/pmpnn/base_parallel.yaml @@ -0,0 +1,44 @@ +target: + service: sing + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + +environment: + image: alexjlee/colabdesign_jaxgpu:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + +code: + # $CONFIG_DIR is expanded to the directory of this config file + local_dir: /home/t-leea/project/protprune/scripts + +storage: + data: + storage_account_name: alexleecold + container_name: amulet + mount_dir: /mnt/data/ + +search: + job_template: + name: "{experiment_name:s}_{auto:3s}" + sku: 8C7 + priority: high + process_count_per_node: 1 + command: + - mkdir -p 
/mnt/data/alexleecold/pmpnn_results_u50length/{temperature}/done + - python pmpnn_cdesign.py --directory /mnt/data/alexleecold/backbones_u50length/{dirname} --output_fasta /mnt/data/alexleecold/pmpnn_results_u50length/{temperature}/{dirname}.fasta --temperature {temperature} --num_seqs 10 + - touch /mnt/data/alexleecold/pmpnn_results_u50length/{temperature}/done/{dirname} + - sleep 30s + submit_args: + env: + SHARED_MEMORY_PERCENT: 0.1 + CUDA_VISIBLE_DEVICES: "" + JAX_PLATFORMS: "cpu" + type: grid + max_trials: 400 + params: + - name: dirname + values: ['20240810122_mcz2','202408101759_mj8X','202408101946_JehM','202408101946_YPnu','202408102045_dQqr',] + - name: temperature + values: [0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1.0] \ No newline at end of file diff --git a/analysis/br/pydssp/Dockerfile b/analysis/br/pydssp/Dockerfile new file mode 100644 index 0000000..894cb12 --- /dev/null +++ b/analysis/br/pydssp/Dockerfile @@ -0,0 +1,4 @@ +FROM singularitybase.azurecr.io/base/job/pytorch/acpt-2.2.1-py3.10-cuda12.1:20240312T225111416 as base + +RUN pip install git+https://github.com/alexj-lee/PyDSSP/ +RUN echo "rebuild" diff --git a/analysis/br/pydssp/pydssp_parallel.yaml b/analysis/br/pydssp/pydssp_parallel.yaml new file mode 100644 index 0000000..3567cf9 --- /dev/null +++ b/analysis/br/pydssp/pydssp_parallel.yaml @@ -0,0 +1,46 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + +environment: + image: alexjlee/pdp:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + +# code: +# # $CONFIG_DIR is expanded to the directory of this config file +# local_dir: /home/t-leea/project/protprune/scripts + +# data: +# local_dir: /home/t-leea/checkpoints +# remote_dir: alexleecold/rfdiff/checkpoints +# data upload is not required for this example + +storage: + data: + storage_account_name: alexleecold + 
container_name: amulet + mount_dir: /mnt/data/ + +# list of jobs to run, we run 2 jobs in this example +search: + job_template: + name: "{experiment_name:s}_{auto:3s}" + sku: 10C3 + process_count_per_node: 1 + command: + - mkdir -p /mnt/data/alexleecold/pydssp_u50length/done + - pydssp /mnt/data/alexleecold/backbones_u50length/{dirname} -d cpu -o /mnt/data/alexleecold/pydssp_u50length/{dirname}.dssp + - touch /mnt/data/alexleecold/pydssp_u50length/done/{dirname} + - sleep 1m + type: grid + max_trials: 1000 + params: + - name: dirname + # + #values: ['202408101031_9Q6G', '202408101056_tNN7', '202408101241_Nawi', '20240810143_BSEv', '20240810153_3kv1', '20240810153_QVZd', '202408101710_4ysa', '202408101829_OFVA', '20240810182_NGjb', '202408101836_1iPO', '202408101844_WYFR', '202408101856_doJ3', '202408101857_aSfS', '202408101857_v2pP', '202408101912_tj9c', '20240810191_iS0X', '202408101923_5Nvb', '202408101931_Ah6S', '202408101944_aQyo', '202408101949_XhSS', '20240810199_Ui4O', '20240810200_Tx7Q', '20240810202_v0fL', '20240810204_N4p4', '20240810205_w4Gh', '20240810221_LOLh', '202408102319_kyNx', '202408102324_S4h2', '202408102331_iANp', '202408102346_CPZl', '2024081032_EyrB', '20240810341_NPeK', '20240810447_1Y2q', '20240810514_eUY6', '20240810521_kjKj', '20240810523_Gadq', '20240810554_FUtt', '20240810557_BNYP', '2024081055_uzrX', '2024081056_DkZm', '20240810610_J1nr', '2024081069_vj6U', '20240810820_0Lju', '20240811027_cAWF', '20240811030_P8fC', '20240811036_Wm5X', '2024081106_MIxD', '2024081108_oPis', '20240811110_5J8D', '20240811110_ihY4', '202408111110_FoRe', '202408111110_lqKy', '202408111111_DpiL', '202408111112_Zbdn', '202408111112_Znsz', '202408111113_GM8G', '202408111114_X5yj', '202408111115_y1LH', '202408111117_PaIP', '202408111118_j2A6', '202408111118_jns2', '202408111119_JXhw', '202408111119_UblG', '202408111119_Xz4E', '202408111121_W5F1', '202408111122_40Po', '202408111122_BlTy', '202408111123_sI0J', '202408111123_zql1', '202408111126_f93e', 
'20240811112_1wrP', '20240811112_ryBz', '202408111131_UWWa', '202408111135_G2JY', '202408111135_i2rg', '202408111139_snzV', '202408111139_x6PR', '20240811113_1B6d', '202408111146_Sdge', '202408111147_xclR', '20240811114_kCbq', '202408111150_PVeV', '202408111150_WQow', '202408111152_OJVt', '202408111154_9Hh3', '202408111156_8pYe', '202408111156_H5dM', '20240811116_ZzTt', '20240811116_aJEW', '20240811117_JVmW', '20240811119_KlKq', '20240811119_hWI8', '2024081111_31ef', '20240811120_BfQE', '202408111223_Lr1Y', '202408111330_dslw', '20240811149_YLV1', '202408111521_loB5', '202408111521_zkLX', '202408111538_SenI', '20240811178_LiiG', '2024081117_HXv1', '202408111846_MHEn', '202408111946_Iht7', '202408111946_yzRJ', '20240811211_MZCu', '20240811212_YOsI', '20240811212_hDsB', '2024081121_aaJK', '20240811223_e3GQ', '202408112245_FLs0', '20240811226_PyIf', '202408112318_Pgeh', '202408112318_m4le', '20240811235_Bm6S', '20240811235_sjoZ', '20240811236_Ilwu', '20240811236_tQXr', '20240811242_ju7z', '20240811248_8Wc6', '20240811254_R6kb', '20240811257_bxLd', '2024081125_ARnR', '2024081130_NKfI', '20240811326_Khe4', '2024081132_6kjE', '2024081132_Da8p', '20240811331_TyhP', '20240811339_pQWl', '20240811341_vKby', '20240811345_6pe2', '20240811348_9eO3', '2024081134_kazp', '20240811358_U0tA', '20240811359_WFBn', '2024081136_36Hj', '20240811418_9pYy', '20240811427_lcqQ', '20240811428_h5nH', '20240811431_IR50', '20240811436_zKnB', '20240811442_G15y', '20240811443_1uIT', '20240811445_fX4d', '2024081144_oeIR', '2024081145_TqZD', '2024081146_hc5l', '2024081150_h2pO', '2024081150_zmCi', '20240811526_itpZ', '20240811527_ISCX', '20240811531_uvpz', '20240811535_K5h4', '20240811536_g8UF', '20240811536_v0NR', '20240811536_wgcm', '20240811537_JEjV', '20240811537_v9Rk', '20240811539_TDNQ', '20240811540_H92P', '20240811548_Vrwv', '20240811548_bdpu', '20240811549_OLMG', '20240811549_uA2B', '20240811550_X8n2', '20240811551_85Rp', '20240811552_UrPH', '20240811554_yGn2', '20240811556_YooQ', 
'20240811556_k9PD', '2024081156_C4y2', '2024081156_kSzP', '20240811619_9fd8', '20240811619_ylP5', '20240811620_xGKh', '20240811621_25Ob', '20240811626_iLPl', '20240811628_Od4u', '20240811638_rbHY', '2024081164_A2sy', '2024081166_X9KV', '2024081167_b9SN', '2024081168_7jR4', '20240811719_g1rO', '20240811723_QTR8', '20240811728_FEa1', '20240811731_7GB0', '20240811739_mRWl', '20240811743_MrOf', '20240811744_4WMq', '2024081175_YTEi', '20240811843_b9Rb', '20240812042_SbAd', '20240812042_n90s', '202408121118_hcsa', '202408121228_rAvy', '202408121239_0NZH', '202408121239_JIhp', '20240812124_Yje4', '20240812124_yDdP', '202408121316_anfp', '202408121333_9XYJ', '202408121333_DaNc', '202408121414_UnCQ', '202408121422_wiZV', '202408121430_M70S', '202408121434_CPSn', '202408121438_8kK0', '20240812143_V6Ch', '20240812143_ZPOE', '202408121447_vz0N', '202408121513_Xrih', '202408121515_yLgM', '202408121516_8B4D', '202408121516_k062', '202408121516_lPk8', '202408121523_4QmA', '202408121529_BHLb', '202408121535_0HL7', '202408121535_HoBu', '202408121537_8vmZ', '20240812161_LHNd', '20240812163_UDD1', '202408121651_ueIY', '202408121653_bJ7B', '202408121654_44dv', '202408121655_Uylr', '202408121657_NmLF', '202408121659_AZbu', '202408121659_IW62', '202408121714_WJMX', '202408121717_Fn48', '202408121717_eSdx', '202408121744_1dgY', '202408121745_hc8E', '202408121750_kk20', '202408121817_KA7x', '202408121820_Uxq8', '202408121821_HhfA', '202408121830_KCI9', '202408121837_6j27', '202408121912_s9jO', '20240812191_Mz0p', '202408121953_1Dv7', '202408122015_r657', '202408122054_M0GV', '20240812207_ZmzQ', '202408122111_3BJx', '2024081221_U2sO', '2024081221_WLir', '202408122215_M79x', '202408122222_YVhq', '202408122231_j0nj', '202408122231_ndJv', '202408122235_tKkc', '202408122236_weVf', '202408122243_5S5m', '202408122243_9yez', '202408122246_c994', '202408122250_3wX4', '202408122259_VuLL', '202408122340_31el', '202408122345_UB46', '202408122358_36Lb', '202408122358_WgPe', '20240812236_7Cbh', 
'20240812236_ypDO', '20240812237_u9v8', '2024081250_h8SG', '20240812544_wTCr', '20240812556_613o', '20240812721_FPRE', '2024081273_wSCU', '20240813023_iwnW', '20240813024_45db', '20240813036_vLcO', '20240813037_3R8x', '20240813037_xmrj', '20240813043_r244', '202408131113_u3Mh', '202408131113_yeI4', '20240813130_sVhL', '20240813131_G4um', '20240813142_5ZNU', '20240813144_K72c', '202408131525_RXse', '202408131525_TeoX', '202408131548_IfS1', '202408131548_MJQw', '20240813155_Ov5O', '2024081316_Y5MF', '20240813228_80F1', '20240813237_J4i6', '20240813237_MMr1', '2024081323_03ka', '20240813240_MYwN', '20240813240_XLGL', '20240813247_ZqNn', '20240813347_Ba83', '20240813427_mzRM', '20240813436_Enp6', '2024089160_Vb7f', '20240891612_l5Om', '20240891612_o9Gh', '20240891612_t07X', '20240891614_K6ud', '20240891615_CneJ', '20240891616_rFV9', '20240891617_ibtr', '20240891618_cqM1', '20240891621_l0Z4', '20240891623_Dhyb', '20240891627_ANij', '20240891630_Nbke', '20240891635_d33L', '20240891635_kI7Q', '2024089163_pMQ0', '20240891649_6Zrz', '2024089164_Ias9', '2024089164_yaxt', '2024089164_ytiN', '20240891650_f6u4', '20240891651_li7V', '20240891654_hgcB', '2024089165_Pxck', '2024089165_kA8J', '2024089165_w9Pl', '2024089166_8rn3', '2024089166_hW1p', '2024089167_PV4f', '2024089167_rgtR', '2024089167_ywGT', '2024089168_KvwZ', '2024089168_Pq5y', '2024089169_JULm', '2024089169_LxUY', '20240891724_y1uo', '2024089173_pFpY', '20240891754_kU7e', '20240891920_SZQr', '20240891920_rOmz', '20240892034_dkVX', '20240892110_Thvf', '20240892110_uCpL', '20240892119_mZbf', '20240892121_T7X8', '20240892128_n5aU', '20240892133_OZ5F', '20240892136_Jwnh', '20240892141_en36', '20240892143_xuNC', '20240892145_pWJk', '20240892150_kuLx', '20240892158_4BvZ', '20240892159_cFBJ', '2024089217_GgiP', '2024089219_K2Pp', '20240892210_xXve', '20240892211_Qa82', '20240892221_KQqK', '20240892224_O4HK', '20240892224_sPsS', '20240892225_w36I', '2024089222_vgOB', '20240892230_2JGR', '20240892234_rmjM', 
'20240892236_0FxJ', '20240892240_OaL7', '20240892242_Q72R', '20240892245_bxuP', '20240892249_zOCd', '2024089224_hLed', '20240892251_Q0an', '20240892253_PHAX', '20240892255_WFrY', '20240892257_lTdl'] + values: ['202408102331_iANp', '20240891754_kU7e', '2024081175_YTEi', '20240813240_MYwN', '20240811535_K5h4', '20240811331_TyhP', '202408121745_hc8E', '20240813237_MMr1', '202408121655_Uylr'] + #values: ['20240810122_mcz2','202408101759_mj8X','202408101946_JehM','202408101946_YPnu','202408102045_dQqr','202408102342_2JXE','202408102342_tOOV','202408102344_RHup','202408102344_edQV','20240811149_0ubq','20240811149_4Bzu','2024089150_TU6H','2024089150_W2Cw','2024089150_cJ61','2024089150_vnan','20240891510_NhYO','20240891510_SbSn','20240891510_UzPY','20240891510_gsce','20240891510_sfJf','20240891511_bZRN','20240891511_uOeV','20240891512_Odvr','20240891512_UVIc','20240891512_Wlaa','20240891512_aldX','20240891513_TdEy','20240891514_jVks','20240891514_zt6V','20240891516_7qRe','20240891516_YRiU','20240891517_58ki','20240891517_JMDc','20240891517_UBIl','20240891517_fJ9I','20240891518_21cg','20240891518_8K2Z','20240891518_BAiB','20240891518_Yck6','20240891519_OKjQ','20240891519_rG0p','20240891519_zf6C','2024089151_CqmT','20240891521_3uyo','20240891521_sUVH','20240891523_JgVl','20240891523_Zew8','20240891523_fvMb','20240891523_opSm','20240891523_xeO3','20240891525_0E00','20240891525_Dy0W','20240891525_ZKFP','20240891526_rM4K','20240891527_3hS4','20240891527_NTpR','20240891527_RXNH','20240891527_mb1q','20240891527_qJLB','20240891528_MkF9','20240891529_MmSA','2024089152_UP80','2024089152_q9Lz','20240891531_BtMN','20240891533_3inS','20240891533_n5Tx','20240891533_qmEX','20240891534_les6','20240891534_tTM7','20240891535_izEP','20240891535_qYOD','20240891535_zvko','20240891537_2LpY','20240891538_8CsF','20240891538_kQgj','20240891538_n5Yd','20240891539_eEh1','2024089153_Qs5N','20240891540_a05b','20240891540_wVYT','20240891541_hNzW','20240891541_oDrt','20240891541_ouWK','20240891541_r1PK',
'20240891544_pFgf','20240891545_AyGl','20240891545_JJOI','20240891545_OEql','20240891545_wGOp','20240891546_b4JI','20240891546_mAXl','20240891546_rS5J','20240891547_6Cux','20240891547_MmA9','20240891547_clnD','20240891547_jtK7','20240891548_BgF4','20240891548_VDpa','20240891549_ChEi','20240891549_Swik','20240891549_qTlE','20240891549_qt1z','2024089154_JujZ','20240891550_JuTI','20240891550_NNSw','20240891550_n7Ed','20240891550_wBbW','20240891551_RKj7','20240891551_hRsB','20240891552_91gq','20240891552_Awnn','20240891552_CSqk','20240891552_pidW','20240891552_q2Rr','20240891552_yl42','20240891552_zP0L','20240891554_9MJg','20240891555_kDGE','20240891555_lnce','20240891556_pMny','20240891556_z87K','20240891557_V7hd','20240891559_w7i6','2024089155_9jwK','2024089155_PrrY','2024089155_TuJI','2024089155_hZnl','2024089156_BFyN','2024089156_GYAY','2024089156_JpjY','2024089156_N2X9','2024089156_XXa0','2024089157_9iyc','2024089157_9krm','2024089158_3nEd','2024089158_FIgQ','2024089159_Uc65','2024089159_ie9O','2024089160_Wr9r','2024089160_xBna','20240891638_Orhz','20240891649_zYpz','20240891651_Llz8','20240891654_ZTQh','20240891656_OIIP','20240891656_lqmd','20240891659_7xKI','20240891659_RPfq','20240892010_W0ZT','20240892215_szM9'] diff --git a/analysis/br/rfdiffusion/Dockerfile b/analysis/br/rfdiffusion/Dockerfile new file mode 100644 index 0000000..4682494 --- /dev/null +++ b/analysis/br/rfdiffusion/Dockerfile @@ -0,0 +1,22 @@ +FROM nvcr.io/nvidia/pytorch:23.09-py3 + +RUN apt-get update && \ + apt-get install -y \ + wget \ + git +RUN git clone https://github.com/sokrypton/RFdiffusion.git +RUN pip install jedi \ + omegaconf \ + hydra-core \ + icecream \ + pyrsistent +RUN pip install --no-dependencies dgl==2.0.0 -f https://data.dgl.ai/wheels/cu121/repo.html +RUN pip install --no-dependencies e3nn==0.3.3 +RUN pip install opt_einsum_fx +RUN cd RFdiffusion/env/SE3Transformer && \ + pip install . 
+RUN wget -qnc https://files.ipd.uw.edu/krypton/ananas && chmod +x ananas +RUN mv RFdiffusion/* /root +RUN pip install git+https://github.com/sokrypton/ColabDesign.git@v1.1.1 +RUN ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign +ENV DGLBACKEND="pytorch" diff --git a/analysis/br/rfdiffusion/amulet/Dockerfile.amulet b/analysis/br/rfdiffusion/amulet/Dockerfile.amulet new file mode 100644 index 0000000..8b48d99 --- /dev/null +++ b/analysis/br/rfdiffusion/amulet/Dockerfile.amulet @@ -0,0 +1,30 @@ +FROM singularitybase.azurecr.io/base/job/pytorch/acpt-2.2.1-py3.10-cuda12.1:20240312T225111416 as base +# FROM validations/base/singularity-tests as validator + +FROM base +RUN apt-get update && \ + apt install -y \ + wget \ + git && \ + apt-get clean + +RUN git clone https://github.com/alexj-lee/RFdiffusion +RUN pip install jedi \ + omegaconf \ + hydra-core \ + icecream \ + pyrsistent +RUN pip install --no-dependencies dgl==2.0.0 -f https://data.dgl.ai/wheels/cu121/repo.html && \ + pip install --no-dependencies e3nn==0.3.3 && \ + pip install opt_einsum_fx +RUN cd RFdiffusion/env/SE3Transformer && \ + pip install . 
+RUN wget -qnc https://files.ipd.uw.edu/krypton/ananas && chmod +x ananas +RUN mv RFdiffusion/* /root +RUN chmod -R 777 /root +RUN pip install git+https://github.com/sokrypton/ColabDesign.git@v1.1.1 +RUN ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign +ENV DGLBACKEND="pytorch" +#COPY --from=validator /validations /opt/microsoft/_singularity/validations/ +#ENV SINGULARITY_IMAGE_ACCELERATORY="NVIDIA" +#RUN /opt/microsoft/_singularity/validations/validator.sh diff --git a/analysis/br/rfdiffusion/amulet/job.yaml b/analysis/br/rfdiffusion/amulet/job.yaml new file mode 100644 index 0000000..25c44b6 --- /dev/null +++ b/analysis/br/rfdiffusion/amulet/job.yaml @@ -0,0 +1,36 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + +environment: + image: alexjlee/rfdiff:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + +code: + # $CONFIG_DIR is expanded to the directory of this config file + local_dir: /home/t-leea/project/protprune/scripts + +# data: +# local_dir: /home/t-leea/checkpoints +# remote_dir: alexleecold/rfdiff/checkpoints +# data upload is not required for this example + +storage: + data: + storage_account_name: alexleecold + container_name: amulet + mount_dir: /mnt/data/ + +# list of jobs to run, we run 2 jobs in this example +jobs: +- name: rfdiffusion + sku: 32G1-V100 + process_count_per_node: 2 + command: + - mkdir -p /mnt/data/alexleecold/backbones + - find /mnt/data + - python gen_rfdiff.py --nb_config /mnt/data/alexleecold/rfdiff/checkpoints/negbin_uniref50.yaml --num 20 --gpu 0 --container '' --checkpoint /mnt/data/alexleecold/rfdiff/checkpoints/Base_ckpt.pt --num_threads 16 --output_directory /mnt/data/alexleecold/backbones/ diff --git a/analysis/br/rfdiffusion/amulet/parallel.yaml b/analysis/br/rfdiffusion/amulet/parallel.yaml new file mode 100644 index 
0000000..8a63e94 --- /dev/null +++ b/analysis/br/rfdiffusion/amulet/parallel.yaml @@ -0,0 +1,55 @@ +target: + service: sing + # run "amlt target list aml" to list the names of available AML targets + + # name: msrresrchlab + # workspace_name: biomlinterns2024 + # resource_group: gcr-singularity-lab + name: msrresrchvc + workspace_name: biomlinterns2024 + resource_group: gcr-singularity-resrch + + + # name: msroctovc + # workspace_name: biomlinterns2024 + # resource_group: gcr-singularity-octo + +environment: + image: alexjlee/rfdiff:latest + username: biomlinterns2024cr + registry: biomlinterns2024cr.azurecr.io + image_setup: + - echo "bye" + +code: + # $CONFIG_DIR is expanded to the directory of this config file + local_dir: /home/t-leea/project/protprune/scripts + ignore: + - tools/foldseek + - tools/genie2 + - tools/omegafold + - tools/pmpnn + - tools/pydssp + - training + - scrmsd + + +storage: + data: + storage_account_name: alexleecold + container_name: amulet + mount_dir: /mnt/data/ + +search: + job_template: + name: "{experiment_name:s}_{auto:3s}" + sku: 16G1-V100 + process_count_per_node: 2 + command: # placeholder variable literally does nothing + - python gen_rfdiff.py --nb_config /mnt/data/alexleecold/rfdiff/checkpoints/negbin_uniref50.yaml --placeholder {placeholder} --num 1000 --gpu 0 --container '' --checkpoint /mnt/data/alexleecold/rfdiff/checkpoints/Base_ckpt.pt --num_threads 8 --output_directory /mnt/data/alexleecold/backbones/ --cleanup + type: grid + max_trials: 384 + params: + - name: placeholder + values: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105] + diff --git a/analysis/br/rfdiffusion/gen_rfdiff.py b/analysis/br/rfdiffusion/gen_rfdiff.py new file mode 100644 index 0000000..101da88 --- /dev/null +++ b/analysis/br/rfdiffusion/gen_rfdiff.py @@ -0,0 +1,333 @@ +import yaml +import pathlib +import argparse +import random +import sys +import string +import shutil +from datetime import datetime + +from scipy import stats +import numpy as np +from hydra import initialize, compose + +try: + import docker +except ImportError: + pass + +""" +Last updated 2024-07-11. +TODO: I am realizing this is super suboptimal. There are two sources of +unnecessary overhead: + +1. 
starting the image +Possibly ameliorated by addn of something like below with substitution of +docker exec for docker run: + +import docker + +# Create a Docker client +client = docker.from_env() + +# Run a container in detached mode +container = client.containers.run('my_docker_image', detach=True, name='my_container') + +# Execute a command in the running container +exit_code, output = container.exec_run('command_to_run') + +# Print the output +print(output.decode()) + +# Stop and remove the container +container.stop() +container.remove() + +2. Loading weights and initializing the actual model itself: +Solved probably by editing the script they are using. + +""" + + +def get_timestamp_now(): + now = datetime.datetime.now() + year = now.year + month = str(now.month).zfill(2) + day = now.day + hour = now.hour + second = now.second + + return f"{year}{month}{day}{hour}{second}" + + +def nb_from_file(file: str): + """ + File to read params from a negative binomial parameter file in YAML format. If you want to bypass this, + the script is looking for `n`, `p`, and `loc`. 
+ """ + + fpathlib = pathlib.Path(file) + if not fpathlib.exists(): + raise FileNotFoundError(f"File {file} does not exist.") + + with open(file, "r") as f: + config = yaml.safe_load(f) + + return stats.nbinom( + config.get("n", None), config.get("p", None), config.get("loc", 0) + ) + + +def nb_from_params(n: int, p: float, loc: int): + return stats.nbinom(n, p, loc) + +def generate_random_string(length=6): + characters = string.ascii_letters + string.digits + return "".join(random.choice(characters) for _ in range(length)) + + +def setup_docker(gpu: int): + client = docker.from_env() + device = [docker.types.DeviceRequest(device_ids=[str(gpu)], capabilities=[["gpu"]])] + ulimits = [ # docker complains if these are not added + GPU is desired + docker.types.Ulimit(name="memlock", hard=-1, soft=-1), + docker.types.Ulimit(name="stack", hard=67108864, soft=67108864), + ] + volume = { + "/home": {"bind": "/home", "mode": "rw"}, + "/data": {"bind": "/data", "mode": "rw"}, + } + + return client, device, ulimits, volume + + +def parse_args(): + args = argparse.ArgumentParser() + sampler = args.add_mutually_exclusive_group(required=True) + + sampler.add_argument( + "--nb_config", + type=str, + help="Path to the negative binomial config file", + ) + sampler.add_argument( + "--lengths", + type=str, + help="Path to file of lengths in `.npy` file format.", + ) + + args.add_argument( + "--sequential", + action="store_true", + help="Run the inference sequentially, \ + as opposed to selecting n proteins of differing length from the length distribution and\ + generating those at the same time, meaning all proteins of length l are done in one docker command.", + required=False, + ) + args.add_argument( + "--num", + type=int, + default=5000, + help="Number of proteins to generate.", + required=True, + ) + args.add_argument( + "--gpu", + type=int, + default=0, + help="GPU to use. 
Will be provides a a `docker.types.DeviceRequest` with given ID.", + required=True, + ) + args.add_argument( + "--output_directory", + type=str, + help="Output prefix for the generated proteins. Backbones will then be generated at path: {output_directory}/{DATETIMENOW}{RFDIFFUSION_NORMAL_OUTPUT}\ + where RFDIFFUSION_NORMAL_OUTPUT is the normal string name; we do this in order to prevent collisions between simultaneous runs of this script.", + required=True, + ) + args.add_argument( + "--container", + type=str, + default="rfdiff", + help="Docker container to use.", + required=False, + ) + args.add_argument( + "--checkpoint", + type=str, + help="Path to the checkpoint file to use.", + required=True, + ) + args.add_argument( + "--num_threads", + type=str, + help="Number of threads to force one instance to use (CPU).", + required=False, + default=8, + ) + args.add_argument( + "--cleanup", + action="store_true", + required=False, + help="Whether to delete `traj` files after running. ", + ) + args.add_argument('--maxlen', type=int, default=1024, help='Maximum length of protein to generate.') + args.add_argument( + "--placeholder", + type=str, + help="Doesn't do anything. Just a placeholder to allow for multiple simultaneous amulet runs.", + ) + + args = parse_args() + return args + + +def main(): + args = parse_args() + + if pathlib.Path(args.checkpoint).exists() is False: + raise FileNotFoundError(f"Checkpoint file {args.checkpoint} does not exist.") + + if pathlib.Path(args.output_prefix.parent).exists() is False: + raise ValueError(f"The output prefix desired's path directories do not exist. 
Please fix; parent is: {args.output_prefix.parent}.\n\ + Fix by running `mkdir -p {args.output_prefix.parent}`") + + datetime_stamp = get_timestamp_now() + + container = args.container + if not ((container is None) or (container == "none") or (container == "")): + try: + import docker + + client, device, ulimits, volume = setup_docker(args.gpu) + except ImportError: + raise ImportError( + 'You passed a container name but do not have the docker library installed. \ + Please install the docker library or pass "" and then run this script using amulet.' + ) + else: + client = None + try: + # assume we are on amulet and an appropriate container has been uploaded + # in Sergey's RFDiffusion repo (which we are forking for this image+script) + # the requisite scripting files are moved to /root + # the file structure is roughly something like: + # /root + # ...py files + # /root/config + # /root/config/inference/ + # /root/config/inference/base.yaml + # /root/config/inference/symmetry.yaml + + sys.path.append("/root/") + import run_inference # type: ignore + # this is inside the container + + except ImportError: + raise ImportError( + 'You passed "" for the container meaning you wanted to run on amulet: for this \ + we need to be able to find the `run_inference.py` script in /root on the \ + container filesystem. \ + Please specify a container or install the script.' 
+ ) + + print(f"Client is: {client}") + + rand_str = generate_random_string(length=4) + prefix = pathlib.Path(args.output_directory) / f"{datetime_stamp}_{rand_str}" + prefix.mkdir(parents=True, exist_ok=True) + print("Completed making parent dir:", prefix) + + prefix = prefix.as_posix() # convert to string + + overrides = [ # baseline hydra opts + f"inference.ckpt_override_path={args.checkpoint}", + f"++num_threads={args.num_threads}", + + ] + + if args.nb_config: + sampler = nb_from_file( + args.nb_config + ) # will be stats.nbinom object; see documentation for scipy + random_lengths = sampler.rvs(size=args.num) + else: + random_lengths = np.load(args.lengths) + random_lengths = np.random.choice(random_lengths, size=args.num, replace=True) + + random_lengths = np.minimum(random_lengths, args.maxlen) + random_lengths = np.maximum(random_lengths, 40) + + unique, counts = np.unique(random_lengths, return_counts=True) + + stacked = np.vstack((unique, counts)).T # will be 2 x TOT_NUM + + if random.random() <= 1: # flip half the time + stacked = stacked[::-1] + + for idx, (rand_length, count) in enumerate(stacked): + tnow = datetime.now() + rand_length_zfill = str(int(rand_length)).zfill(5) + + print(f"{tnow} || Generating: {count} {rand_length}-long backbone(s).") + random_tag = generate_random_string(length=6) + + if client is not None: + # cmd = f"python /root/run_inference.py 'contigmap.contigs=[{rand_length}-{rand_length}]' \ + # inference.output_prefix={prefix}/{datetime_stamp}_{rand_length_zfill}AA \ + # inference.num_designs={count} inference.ckpt_override_path={args.checkpoint} ++num_threads={args.num_threads}" + cmd = "python /root/run_inference.py" + extra_cfgs = [ + f"inference.output_prefix={prefix}/{datetime_stamp}_{random_tag}_{rand_length_zfill}AA", + f"contigmap.contigs=[{rand_length}-{rand_length}]", + f"inference.num_designs={count}", + ] + for cfg in extra_cfgs + overrides: + cmd += f" {cfg}" + print(f"Running cmd: {cmd}") + + client.containers.run( 
+ container, + cmd, + device_requests=device, + volumes=volume, + ulimits=ulimits, + ) + + else: + from os import chdir + + chdir( + "/root/" + ) # base IPD & SOvchinnikov code gets copied to /root for some reason + with initialize( + version_base=None, config_path="../../root/config/inference" + ): # file struct is config/inference/{base.yaml, symmetry.yaml} + # see explanation in comment at start of this file to see more details about this + # this is equivalent roughly to: + # python /root/run_inference.py ...[settings]... (example inference.num_designs=3) + cfg = compose( + "base", + overrides=overrides + + [ + f"inference.output_prefix={prefix}/{datetime_stamp}_{random_tag}_{rand_length_zfill}AA", + f"contigmap.contigs=[{rand_length}-{rand_length}]", + f"inference.num_designs={count}", + ], + ) + + run_inference.main(cfg) + if args.cleanup is True: + print("args.cleanup is True: deleting `traj` files.") + pth_to_delete = pathlib.Path(prefix)/'traj' + if pth_to_delete.exists() is False: + print(f"Path to delete {pth_to_delete} does not exist. 
Continuing.") + else: + shutil.rmtree(pth_to_delete) + print("Finished deleting `traj` files.") + + return 0 + + +if __name__ == "__main__": + main() From 21baeb729d65efe2d60680efd66139efa4da87c7 Mon Sep 17 00:00:00 2001 From: Alex Lee Date: Wed, 16 Jul 2025 20:30:14 -0700 Subject: [PATCH 3/3] move br scripts to datasets dir --- {analysis => datasets}/br/foldseek/clust_synalone.yaml | 0 {analysis => datasets}/br/foldseek/clust_together.yaml | 0 {analysis => datasets}/br/foldseek/cluster_pdb_alone.yaml | 0 {analysis => datasets}/br/foldseek/parallell_search.yaml | 0 {analysis => datasets}/br/omegafold/Dockerfile | 0 {analysis => datasets}/br/omegafold/parallel.yaml | 0 {analysis => datasets}/br/omegafold/single.yaml | 0 {analysis => datasets}/br/pmpnn/Dockerfile | 0 {analysis => datasets}/br/pmpnn/base_parallel.yaml | 0 {analysis => datasets}/br/pydssp/Dockerfile | 0 {analysis => datasets}/br/pydssp/pydssp_parallel.yaml | 0 {analysis => datasets}/br/rfdiffusion/Dockerfile | 0 {analysis => datasets}/br/rfdiffusion/amulet/Dockerfile.amulet | 0 {analysis => datasets}/br/rfdiffusion/amulet/job.yaml | 0 {analysis => datasets}/br/rfdiffusion/amulet/parallel.yaml | 0 {analysis => datasets}/br/rfdiffusion/gen_rfdiff.py | 0 16 files changed, 0 insertions(+), 0 deletions(-) rename {analysis => datasets}/br/foldseek/clust_synalone.yaml (100%) rename {analysis => datasets}/br/foldseek/clust_together.yaml (100%) rename {analysis => datasets}/br/foldseek/cluster_pdb_alone.yaml (100%) rename {analysis => datasets}/br/foldseek/parallell_search.yaml (100%) rename {analysis => datasets}/br/omegafold/Dockerfile (100%) rename {analysis => datasets}/br/omegafold/parallel.yaml (100%) rename {analysis => datasets}/br/omegafold/single.yaml (100%) rename {analysis => datasets}/br/pmpnn/Dockerfile (100%) rename {analysis => datasets}/br/pmpnn/base_parallel.yaml (100%) rename {analysis => datasets}/br/pydssp/Dockerfile (100%) rename {analysis => datasets}/br/pydssp/pydssp_parallel.yaml (100%) 
rename {analysis => datasets}/br/rfdiffusion/Dockerfile (100%) rename {analysis => datasets}/br/rfdiffusion/amulet/Dockerfile.amulet (100%) rename {analysis => datasets}/br/rfdiffusion/amulet/job.yaml (100%) rename {analysis => datasets}/br/rfdiffusion/amulet/parallel.yaml (100%) rename {analysis => datasets}/br/rfdiffusion/gen_rfdiff.py (100%) diff --git a/analysis/br/foldseek/clust_synalone.yaml b/datasets/br/foldseek/clust_synalone.yaml similarity index 100% rename from analysis/br/foldseek/clust_synalone.yaml rename to datasets/br/foldseek/clust_synalone.yaml diff --git a/analysis/br/foldseek/clust_together.yaml b/datasets/br/foldseek/clust_together.yaml similarity index 100% rename from analysis/br/foldseek/clust_together.yaml rename to datasets/br/foldseek/clust_together.yaml diff --git a/analysis/br/foldseek/cluster_pdb_alone.yaml b/datasets/br/foldseek/cluster_pdb_alone.yaml similarity index 100% rename from analysis/br/foldseek/cluster_pdb_alone.yaml rename to datasets/br/foldseek/cluster_pdb_alone.yaml diff --git a/analysis/br/foldseek/parallell_search.yaml b/datasets/br/foldseek/parallell_search.yaml similarity index 100% rename from analysis/br/foldseek/parallell_search.yaml rename to datasets/br/foldseek/parallell_search.yaml diff --git a/analysis/br/omegafold/Dockerfile b/datasets/br/omegafold/Dockerfile similarity index 100% rename from analysis/br/omegafold/Dockerfile rename to datasets/br/omegafold/Dockerfile diff --git a/analysis/br/omegafold/parallel.yaml b/datasets/br/omegafold/parallel.yaml similarity index 100% rename from analysis/br/omegafold/parallel.yaml rename to datasets/br/omegafold/parallel.yaml diff --git a/analysis/br/omegafold/single.yaml b/datasets/br/omegafold/single.yaml similarity index 100% rename from analysis/br/omegafold/single.yaml rename to datasets/br/omegafold/single.yaml diff --git a/analysis/br/pmpnn/Dockerfile b/datasets/br/pmpnn/Dockerfile similarity index 100% rename from analysis/br/pmpnn/Dockerfile rename to 
datasets/br/pmpnn/Dockerfile diff --git a/analysis/br/pmpnn/base_parallel.yaml b/datasets/br/pmpnn/base_parallel.yaml similarity index 100% rename from analysis/br/pmpnn/base_parallel.yaml rename to datasets/br/pmpnn/base_parallel.yaml diff --git a/analysis/br/pydssp/Dockerfile b/datasets/br/pydssp/Dockerfile similarity index 100% rename from analysis/br/pydssp/Dockerfile rename to datasets/br/pydssp/Dockerfile diff --git a/analysis/br/pydssp/pydssp_parallel.yaml b/datasets/br/pydssp/pydssp_parallel.yaml similarity index 100% rename from analysis/br/pydssp/pydssp_parallel.yaml rename to datasets/br/pydssp/pydssp_parallel.yaml diff --git a/analysis/br/rfdiffusion/Dockerfile b/datasets/br/rfdiffusion/Dockerfile similarity index 100% rename from analysis/br/rfdiffusion/Dockerfile rename to datasets/br/rfdiffusion/Dockerfile diff --git a/analysis/br/rfdiffusion/amulet/Dockerfile.amulet b/datasets/br/rfdiffusion/amulet/Dockerfile.amulet similarity index 100% rename from analysis/br/rfdiffusion/amulet/Dockerfile.amulet rename to datasets/br/rfdiffusion/amulet/Dockerfile.amulet diff --git a/analysis/br/rfdiffusion/amulet/job.yaml b/datasets/br/rfdiffusion/amulet/job.yaml similarity index 100% rename from analysis/br/rfdiffusion/amulet/job.yaml rename to datasets/br/rfdiffusion/amulet/job.yaml diff --git a/analysis/br/rfdiffusion/amulet/parallel.yaml b/datasets/br/rfdiffusion/amulet/parallel.yaml similarity index 100% rename from analysis/br/rfdiffusion/amulet/parallel.yaml rename to datasets/br/rfdiffusion/amulet/parallel.yaml diff --git a/analysis/br/rfdiffusion/gen_rfdiff.py b/datasets/br/rfdiffusion/gen_rfdiff.py similarity index 100% rename from analysis/br/rfdiffusion/gen_rfdiff.py rename to datasets/br/rfdiffusion/gen_rfdiff.py