From 24489ae0335e9326b517513befe16e99fd8e170c Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 16 Dec 2025 23:26:15 +0100 Subject: [PATCH 1/9] update plan files for ob 0.4.0 --- Clustering_conda.yml | 57 +++---- Clustering_envmodules.yml | 309 ++++++++++++++++++++++--------------- Clustering_oras.yml | 311 +++++++++++++++++++++++--------------- 3 files changed, 410 insertions(+), 267 deletions(-) diff --git a/Clustering_conda.yml b/Clustering_conda.yml index 9a8e52c..ff7e1a6 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -1,11 +1,11 @@ id: clustering_example_conda description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2. -version: 1.4 +version: "1.5.0" benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo" -storage: http://omnibenchmark.org:9000 -benchmark_yaml_spec: 0.04 -storage_api: S3 -storage_bucket_name: clusteringexampleconda +storage: + api: S3 + endpoint: http://omnibenchmark.mls.uzh.ch:9000 + bucket_name: clusteringexampleconda software_backend: conda software_environments: clustbench: @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: "{input}/{name}/plotting_report.html" + path: plotting_report.html stages: ## clustbench data ########################################################## @@ -42,8 +42,9 @@ stages: url: https://github.com/imallona/clustbench_data commit: 366c5a2 parameters: # comments depict the possible cardinalities and the number of curated labelsets - - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1 - - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 + - dataset_generator: "fcps" + dataset_name: ["atom", "chainlink"] # 2 1 + # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2 # - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1 # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1 @@ -106,12 +107,12 @@ stages: # - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1 outputs: - id: data.matrix - path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz" + path: "{dataset}.data.gz" - id: data.true_labels - path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz" + path: "{dataset}.labels0.gz" ## clustbench methods (fastcluster) ################################################################### - + - id: clustering modules: - id: fastcluster @@ -122,7 +123,8 @@ stages: # url: /home/imallona/src/clustbench_fastcluster/ commit: "45e43d3" parameters: - - values: ["--linkage", "complete"] + - linkage: "complete" + # - values: ["--linkage", "ward"] # - values: ["--linkage", "average"] # - values: ["--linkage", "weighted"] @@ -136,7 +138,7 @@ stages: #url: /home/imallona/src/clustbench_sklearn commit: 5877378 parameters: - - values: ["--method", "birch"] + - method: "birch" # - values: ["--method", "kmeans"] # - values: ["--method", "spectral"] ## too slow # - values: ["--method", "gm"] @@ -147,7 +149,7 @@ stages: url: https://github.com/imallona/clustbench_agglomerative commit: 5454368 parameters: - - values: ["--linkage", "average"] + - linkage: "average" # - values: ["--linkage", "complete"] # - values: ["--linkage", "ward"] - id: genieclust @@ -157,7 +159,8 @@ stages: url: https://github.com/imallona/clustbench_genieclust commit: 6090043 parameters: - - values: ["--method", "genie", "--gini_threshold", 0.5] + - method: "genie" + gini_threshold: 0.5 # - values: ["--method", "gic"] # - values: ["--method", "ica"] - id: fcps @@ -168,24 +171,24 @@ stages: commit: fc37faa parameters: # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in Conda - - values: ["--method", "FCPS_Minimax", "--seed", 2] + - method: "FCPS_Minimax" + seed: 2 # - values: ["--method", "FCPS_MinEnergy", "--seed", 2] # - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] # - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] # - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] # - values: ["--method", "FCPS_Diana", "--seed", 2] # - values: ["--method", "FCPS_Fanny", "--seed", 2] - # - values: ["--method", "FCPS_Hardcl", "--seed", 2] - # - values: ["--method", "FCPS_Softcl", "--seed", 2] + # - values: ["--method", "FCPS_Hardcl", "--seed", 2] + # - values: ["--method", "FCPS_Softcl", "--seed", 2] # - values: ["--method", "FCPS_Clara", "--seed", 2] # - values: ["--method", "FCPS_PAM", "--seed", 2] inputs: - - entries: - - data.matrix - - data.true_labels + - data.matrix + - data.true_labels outputs: - id: clustering.predicted_ks_range - path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz" + path: "{dataset}_ks_range.labels.gz" - id: metrics modules: @@ -196,8 +199,7 @@ stages: url: https://github.com/imallona/clustbench_metrics commit: 9132d45 parameters: - - values: ["--metric", "normalized_clustering_accuracy"] - - values: ["--metric", "adjusted_fm_score"] + - metric: ["normalized_clustering_accuracy", "adjusted_fm_score"] # - values: ["--metric", "adjusted_mi_score"] # - values: ["--metric", "adjusted_rand_score"] # - values: ["--metric", "fm_score"] @@ -208,9 +210,8 @@ stages: # - values: ["--metric", "pair_sets_index"] # - values: ["--metric", "rand_score"] inputs: - - entries: - - clustering.predicted_ks_range - - data.true_labels + - clustering.predicted_ks_range + - data.true_labels outputs: - id: metrics.scores - path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz" + path: "{dataset}.scores.gz" diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml index 1b0afee..c955c28 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -1,11 +1,11 @@ id: clustering_example_envmodules -description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2. -version: 1.4 +description: "Clustering benchmark on Gagolewski's, true number of clusters plus minus 2." +version: "1.5.0" benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo" -storage: http://omnibenchmark.org:9000 -benchmark_yaml_spec: 0.04 -storage_api: S3 -storage_bucket_name: clusteringexampleenvmodules +storage: + api: S3 + endpoint: http://omnibenchmark.mls.uzh.ch:9000 + bucket_name: clusteringexampleenvmodules software_backend: envmodules software_environments: clustbench: @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: "{input}/{name}/plotting_report.html" + path: plotting_report.html stages: ## clustbench data ########################################################## @@ -42,76 +42,137 @@ stages: url: https://github.com/imallona/clustbench_data commit: 366c5a2 parameters: # comments depict the possible cardinalities and the number of curated labelsets - - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1 - - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6 - # - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] # 3, 5 2 - # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] # 3, 4 2 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6 2 - # - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1 + - dataset_generator: "fcps" + dataset_name: ["atom", "chainlink"] # 2 1 + + # - dataset_generator: "fcps" + # dataset_name: "engytime" # 2 2 + # - dataset_generator: "fcps" + # dataset_name: "hepta" # 7 1 + # - dataset_generator: "fcps" + # dataset_name: "lsun" # 3 1 + # - dataset_generator: "fcps" + # dataset_name: "target" # 2, 6 2 + # - dataset_generator: "fcps" + # dataset_name: "tetra" # 4 1 + # - dataset_generator: "fcps" + # dataset_name: "twodiamonds" # 2 1 + # - dataset_generator: "fcps" + # dataset_name: "wingnut" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "dense" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "fuzzyx" # 2, 4, 5 6 + # - dataset_generator: "graves" + # dataset_name: "line" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "parabolic" # 2, 42 + # - dataset_generator: "graves" + # dataset_name: "ring" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "ring_noisy" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "ring_outliers" # 2, 52 + # - dataset_generator: "graves" + # dataset_name: "zigzag" # 3, 5 2 + # - dataset_generator: "graves" + # dataset_name: "zigzag_noisy" # 3, 52 + # - dataset_generator: "graves" + # dataset_name: "zigzag_outliers" # 3, 52 + # - dataset_generator: "other" + # dataset_name: "chameleon_t4_8k" # 6 1 + # - dataset_generator: "other" + # dataset_name: "chameleon_t5_8k" # 6 1 + # - dataset_generator: "other" + # dataset_name: "hdbscan" # 6 1 + # - dataset_generator: "other" + # dataset_name: "iris" # 3 1 + # - dataset_generator: "other" + # dataset_name: "iris5" # 3 1 + # - dataset_generator: "other" + # dataset_name: "square" # 2 1 + # - dataset_generator: "sipu" + # dataset_name: "aggregation" # 7 1 + # - dataset_generator: "sipu" + # dataset_name: "compound" # 4, 5, 6 5 + # - dataset_generator: "sipu" + # dataset_name: "flame" # 2 2 + # - dataset_generator: "sipu" + # dataset_name: "jain" # 2 1 + # - dataset_generator: "sipu" + # dataset_name: "pathbased" # 3, 4 2 + # - dataset_generator: "sipu" + # dataset_name: "r15" # 8, 9, 15 3 + # - dataset_generator: "sipu" + # dataset_name: "spiral" # 3 1 + # - dataset_generator: "sipu" + # dataset_name: "unbalance" # 8 1 + # - dataset_generator: "uci" + # dataset_name: "ecoli" # 8 1 + # - dataset_generator: "uci" + # dataset_name: "ionosphere" # 2 1 + # - dataset_generator: "uci" + # dataset_name: "sonar" # 2 1 + # - dataset_generator: "uci" + # dataset_name: "statlog" # 7 1 + # - dataset_generator: "uci" + # dataset_name: "wdbc" # 2 1 + # - dataset_generator: "uci" + # dataset_name: "wine" # 3 1 + # - dataset_generator: "uci" + # dataset_name: "yeast" # 10 1 + # - dataset_generator: "wut" + # dataset_name: "circles" # 4 1 + # - dataset_generator: "wut" + # dataset_name: "cross" # 4 1 + # - dataset_generator: "wut" + # dataset_name: "graph" # 10 1 + # - dataset_generator: "wut" + # dataset_name: "isolation" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "labirynth" # 6 1 + # - dataset_generator: "wut" + # dataset_name: "mk1" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "mk2" # 2 1 + # - dataset_generator: "wut" + # dataset_name: "mk3" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "mk4" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "olympic" # 5 1 + # - dataset_generator: "wut" + # dataset_name: "smile" # 4, 6 2 + # - dataset_generator: "wut" + # dataset_name: "stripes" # 2 1 + # - dataset_generator: "wut" + # dataset_name: "trajectories" # 4 1 + # - dataset_generator: "wut" + # dataset_name: "trapped_lovers" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "twosplashes" # 2 1 + # - dataset_generator: "wut" + # dataset_name: "windows" # 5 1 + # - dataset_generator: "wut" + # dataset_name: "x1" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "x2" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "x3" # 4 1 + # - dataset_generator: "wut" + # dataset_name: "z1" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "z2" # 5 1 + # - dataset_generator: "wut" + # dataset_name: "z3" # 4 1 outputs: - id: data.matrix - path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz" + path: "{dataset}.data.gz" - id: data.true_labels - path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz" + path: "{dataset}.labels0.gz" ## clustbench methods (fastcluster) ################################################################### - + - id: clustering modules: - id: fastcluster @@ -122,12 +183,13 @@ stages: # url: /home/imallona/src/clustbench_fastcluster/ commit: "45e43d3" parameters: - - values: ["--linkage", "complete"] - # - values: ["--linkage", "ward"] - # - values: ["--linkage", "average"] - # - values: ["--linkage", "weighted"] - # - values: ["--linkage", "median"] - # - values: ["--linkage", "centroid"] + - linkage: "complete" + + # - linkage: "ward" + # - linkage: "average" + # - linkage: "weighted" + # - linkage: "median" + # - linkage: "centroid" - id: sklearn name: "sklearn" software_environment: "clustbench" @@ -136,10 +198,10 @@ stages: #url: /home/imallona/src/clustbench_sklearn commit: 5877378 parameters: - - values: ["--method", "birch"] - # - values: ["--method", "kmeans"] - # - values: ["--method", "spectral"] ## too slow - # - values: ["--method", "gm"] + - method: "birch" + # - method: "kmeans" + # - method: "spectral" ## too slow + # - method: "gm" - id: agglomerative name: "agglomerative" software_environment: "clustbench" @@ -147,9 +209,9 @@ stages: url: https://github.com/imallona/clustbench_agglomerative commit: 5454368 parameters: - - values: ["--linkage", "average"] - # - values: ["--linkage", "complete"] - # - values: ["--linkage", "ward"] + - linkage: "average" + # - linkage: "complete" + # - linkage: "ward" - id: genieclust name: "genieclust" software_environment: "clustbench" @@ -157,9 +219,10 @@ stages: url: https://github.com/imallona/clustbench_genieclust commit: 6090043 parameters: - - values: ["--method", "genie", "--gini_threshold", 0.5] - # - values: ["--method", "gic"] - # - values: ["--method", "ica"] + - method: "genie" + gini_threshold: 0.5 + # - method: "gic" + # - method: "ica" - id: fcps name: "fcps" software_environment: "fcps" @@ -167,25 +230,35 @@ stages: url: https://github.com/imallona/clustbench_fcps commit: fc37faa parameters: - # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in Conda - - values: ["--method", "FCPS_Minimax", "--seed", 2] - # - values: ["--method", "FCPS_MinEnergy", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] - # - values: ["--method", "FCPS_Diana", "--seed", 2] - # - values: ["--method", "FCPS_Fanny", "--seed", 2] - # - values: ["--method", "FCPS_Hardcl", "--seed", 2] - # - values: ["--method", "FCPS_Softcl", "--seed", 2] - # - values: ["--method", "FCPS_Clara", "--seed", 2] - # - values: ["--method", "FCPS_PAM", "--seed", 2] + # - method: "FCPS_AdaptiveDensityPeak" # not in Conda + - method: "FCPS_Minimax" + seed: 2 + # - method: "FCPS_MinEnergy" + # seed: 2 + # - method: "FCPS_HDBSCAN_2" + # seed: 2 + # - method: "FCPS_HDBSCAN_4" + # seed: 2 + # - method: "FCPS_HDBSCAN_8" + # seed: 2 + # - method: "FCPS_Diana" + # seed: 2 + # - method: "FCPS_Fanny" + # seed: 2 + # - method: "FCPS_Hardcl" + # seed: 2 + # - method: "FCPS_Softcl" + # seed: 2 + # - method: "FCPS_Clara" + # seed: 2 + # - method: "FCPS_PAM" + # seed: 2 inputs: - - entries: - - data.matrix - - data.true_labels + - data.matrix + - data.true_labels outputs: - id: clustering.predicted_ks_range - path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz" + path: "{dataset}_ks_range.labels.gz" - id: metrics modules: @@ -196,21 +269,19 @@ stages: url: https://github.com/imallona/clustbench_metrics commit: 9132d45 parameters: - - values: ["--metric", "normalized_clustering_accuracy"] - - values: ["--metric", "adjusted_fm_score"] - # - values: ["--metric", "adjusted_mi_score"] - # - values: ["--metric", "adjusted_rand_score"] - # - values: ["--metric", "fm_score"] - # - values: ["--metric", "mi_score"] - # - values: ["--metric", "normalized_clustering_accuracy"] - # - values: ["--metric", "normalized_mi_score"] - # - values: ["--metric", "normalized_pivoted_accuracy"] - # - values: ["--metric", "pair_sets_index"] - # - values: ["--metric", "rand_score"] + - metric: ["normalized_clustering_accuracy", "adjusted_fm_score"] + # - metric: "adjusted_mi_score" + # - metric: "adjusted_rand_score" + # - metric: "fm_score" + # - metric: "mi_score" + # - metric: "normalized_clustering_accuracy" + # - metric: "normalized_mi_score" + # - metric: "normalized_pivoted_accuracy" + # - metric: "pair_sets_index" + # - metric: "rand_score" inputs: - - entries: - - clustering.predicted_ks_range - - data.true_labels + - clustering.predicted_ks_range + - data.true_labels outputs: - id: metrics.scores - path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz" + path: "{dataset}.scores.gz" diff --git a/Clustering_oras.yml b/Clustering_oras.yml index b8afaeb..139a3a4 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -1,11 +1,11 @@ id: clustering_example_oras -description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2. -version: 1.4 +description: "Clustering benchmark on Gagolewski's, true number of clusters plus minus 2." +version: "1.5.0" benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo" -storage: http://omnibenchmark.org:9000 -benchmark_yaml_spec: 0.04 -storage_api: S3 -storage_bucket_name: clusteringexampleoras +storage: + api: S3 + endpoint: http://omnibenchmark.mls.uzh.ch:9000 + bucket_name: clusteringexampleoras software_backend: apptainer software_environments: clustbench: @@ -21,7 +21,7 @@ software_environments: metric_collectors: - id: plotting name: "Single-backend metric collector." - software_environment: "fcps" + software_environment: fcps repository: url: https://github.com/imallona/clustering_report commit: bbb9d56 @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: "{input}/{name}/plotting_report.html" + path: plotting_report.html stages: ## clustbench data ########################################################## @@ -42,76 +42,137 @@ stages: url: https://github.com/imallona/clustbench_data commit: 366c5a2 parameters: # comments depict the possible cardinalities and the number of curated labelsets - - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1 - - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6 - # - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] # 3, 5 2 - # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] # 3, 4 2 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6 2 - # - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1 + - dataset_generator: "fcps" + dataset_name: ["atom", "chainlink"] # 2 1 + + # - dataset_generator: "fcps" + # dataset_name: "engytime" # 2 2 + # - dataset_generator: "fcps" + # dataset_name: "hepta" # 7 1 + # - dataset_generator: "fcps" + # dataset_name: "lsun" # 3 1 + # - dataset_generator: "fcps" + # dataset_name: "target" # 2, 6 2 + # - dataset_generator: "fcps" + # dataset_name: "tetra" # 4 1 + # - dataset_generator: "fcps" + # dataset_name: "twodiamonds" # 2 1 + # - dataset_generator: "fcps" + # dataset_name: "wingnut" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "dense" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "fuzzyx" # 2, 4, 5 6 + # - dataset_generator: "graves" + # dataset_name: "line" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "parabolic" # 2, 42 + # - dataset_generator: "graves" + # dataset_name: "ring" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "ring_noisy" # 2 1 + # - dataset_generator: "graves" + # dataset_name: "ring_outliers" # 2, 52 + # - dataset_generator: "graves" + # dataset_name: "zigzag" # 3, 5 2 + # - dataset_generator: "graves" + # dataset_name: "zigzag_noisy" # 3, 52 + # - dataset_generator: "graves" + # dataset_name: "zigzag_outliers" # 3, 52 + # - dataset_generator: "other" + # dataset_name: "chameleon_t4_8k" # 6 1 + # - dataset_generator: "other" + # dataset_name: "chameleon_t5_8k" # 6 1 + # - dataset_generator: "other" + # dataset_name: "hdbscan" # 6 1 + # - dataset_generator: "other" + # dataset_name: "iris" # 3 1 + # - dataset_generator: "other" + # dataset_name: "iris5" # 3 1 + # - dataset_generator: "other" + # dataset_name: "square" # 2 1 + # - dataset_generator: "sipu" + # dataset_name: "aggregation" # 7 1 + # - dataset_generator: "sipu" + # dataset_name: "compound" # 4, 5, 6 5 + # - dataset_generator: "sipu" + # dataset_name: "flame" # 2 2 + # - dataset_generator: "sipu" + # dataset_name: "jain" # 2 1 + # - dataset_generator: "sipu" + # dataset_name: "pathbased" # 3, 4 2 + # - dataset_generator: "sipu" + # dataset_name: "r15" # 8, 9, 15 3 + # - dataset_generator: "sipu" + # dataset_name: "spiral" # 3 1 + # - dataset_generator: "sipu" + # dataset_name: "unbalance" # 8 1 + # - dataset_generator: "uci" + # dataset_name: "ecoli" # 8 1 + # - dataset_generator: "uci" + # dataset_name: "ionosphere" # 2 1 + # - dataset_generator: "uci" + # dataset_name: "sonar" # 2 1 + # - dataset_generator: "uci" + # dataset_name: "statlog" # 7 1 + # - dataset_generator: "uci" + # dataset_name: "wdbc" # 2 1 + # - dataset_generator: "uci" + # dataset_name: "wine" # 3 1 + # - dataset_generator: "uci" + # dataset_name: "yeast" # 10 1 + # - dataset_generator: "wut" + # dataset_name: "circles" # 4 1 + # - dataset_generator: "wut" + # dataset_name: "cross" # 4 1 + # - dataset_generator: "wut" + # dataset_name: "graph" # 10 1 + # - dataset_generator: "wut" + # dataset_name: "isolation" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "labirynth" # 6 1 + # - dataset_generator: "wut" + # dataset_name: "mk1" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "mk2" # 2 1 + # - dataset_generator: "wut" + # dataset_name: "mk3" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "mk4" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "olympic" # 5 1 + # - dataset_generator: "wut" + # dataset_name: "smile" # 4, 6 2 + # - dataset_generator: "wut" + # dataset_name: "stripes" # 2 1 + # - dataset_generator: "wut" + # dataset_name: "trajectories" # 4 1 + # - dataset_generator: "wut" + # dataset_name: "trapped_lovers" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "twosplashes" # 2 1 + # - dataset_generator: "wut" + # dataset_name: "windows" # 5 1 + # - dataset_generator: "wut" + # dataset_name: "x1" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "x2" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "x3" # 4 1 + # - dataset_generator: "wut" + # dataset_name: "z1" # 3 1 + # - dataset_generator: "wut" + # dataset_name: "z2" # 5 1 + # - dataset_generator: "wut" + # dataset_name: "z3" # 4 1 outputs: - id: data.matrix - path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz" + path: "{dataset}.data.gz" - id: data.true_labels - path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz" + path: "{dataset}.labels0.gz" ## clustbench methods (fastcluster) ################################################################### - + - id: clustering modules: - id: fastcluster @@ -122,12 +183,13 @@ stages: # url: /home/imallona/src/clustbench_fastcluster/ commit: "45e43d3" parameters: - - values: ["--linkage", "complete"] - # - values: ["--linkage", "ward"] - # - values: ["--linkage", "average"] - # - values: ["--linkage", "weighted"] - # - values: ["--linkage", "median"] - # - values: ["--linkage", "centroid"] + - linkage: "complete" + + # - linkage: "ward" + # - linkage: "average" + # - linkage: "weighted" + # - linkage: "median" + # - linkage: "centroid" - id: sklearn name: "sklearn" software_environment: "clustbench" @@ -136,10 +198,10 @@ stages: #url: /home/imallona/src/clustbench_sklearn commit: 5877378 parameters: - - values: ["--method", "birch"] - # - values: ["--method", "kmeans"] - # - values: ["--method", "spectral"] ## too slow - # - values: ["--method", "gm"] + - method: "birch" + # - method: "kmeans" + # - method: "spectral" ## too slow + # - method: "gm" - id: agglomerative name: "agglomerative" software_environment: "clustbench" @@ -147,9 +209,9 @@ stages: url: https://github.com/imallona/clustbench_agglomerative commit: 5454368 parameters: - - values: ["--linkage", "average"] - # - values: ["--linkage", "complete"] - # - values: ["--linkage", "ward"] + - linkage: "average" + # - linkage: "complete" + # - linkage: "ward" - id: genieclust name: "genieclust" software_environment: "clustbench" @@ -157,9 +219,10 @@ stages: url: https://github.com/imallona/clustbench_genieclust commit: 6090043 parameters: - - values: ["--method", "genie", "--gini_threshold", 0.5] - # - values: ["--method", "gic"] - # - values: ["--method", "ica"] + - method: "genie" + gini_threshold: 0.5 + # - method: "gic" + # - method: "ica" - id: fcps name: "fcps" software_environment: "fcps" @@ -167,25 +230,35 @@ stages: url: https://github.com/imallona/clustbench_fcps commit: fc37faa parameters: - # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in Conda - - values: ["--method", "FCPS_Minimax", "--seed", 2] - # - values: ["--method", "FCPS_MinEnergy", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] - # - values: ["--method", "FCPS_Diana", "--seed", 2] - # - values: ["--method", "FCPS_Fanny", "--seed", 2] - # - values: ["--method", "FCPS_Hardcl", "--seed", 2] - # - values: ["--method", "FCPS_Softcl", "--seed", 2] - # - values: ["--method", "FCPS_Clara", "--seed", 2] - # - values: ["--method", "FCPS_PAM", "--seed", 2] + # - method: "FCPS_AdaptiveDensityPeak" # not in Conda + - method: "FCPS_Minimax" + seed: 2 + # - method: "FCPS_MinEnergy" + # seed: 2 + # - method: "FCPS_HDBSCAN_2" + # seed: 2 + # - method: "FCPS_HDBSCAN_4" + # seed: 2 + # - method: "FCPS_HDBSCAN_8" + # seed: 2 + # - method: "FCPS_Diana" + # seed: 2 + # - method: "FCPS_Fanny" + # seed: 2 + # - method: "FCPS_Hardcl" + # seed: 2 + # - method: "FCPS_Softcl" + # seed: 2 + # - method: "FCPS_Clara" + # seed: 2 + # - method: "FCPS_PAM" + # seed: 2 inputs: - - entries: - - data.matrix - - data.true_labels + - data.matrix + - data.true_labels outputs: - id: clustering.predicted_ks_range - path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz" + path: "{dataset}_ks_range.labels.gz" - id: metrics modules: @@ -196,21 +269,19 @@ stages: url: https://github.com/imallona/clustbench_metrics commit: 9132d45 parameters: - - values: ["--metric", "normalized_clustering_accuracy"] - - values: ["--metric", "adjusted_fm_score"] - # - values: ["--metric", "adjusted_mi_score"] - # - values: ["--metric", "adjusted_rand_score"] - # - values: ["--metric", "fm_score"] - # - values: ["--metric", "mi_score"] - # - values: ["--metric", "normalized_clustering_accuracy"] - # - values: ["--metric", "normalized_mi_score"] - # - values: ["--metric", "normalized_pivoted_accuracy"] - # - values: ["--metric", "pair_sets_index"] - # - values: ["--metric", "rand_score"] + - metric: ["normalized_clustering_accuracy", "adjusted_fm_score"] + # - metric: "adjusted_mi_score" + # - metric: "adjusted_rand_score" + # - metric: "fm_score" + # - metric: "mi_score" + # - metric: "normalized_clustering_accuracy" + # - metric: "normalized_mi_score" + # - metric: "normalized_pivoted_accuracy" + # - metric: "pair_sets_index" + # - metric: "rand_score" inputs: - - entries: - - clustering.predicted_ks_range - - data.true_labels + - clustering.predicted_ks_range + - data.true_labels outputs: - id: metrics.scores - path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz" + path: "{dataset}.scores.gz" From a957616ba7b2a074e964896e56d07962f316e8e9 Mon Sep 17 00:00:00 2001 From: ben Date: Wed, 7 Jan 2026 17:21:58 +0100 Subject: [PATCH 2/9] fix path for metric collector kudos to atreya for figuring this out --- Clustering_conda.yml | 2 +- Clustering_envmodules.yml | 2 +- Clustering_oras.yml | 57 +++------------------------------------ 3 files changed, 5 insertions(+), 56 deletions(-) diff --git a/Clustering_conda.yml b/Clustering_conda.yml index ff7e1a6..58d3f89 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: plotting_report.html + path: "{input}/{name}/plotting_report.html" stages: ## clustbench data ########################################################## diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml index c955c28..c46f852 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: plotting_report.html + path: "{input}/{name}/plotting_report.html" stages: ## clustbench data ########################################################## diff --git a/Clustering_oras.yml b/Clustering_oras.yml index 139a3a4..8090728 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: plotting_report.html + path: "{input}/{name}/plotting_report.html" stages: ## clustbench data ########################################################## @@ -43,7 +43,7 @@ stages: commit: 366c5a2 parameters: # comments depict the possible cardinalities and the number of curated labelsets - dataset_generator: "fcps" - dataset_name: ["atom", "chainlink"] # 2 1 + dataset_name: ["atom"] # 2 1 # - dataset_generator: "fcps" # dataset_name: "engytime" # 2 2 @@ -202,57 +202,6 @@ stages: # - method: "kmeans" # - method: "spectral" ## too slow # - method: "gm" - - id: agglomerative - name: "agglomerative" - software_environment: "clustbench" - repository: - url: https://github.com/imallona/clustbench_agglomerative - commit: 5454368 - parameters: - - linkage: "average" - # - linkage: "complete" - # - linkage: "ward" - - id: genieclust - name: "genieclust" - software_environment: "clustbench" - repository: - url: https://github.com/imallona/clustbench_genieclust - commit: 6090043 - parameters: - - method: "genie" - gini_threshold: 0.5 - # - method: "gic" - # - method: "ica" - - id: fcps - name: "fcps" - software_environment: "fcps" - repository: - url: https://github.com/imallona/clustbench_fcps - commit: fc37faa - parameters: - # - method: "FCPS_AdaptiveDensityPeak" # not in Conda - - method: "FCPS_Minimax" - seed: 2 - # - method: "FCPS_MinEnergy" - # seed: 2 - # - method: "FCPS_HDBSCAN_2" - # seed: 2 - # - method: "FCPS_HDBSCAN_4" - # seed: 2 - # - method: "FCPS_HDBSCAN_8" - # seed: 2 - # - method: "FCPS_Diana" - # seed: 2 - # - method: "FCPS_Fanny" - # seed: 2 - # - method: "FCPS_Hardcl" - # seed: 2 - # - method: "FCPS_Softcl" - # seed: 2 - # - method: "FCPS_Clara" - # seed: 2 - # - method: "FCPS_PAM" - # seed: 2 inputs: - data.matrix - data.true_labels @@ -269,7 +218,7 @@ stages: url: https://github.com/imallona/clustbench_metrics commit: 9132d45 parameters: - - metric: ["normalized_clustering_accuracy", "adjusted_fm_score"] + - metric: ["normalized_clustering_accuracy"] # - metric: "adjusted_mi_score" # - metric: "adjusted_rand_score" # - metric: "fm_score" From 6e01fe92cd7f6fe2af633ce58e84c5b29c76c2a4 Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 13 Jan 2026 13:22:20 +0100 Subject: [PATCH 3/9] update data generator commit --- Clustering_conda.yml | 2 +- Clustering_envmodules.yml | 2 +- Clustering_oras.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Clustering_conda.yml b/Clustering_conda.yml index 58d3f89..9a1edd3 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -40,7 +40,7 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_data - commit: 366c5a2 + commit: 8fd8744 parameters: # comments depict the possible cardinalities and the number of curated labelsets - dataset_generator: "fcps" dataset_name: ["atom", "chainlink"] # 2 1 diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml index c46f852..401cd32 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -40,7 +40,7 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_data - commit: 366c5a2 + commit: 8fd8744 parameters: # comments depict the possible cardinalities and the number of curated labelsets - dataset_generator: "fcps" dataset_name: ["atom", "chainlink"] # 2 1 diff --git a/Clustering_oras.yml b/Clustering_oras.yml index 8090728..8925bee 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -40,7 +40,7 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_data - commit: 366c5a2 + commit: 8fd8744 parameters: # comments depict the possible cardinalities and the number of curated labelsets - dataset_generator: "fcps" dataset_name: ["atom"] # 2 1 From ebf933cc172a5729fa58ccad143718961b89e883 Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 13 Jan 2026 13:25:25 +0100 Subject: [PATCH 4/9] no need for wget --- envs/clustbench.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/envs/clustbench.yml b/envs/clustbench.yml index 6cb6201..fe3e3a0 100644 --- a/envs/clustbench.yml +++ b/envs/clustbench.yml @@ -8,7 +8,6 @@ dependencies: - pip: #- "clustering-benchmarks==1.1.5" - 'https://github.com/gagolews/clustering-benchmarks/releases/download/v1.1.5/clustering_benchmarks-1.1.5.tar.gz' - - "wget" - "fastcluster==1.2.6" - "numpy==1.26.4" - "scipy==1.14.1" From 916061ee90eebed09ec62dbbff2353c9d7e574fb Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 13 Jan 2026 16:25:33 +0100 Subject: [PATCH 5/9] update modules to 0.4 metadata --- Clustering_conda.yml | 243 ++++++++++++++++++++++---------------- Clustering_envmodules.yml | 84 +++++-------- Clustering_oras.yml | 83 +++++++++---- 3 files changed, 229 insertions(+), 181 deletions(-) diff --git a/Clustering_conda.yml b/Clustering_conda.yml index 9a1edd3..dd8755f 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -40,71 +40,118 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_data - commit: 8fd8744 + commit: fc67ebd parameters: # comments depict the possible cardinalities and the number of curated labelsets - dataset_generator: "fcps" dataset_name: ["atom", "chainlink"] # 2 1 - - # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1 - # - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6 - # - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1 - # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2 - # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] # 3, 5 2 - # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1 - # - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] # 3, 4 2 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1 - # - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1 - # - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6 2 - # - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] # 4 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1 - # - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1 + # - dataset_generator: "fcps" + # dataset_name: ["engytime", "hepta", "lsun", "target", "tetra", "twodiamonds", "wingnut"] # 7 1, 3 1, 2,6 2, 4 1, 2 1, 2 1 + # - dataset_generator: "graves" + # dataset_name: ["dense"] # 2 1 + # - dataset_generator: "graves" + # dataset_name: ["fuzzyx"] # 2,4,5 6 + # - dataset_generator: "graves" + # dataset_name: ["line"] # 2 1 + # - dataset_generator: "graves" + # dataset_name: ["parabolic"] # 2,4 2 + # - dataset_generator: "graves" + # dataset_name: ["ring"] # 2 1 + # - dataset_generator: "graves" + # dataset_name: ["ring_noisy"] # 2 1 + # - dataset_generator: "graves" + # dataset_name: ["ring_outliers"] # 2,5 2 + # - dataset_generator: "graves" + # dataset_name: ["zigzag"] # 3,5 2 + # - dataset_generator: "graves" + # dataset_name: ["zigzag_noisy"] # 3,5 2 + # - dataset_generator: "graves" + # dataset_name: ["zigzag_outliers"] # 3,5 2 + # - dataset_generator: "other" + # dataset_name: ["chameleon_t4_8k"] # 6 1 + # - dataset_generator: "other" + # dataset_name: ["chameleon_t5_8k"] # 6 1 + # - dataset_generator: "other" + # dataset_name: ["hdbscan"] # 6 1 + # - dataset_generator: "other" + # dataset_name: ["iris"] # 3 1 + # - dataset_generator: "other" + # dataset_name: ["iris5"] # 3 1 + # - dataset_generator: "other" + # dataset_name: ["square"] # 2 1 + # - dataset_generator: "sipu" + # dataset_name: ["aggregation"] # 7 1 + # - dataset_generator: "sipu" + # dataset_name: ["compound"] # 4,5,6 5 + # - dataset_generator: "sipu" + # dataset_name: ["flame"] # 2 2 + # - dataset_generator: "sipu" + # dataset_name: ["jain"] # 2 1 + # - dataset_generator: "sipu" + # dataset_name: ["pathbased"] # 3,4 2 + # - dataset_generator: "sipu" + # dataset_name: ["r15"] # 8,9,15 3 + # - dataset_generator: "sipu" + # dataset_name: ["spiral"] # 3 1 + # - dataset_generator: "sipu" + # dataset_name: ["unbalance"] # 8 1 + # - dataset_generator: "uci" + # dataset_name: ["ecoli"] # 8 1 + # - dataset_generator: "uci" + # dataset_name: ["ionosphere"] # 2 1 + # - dataset_generator: "uci" + # dataset_name: ["sonar"] # 2 1 + # - dataset_generator: "uci" + # dataset_name: ["statlog"] # 7 1 + # - dataset_generator: "uci" + # dataset_name: ["wdbc"] # 2 1 + # - dataset_generator: "uci" + # dataset_name: ["wine"] # 3 1 + # - dataset_generator: "uci" + # dataset_name: ["yeast"] # 10 1 + # - dataset_generator: "wut" + # dataset_name: ["circles"] # 4 1 + # - dataset_generator: "wut" + # dataset_name: ["cross"] # 4 1 + # - dataset_generator: "wut" + # dataset_name: ["graph"] # 10 1 + # - dataset_generator: "wut" + # dataset_name: ["isolation"] # 3 1 + # - dataset_generator: "wut" + # dataset_name: ["labirynth"] # 6 1 + # - dataset_generator: "wut" + # dataset_name: ["mk1"] # 3 1 + # - dataset_generator: "wut" + # dataset_name: ["mk2"] # 2 1 + # - dataset_generator: "wut" + # dataset_name: ["mk3"] # 3 1 + # - dataset_generator: "wut" + # dataset_name: ["mk4"] # 3 1 + # - dataset_generator: "wut" + # dataset_name: ["olympic"] # 5 1 + # - dataset_generator: "wut" + # dataset_name: ["smile"] # 4,6 2 + # - dataset_generator: "wut" + # dataset_name: ["stripes"] # 2 1 + # - dataset_generator: "wut" + # dataset_name: ["trajectories"] # 4 1 + # - dataset_generator: "wut" + # dataset_name: ["trapped_lovers"] # 3 1 + # - dataset_generator: "wut" + # dataset_name: ["twosplashes"] # 2 1 + # - dataset_generator: "wut" + # dataset_name: ["windows"] # 5 1 + # - dataset_generator: "wut" + # dataset_name: ["x1"] # 3 1 + # - dataset_generator: "wut" + # dataset_name: ["x2"] # 3 1 + # - dataset_generator: "wut" + # dataset_name: ["x3"] # 4 1 + # - dataset_generator: "wut" + # dataset_name: ["z1"] # 3 1 + # - dataset_generator: "wut" + # dataset_name: ["z2"] # 5 1 + # - dataset_generator: "wut" + # dataset_name: ["z3"] # 4 1 outputs: - id: data.matrix path: "{dataset}.data.gz" @@ -120,69 +167,59 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_fastcluster - # url: /home/imallona/src/clustbench_fastcluster/ - commit: "45e43d3" + commit: e644ce5 parameters: - linkage: "complete" - - # - values: ["--linkage", "ward"] - # - values: ["--linkage", "average"] - # - values: ["--linkage", "weighted"] - # - values: ["--linkage", "median"] - # - values: ["--linkage", "centroid"] + #- linkage: ["ward", "average", "weighted", "median", "centroid"] - id: sklearn name: "sklearn" software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_sklearn - #url: /home/imallona/src/clustbench_sklearn - commit: 5877378 + commit: dcf35e1 parameters: - method: "birch" - # - values: ["--method", "kmeans"] - # - values: ["--method", "spectral"] ## too slow - # - values: ["--method", "gm"] + # ["kmeans, "gm"] + # ["spectral"] ## too slow - id: agglomerative name: "agglomerative" software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_agglomerative - commit: 5454368 + commit: 9d086a9 parameters: - linkage: "average" - # - values: ["--linkage", "complete"] - # - values: ["--linkage", "ward"] + # ["complete", "ward"] - id: genieclust name: "genieclust" software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_genieclust - commit: 6090043 + commit: 7d9e799 parameters: - method: "genie" + # method: ["gic", "ica"] gini_threshold: 0.5 - # - values: ["--method", "gic"] - # - values: ["--method", "ica"] - id: fcps name: "fcps" software_environment: "fcps" repository: url: https://github.com/imallona/clustbench_fcps - commit: fc37faa + commit: e780fed parameters: - # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in Conda - method: "FCPS_Minimax" seed: 2 - # - values: ["--method", "FCPS_MinEnergy", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_2", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_4", "--seed", 2] - # - values: ["--method", "FCPS_HDBSCAN_8", "--seed", 2] - # - values: ["--method", "FCPS_Diana", "--seed", 2] - # - values: ["--method", "FCPS_Fanny", "--seed", 2] - # - values: ["--method", "FCPS_Hardcl", "--seed", 2] - # - values: ["--method", "FCPS_Softcl", "--seed", 2] - # - values: ["--method", "FCPS_Clara", "--seed", 2] - # - values: ["--method", "FCPS_PAM", "--seed", 2] + # - "FCPS_AdaptiveDensityPeak" # not in Conda + # - "FCPS_MinEnergy", + # - "FCPS_HDBSCAN_2", + # - "FCPS_HDBSCAN_4", + # - "FCPS_HDBSCAN_8", + # - "FCPS_Diana", + # - "FCPS_Fanny", + # - "FCPS_Hardcl", + # - "FCPS_Softcl", + # - "FCPS_Clara", + # - "FCPS_PAM" inputs: - data.matrix - data.true_labels @@ -197,18 +234,18 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_metrics - commit: 9132d45 + commit: c4eda85 parameters: - metric: ["normalized_clustering_accuracy", "adjusted_fm_score"] - # - values: ["--metric", "adjusted_mi_score"] - # - values: ["--metric", "adjusted_rand_score"] - # - values: ["--metric", "fm_score"] - # - values: ["--metric", "mi_score"] - # - values: ["--metric", "normalized_clustering_accuracy"] - # - values: ["--metric", "normalized_mi_score"] - # - values: ["--metric", "normalized_pivoted_accuracy"] - # - values: ["--metric", "pair_sets_index"] - # - values: ["--metric", "rand_score"] + # - "adjusted_mi_score" + # - "adjusted_rand_score" + # - "fm_score" + # - "mi_score" + # - "normalized_clustering_accuracy" + # - "normalized_mi_score" + # - "normalized_pivoted_accuracy" + # - "pair_sets_index" + # - "rand_score" inputs: - clustering.predicted_ks_range - data.true_labels diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml index 401cd32..990f306 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -40,7 +40,7 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_data - commit: 8fd8744 + commit: fc67ebd parameters: # comments depict the possible cardinalities and the number of curated labelsets - dataset_generator: "fcps" dataset_name: ["atom", "chainlink"] # 2 1 @@ -180,79 +180,59 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_fastcluster - # url: /home/imallona/src/clustbench_fastcluster/ - commit: "45e43d3" + commit: e644ce5 parameters: - linkage: "complete" - - # - linkage: "ward" - # - linkage: "average" - # - linkage: "weighted" - # - linkage: "median" - # - linkage: "centroid" + #- linkage: ["ward", "average", "weighted", "median", "centroid"] - id: sklearn name: "sklearn" software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_sklearn - #url: /home/imallona/src/clustbench_sklearn - commit: 5877378 + commit: dcf35e1 parameters: - method: "birch" - # - method: "kmeans" - # - method: "spectral" ## too slow - # - method: "gm" + # ["kmeans, "gm"] + # ["spectral"] ## too slow - id: agglomerative name: "agglomerative" software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_agglomerative - commit: 5454368 + commit: 9d086a9 parameters: - linkage: "average" - # - linkage: "complete" - # - linkage: "ward" + # ["complete", "ward"] - id: genieclust name: "genieclust" software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_genieclust - commit: 6090043 + commit: 7d9e799 parameters: - method: "genie" gini_threshold: 0.5 - # - method: "gic" - # - method: "ica" + # method: ["gic", "ica"] - id: fcps name: "fcps" software_environment: "fcps" repository: url: https://github.com/imallona/clustbench_fcps - commit: fc37faa + commit: e780fed parameters: - # - method: "FCPS_AdaptiveDensityPeak" # not in Conda - method: "FCPS_Minimax" seed: 2 - # - method: "FCPS_MinEnergy" - # seed: 2 - # - method: "FCPS_HDBSCAN_2" - # seed: 2 - # - method: "FCPS_HDBSCAN_4" - # seed: 2 - # - method: "FCPS_HDBSCAN_8" - # seed: 2 - # - method: "FCPS_Diana" - # seed: 2 - # - method: "FCPS_Fanny" - # seed: 2 - # - method: "FCPS_Hardcl" - # seed: 2 - # - method: "FCPS_Softcl" - # seed: 2 - # - method: "FCPS_Clara" - # seed: 2 - # - method: "FCPS_PAM" - # seed: 2 + # - "FCPS_AdaptiveDensityPeak" # not in Conda + # - "FCPS_MinEnergy", + # - "FCPS_HDBSCAN_2", + # - "FCPS_HDBSCAN_4", + # - "FCPS_HDBSCAN_8", + # - "FCPS_Diana", + # - "FCPS_Fanny", + # - "FCPS_Hardcl", + # - "FCPS_Softcl", + # - "FCPS_Clara", + # - "FCPS_PAM" inputs: - data.matrix - data.true_labels @@ -267,18 +247,18 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_metrics - commit: 9132d45 + commit: c4eda85 parameters: - metric: ["normalized_clustering_accuracy", "adjusted_fm_score"] - # - metric: "adjusted_mi_score" - # - metric: "adjusted_rand_score" - # - metric: "fm_score" - # - metric: "mi_score" - # - metric: "normalized_clustering_accuracy" - # - metric: "normalized_mi_score" - # - metric: "normalized_pivoted_accuracy" - # - metric: "pair_sets_index" - # - metric: "rand_score" + # - "adjusted_mi_score" + # - "adjusted_rand_score" + # - "fm_score" + # - "mi_score" + # - "normalized_clustering_accuracy" + # - "normalized_mi_score" + # - "normalized_pivoted_accuracy" + # - "pair_sets_index" + # - "rand_score" inputs: - clustering.predicted_ks_range - data.true_labels diff --git a/Clustering_oras.yml b/Clustering_oras.yml index 8925bee..99eeaa5 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -40,10 +40,10 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_data - commit: 8fd8744 + commit: fc67ebd parameters: # comments depict the possible cardinalities and the number of curated labelsets - dataset_generator: "fcps" - dataset_name: ["atom"] # 2 1 + dataset_name: ["atom", "chainlink"] # 2 1 # - dataset_generator: "fcps" # dataset_name: "engytime" # 2 2 @@ -180,28 +180,59 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_fastcluster - # url: /home/imallona/src/clustbench_fastcluster/ - commit: "45e43d3" + commit: e644ce5 parameters: - linkage: "complete" - - # - linkage: "ward" - # - linkage: "average" - # - linkage: "weighted" - # - linkage: "median" - # - linkage: "centroid" + #- linkage: ["ward", "average", "weighted", "median", "centroid"] - id: sklearn name: "sklearn" software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_sklearn - #url: /home/imallona/src/clustbench_sklearn - commit: 5877378 + commit: dcf35e1 parameters: - method: "birch" - # - method: "kmeans" - # - method: "spectral" ## too slow - # - method: "gm" + # ["kmeans, "gm"] + # ["spectral"] ## too slow + - id: agglomerative + name: "agglomerative" + software_environment: "clustbench" + repository: + url: https://github.com/imallona/clustbench_agglomerative + commit: 9d086a9 + parameters: + - linkage: "average" + # ["complete", "ward"] + - id: genieclust + name: "genieclust" + software_environment: "clustbench" + repository: + url: https://github.com/imallona/clustbench_genieclust + commit: 7d9e799 + parameters: + - method: "genie" + # method: ["gic", "ica"] + gini_threshold: 0.5 + - id: fcps + name: "fcps" + software_environment: "fcps" + repository: + url: https://github.com/imallona/clustbench_fcps + commit: e780fed + parameters: + - method: "FCPS_Minimax" + seed: 2 + # - "FCPS_AdaptiveDensityPeak" # not in Conda + # - "FCPS_MinEnergy", + # - "FCPS_HDBSCAN_2", + # - "FCPS_HDBSCAN_4", + # - "FCPS_HDBSCAN_8", + # - "FCPS_Diana", + # - "FCPS_Fanny", + # - "FCPS_Hardcl", + # - "FCPS_Softcl", + # - "FCPS_Clara", + # - "FCPS_PAM" inputs: - data.matrix - data.true_labels @@ -216,18 +247,18 @@ stages: software_environment: "clustbench" repository: url: https://github.com/imallona/clustbench_metrics - commit: 9132d45 + commit: c4eda85 parameters: - - metric: ["normalized_clustering_accuracy"] - # - metric: "adjusted_mi_score" - # - metric: "adjusted_rand_score" - # - metric: "fm_score" - # - metric: "mi_score" - # - metric: "normalized_clustering_accuracy" - # - metric: "normalized_mi_score" - # - metric: "normalized_pivoted_accuracy" - # - metric: "pair_sets_index" - # - metric: "rand_score" + - metric: ["normalized_clustering_accuracy", "adjusted_fm_score"] + # - "adjusted_mi_score" + # - "adjusted_rand_score" + # - "fm_score" + # - "mi_score" + # - "normalized_clustering_accuracy" + # - "normalized_mi_score" + # - "normalized_pivoted_accuracy" + # - "pair_sets_index" + # - "rand_score" inputs: - clustering.predicted_ks_range - data.true_labels From 437714b30494f1e5193505ba4f83594b4ecc8730 Mon Sep 17 00:00:00 2001 From: Daniel Incicau Date: Tue, 20 Jan 2026 14:26:19 +0100 Subject: [PATCH 6/9] Remove {input} prefix from metric collector --- Clustering_conda.yml | 2 +- Clustering_envmodules.yml | 2 +- Clustering_oras.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Clustering_conda.yml b/Clustering_conda.yml index dd8755f..c8b1b12 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: "{input}/{name}/plotting_report.html" + path: "{name}/plotting_report.html" stages: ## clustbench data ########################################################## diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml index 990f306..fec8ff7 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: "{input}/{name}/plotting_report.html" + path: "{name}/plotting_report.html" stages: ## clustbench data ########################################################## diff --git a/Clustering_oras.yml b/Clustering_oras.yml index 99eeaa5..bbb2b6c 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -29,7 +29,7 @@ metric_collectors: - metrics.scores outputs: - id: plotting.html - path: "{input}/{name}/plotting_report.html" + path: "{name}/plotting_report.html" stages: ## clustbench data ########################################################## From da79a57ab1d9afd01d039bcd8bcdc66b2f736574 Mon Sep 17 00:00:00 2001 From: Mark Robinson Date: Tue, 20 Jan 2026 18:44:38 +0100 Subject: [PATCH 7/9] update commit hash to newer report repo --- Clustering_conda.yml | 2 +- Clustering_envmodules.yml | 2 +- Clustering_oras.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Clustering_conda.yml b/Clustering_conda.yml index c8b1b12..1492a3c 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: "fcps" repository: url: https://github.com/imallona/clustering_report - commit: bbb9d56 + commit: 381d20e inputs: - metrics.scores outputs: diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml index fec8ff7..6fb0f79 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: "fcps" repository: url: https://github.com/imallona/clustering_report - commit: bbb9d56 + commit: 381d20e inputs: - metrics.scores outputs: diff --git a/Clustering_oras.yml b/Clustering_oras.yml index bbb2b6c..b131823 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: fcps repository: url: https://github.com/imallona/clustering_report - commit: bbb9d56 + commit: 381d20e inputs: - metrics.scores outputs: From 4cd9376dcd1d0565a67d6b292be11a2d5d44310c Mon Sep 17 00:00:00 2001 From: Mark Robinson Date: Tue, 20 Jan 2026 18:50:02 +0100 Subject: [PATCH 8/9] change it to branch name --- Clustering_conda.yml | 2 +- Clustering_envmodules.yml | 2 +- Clustering_oras.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Clustering_conda.yml b/Clustering_conda.yml index 1492a3c..f0ece39 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: "fcps" repository: url: https://github.com/imallona/clustering_report - commit: 381d20e + commit: 040 inputs: - metrics.scores outputs: diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml index 6fb0f79..47d35b3 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: "fcps" repository: url: https://github.com/imallona/clustering_report - commit: 381d20e + commit: 040 inputs: - metrics.scores outputs: diff --git a/Clustering_oras.yml b/Clustering_oras.yml index b131823..606f3e0 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: fcps repository: url: https://github.com/imallona/clustering_report - commit: 381d20e + commit: 040 inputs: - metrics.scores outputs: From 269799555a339120bd2b6ced8958538007d94cd7 Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 27 Jan 2026 13:53:53 +0100 Subject: [PATCH 9/9] should be a string otherwise, this is interpreted as a number. 040 is 32 in octal, which is what git was trying to clone. --- Clustering_conda.yml | 2 +- Clustering_envmodules.yml | 2 +- Clustering_oras.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Clustering_conda.yml b/Clustering_conda.yml index f0ece39..bc56d6b 100644 --- a/Clustering_conda.yml +++ b/Clustering_conda.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: "fcps" repository: url: https://github.com/imallona/clustering_report - commit: 040 + commit: "040" inputs: - metrics.scores outputs: diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml index 47d35b3..4382ad7 100644 --- a/Clustering_envmodules.yml +++ b/Clustering_envmodules.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: "fcps" repository: url: https://github.com/imallona/clustering_report - commit: 040 + commit: "040" inputs: - metrics.scores outputs: diff --git a/Clustering_oras.yml b/Clustering_oras.yml index 606f3e0..3005070 100644 --- a/Clustering_oras.yml +++ b/Clustering_oras.yml @@ -24,7 +24,7 @@ metric_collectors: software_environment: fcps repository: url: https://github.com/imallona/clustering_report - commit: 040 + commit: "040" inputs: - metrics.scores outputs: