Merged
35 changes: 35 additions & 0 deletions bin/upload_to_gcloud.py
@@ -0,0 +1,35 @@
#!/usr/bin/env python3

import argparse
import os
import sys
from datetime import datetime

from google.cloud import storage


def upload_evidence_to_gcloud_bucket(source_file_name, destination_folder):
    """Uploads source_file_name to destination_folder in the Google Cloud Storage bucket.
    The file will be renamed to cttv012-[yyyy]-[mm]-[dd].json.gz."""
    if 'OT_BUCKET_NAME' not in os.environ or 'OT_CREDS_FILE' not in os.environ:
        sys.exit('Environment variables OT_BUCKET_NAME and OT_CREDS_FILE must be set')
    bucket_name = os.environ['OT_BUCKET_NAME']
    creds_json_file = os.environ['OT_CREDS_FILE']

    date_string = datetime.today().strftime('%Y-%m-%d')
    destination_blob_name = f'{destination_folder}/cttv012-{date_string}.json.gz'

    storage_client = storage.Client.from_service_account_json(creds_json_file)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)
    print(f'File {source_file_name} uploaded to gs://{bucket_name}/{destination_blob_name}.')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Upload file to Open Targets Google Cloud Storage')
    parser.add_argument('--input-file', required=True, help='File to upload')
    parser.add_argument('--destination-folder', required=True, help='Destination folder within bucket')
    args = parser.parse_args()
    upload_evidence_to_gcloud_bucket(args.input_file, args.destination_folder)
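The date-stamped naming convention above can be illustrated in isolation. This sketch factors it into a hypothetical `destination_blob_name` helper (not part of the script) so the format is easy to verify:

```python
from datetime import datetime

def destination_blob_name(destination_folder, today=None):
    # Mirrors the script's naming: <folder>/cttv012-<yyyy>-<mm>-<dd>.json.gz
    date_string = (today or datetime.today()).strftime('%Y-%m-%d')
    return f'{destination_folder}/cttv012-{date_string}.json.gz'

print(destination_blob_name('disease-target-evidence', datetime(2020, 10, 21)))
# → disease-target-evidence/cttv012-2020-10-21.json.gz
```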
13 changes: 9 additions & 4 deletions docs/open-targets/environment.md
@@ -1,10 +1,11 @@
# Setting up the common environment

1. Log in to the LSF cluster (currently `codon`), where all data processing must take place.
1. Log in to the Codon cluster, where all data processing must take place.
1. Using a `become` command, switch to a common EVA production user instead of your personal account.
1. Adjust and execute the commands below to set up the environment. Notes:
- The first five variables are installation-specific and are blanked in this repository. You can get the values for the EVA installation from the [private repository](https://github.com/EBIvariation/configuration/blob/master/open-targets-configuration.md).
- These instructions use the Gitlab deployment of the pipeline onto the cluster. To use another installation just modify `CODE_ROOT` and `PYTHON_BIN` accordingly.
1. Adjust and execute the commands below to set up the environment.

Note that several variables are installation-specific and are blanked in this repository. EVA users can refer to the [private repository](https://github.com/EBIvariation/configuration/blob/master/open-targets/set-up-clinvar.sh) for the values
(or `source` the file directly on the cluster).

```bash
# This variable should point to the directory where the clone of this repository is located on the cluster
@@ -28,4 +29,8 @@ export PYTHONPATH=${PYTHON_INSTALL_PATH}

# Location of Python executable, pointing to the virtualenv
export PYTHON_BIN=${CODE_ROOT}/env/bin/python

# Required for Google Cloud upload
export OT_BUCKET_NAME=
export OT_CREDS_FILE=
```
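A quick sanity check before running the upload script can catch unset variables early; a minimal optional sketch (bash-specific, using indirect parameter expansion):

```bash
# Warn about any unset Google Cloud variables required by upload_to_gcloud.py
for var in OT_BUCKET_NAME OT_CREDS_FILE; do
    [ -n "${!var}" ] || echo "Warning: $var is not set"
done
```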
11 changes: 8 additions & 3 deletions docs/open-targets/generate-evidence-strings.md
@@ -59,14 +59,19 @@ After the evidence strings have been generated, summary metrics need to be updated
There are also a few version numbers to record. For the EFO version, compare the release date [here](https://github.com/EBISPOT/efo/releases) with the manual curation date. For the Ensembl version, do the same with the release date [here](https://www.ensembl.org/index.html) and the evidence string generation date.

### Submit evidence strings
The evidence string file (`evidence_strings.json`) must be uploaded to the [Open Targets Google Cloud Storage](https://console.cloud.google.com/storage/browser/otar012-eva/) and be named in the format `cttv012-[yyyy]-[mm]-[dd].json.gz` (e.g. `cttv012-2020-10-21.json.gz`).
The evidence string file (`evidence_strings.json`) must be compressed and uploaded to the [Open Targets Google Cloud Storage](https://console.cloud.google.com/storage/browser/otar012-eva/).
To do this, run the following:
```shell
gzip evidence_strings/evidence_strings.json
${CODE_ROOT}/bin/upload_to_gcloud.py --input-file evidence_strings/evidence_strings.json.gz --destination-folder disease-target-evidence
```
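If you want to verify the compressed file before uploading, a small Python sketch (with a hypothetical `is_valid_gzip` helper, not part of the pipeline) decompresses the archive end to end:

```python
import gzip

def is_valid_gzip(path):
    # Attempt to decompress the whole file; a corrupt or truncated
    # archive raises OSError (BadGzipFile) or EOFError
    try:
        with gzip.open(path, 'rb') as f:
            while f.read(1024 * 1024):
                pass
        return True
    except (OSError, EOFError):
        return False
```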

Once the upload is complete, send an email to Open Targets (data [at] opentargets.org) containing the following information from the [metrics spreadsheet](https://docs.google.com/spreadsheets/d/1g_4tHNWP4VIikH7Jb0ui5aNr0PiFgvscZYOe69g191k/):
* The number of submitted evidence strings
* The ClinVar release date
* The Ensembl release
* The EFO version used for mapping
* The `eva-opentargets` pipeline version
* The `CMAT` pipeline version
* The Open Targets JSON schema version

### Submit feedback to ZOOMA
@@ -76,7 +81,7 @@ The idea with [ZOOMA](http://www.ebi.ac.uk/spot/zooma/) is that we not only use

The files are uploaded to the FTP, where ZOOMA will pick them up. At this stage, you only need to upload the **clinvar_xrefs** dataset (the *eva_clinvar* dataset is updated in the process of the manual curation).

To make changes to the FTP, you will need to log in to the cluster using your **personal account** and then you will need to have set up the common environment like in step 1.
To make changes to the FTP, you will need to log in to a datamover node.
```bash
# Create the folder, copy the file to FTP, and update the “latest” folder
FTP_PATH=${FTP_PATH_BASE}/`date +%Y/%m/%d`
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,5 +1,6 @@
coverage==6.5.0
coveralls==3.3.1
google-cloud-storage==3.5.0
jsonschema==4.23.0
numpy==1.26.0
pandas==2.2.3
3 changes: 2 additions & 1 deletion setup.py
@@ -43,5 +43,6 @@ def get_requires():
    tests_require=get_requires(),
    setup_requires=get_requires(),
    test_suite='tests',
    classifiers=classifiers
    classifiers=classifiers,
    scripts=[os.path.join(os.path.dirname(__file__), 'bin', 'upload_to_gcloud.py')]
)
10 changes: 5 additions & 5 deletions tests/trait_mapping/test_main.py
@@ -97,17 +97,17 @@ def test_ols_exact_match(self):
    def test_zooma_high_confidence(self):
        # Finds nothing exact via OLS, so goes through Zooma as well and finds a high-confidence result
        trait = Trait('11p partial monosomy syndrome', None, None)
        processed_trai = self.run_process_trait(trait)
        assert len(processed_trai.ols_result_list) == 6
        assert len(processed_trai.zooma_result_list) == 1
        assert processed_trai.is_finished
        processed_trait = self.run_process_trait(trait)
        assert len(processed_trait.ols_result_list) == 6
        assert len(processed_trait.zooma_result_list) == 2
        assert processed_trait.is_finished

    def test_not_finished(self):
        # No sufficiently good mappings in OLS or Zooma
        trait = Trait('aicardi-goutieres syndrome 99', None, None)
        processed_trait = self.run_process_trait(trait)
        assert len(processed_trait.ols_result_list) == 0
        assert len(processed_trait.zooma_result_list) == 15
        assert len(processed_trait.zooma_result_list) == 19
        assert not processed_trait.is_finished

def test_ols_exact_ascii_match(self):
Expand Down