conservationtechlab · Sean1572 · Jan 13, 2026 · Jan 17, 2026
diff --git a/.gitignore b/.gitignore
@@ -237,4 +237,4 @@ demos/
 # Block predictions
 predictions/*
 *.pkl
-*.arrow
+*.arrow
diff --git a/data_exporters/label_studio_exporter/README.md b/data_exporters/label_studio_exporter/README.md
@@ -0,0 +1,24 @@
+# Labeling Audio Data in Label Studio
+
+This pipeline takes audio datasets from the inference or the data_extractors of `whoot_model_training` and formats projects in Label Studio so that annotations are easy to verify. This readme outlines best practices for creating projects in, and integrating with, Label Studio.
+
+
+## Creating new project
+
+1) Create a .env file with the following properties in the same folder that this readme is in
+
+```
+# Define the URL where Label Studio is accessible
+LABEL_STUDIO_URL = 'HOSTNAME OF LABEL STUDIO INSTANCE'
+# API key is available at the Account & Settings page in Label Studio UI
+LABEL_STUDIO_API_KEY = 'INSERT YOUR API KEY'
+LABEL_STUDIO_PROJECT_ID = 'PROJECT ID FROM URL OF PROJECT'
+```
+
+2) Create a new project in that Label Studio instance and upload the needed data to it
+
+NOTE: Save the project_id from the URL of the project
+
+If keeping the data local on the instance, try to keep the same file structure that the audio files have on your ML machine. For example, if some dataset is located at `mnt/datasets/audio_dataset_cool/AB/1/audio.wav` then you may want to make the path on Label Studio something like `label_studio_path/audio_dataset_cool/AB/1/audio.wav` for the easiest integration. Otherwise some minor file changes will be needed.
+
+3) Run the script to apply annotations, see demo.py in this folder
diff --git a/data_exporters/label_studio_exporter/demo.py b/data_exporters/label_studio_exporter/demo.py
@@ -0,0 +1,56 @@
+"""Demo for using Label Studio Exporter with a sample dataset."""
+
+import os
+import random
+from dotenv import load_dotenv
+from label_studio import LabelStudioSetup
+import datasets
+
+if __name__ == "__main__":
+
+    load_dotenv()
+
+    # SELECT A PROJECT FROM LABEL STUDIO
+    # FIND ID IN URL OF PROJECT
+    PROJECT_ID = int(os.getenv("LABEL_STUDIO_PROJECT_ID"))
+    ls_setup = LabelStudioSetup(
+        current_project=PROJECT_ID
+    )
+    # ADD DEFAULT TEMPLATE TO LABEL STUDIO
+    ls_setup.apply_custom_template("template.xml")
+
+    # HOW TO GET AUDIO FILES TO REVIEW
+    # Note this is not a perfect process as
+    # diffrences between label studio and your dataset may exist
+    ls_setup.get_files(ls_file_parent='data/local-files/?d=data1/')
+
+    # Make sure your file names align to label studio files
+
+    # ===============================================================
+    # below is a fake dataset creation for demo purposes only
+    # In practice you would load your dataset from the saves in
+    # whoot_model_training
+    class_list = ['cluck', 'coocoo',
+                  'twitter', 'alarm', 'chick begging', 'no_buow']
+
+    ds = datasets.Dataset.from_dict({
+        "audio": ls_setup.get_files(
+            ls_file_parent='data/local-files/?d=data1/')["files"],
+        "labels": random.choices(
+            class_list, k=len(
+                ls_setup.get_files(
+                    ls_file_parent='data/local-files/?d=data1/')["files"]
+            )
+        )
+    })
+
+    ds = ds.cast_column(
+        "audio", datasets.Audio(sampling_rate=16000, decode=False))
+    # ===============================================================
+
+    # UPLOAD DATASET TO LABEL STUDIO
+    ls_setup.update_tasks_in_ls(
+        ds,
+        ls_file_parent='data/local-files/?d=data1/',
+        is_model_prediction=True
+    )
-Original file line number
+Diff line change
@@ Expand Up / @@ -237,4 +237,4 @@ demos/ @@
     # Block predictions
     predictions/*
     *.pkl
-    *.arrow
+    *.arrow