NillionNetwork · jfdreis · May 30, 2025 · May 13, 2025
diff --git a/.github/workflows/.env.ci b/.github/workflows/.env.ci
@@ -0,0 +1,21 @@
+# Your Credentials:
+NILLION_ORG_DID="Add your ORG DID here"
+NILLION_ORG_SECRET_KEY="Add your SECRET_KEY here"
+
+# Update these with your own IDs after creating a new schema and query
+SCHEMA_ID=
+CLUSTERS_SCHEMA_ID=
+QUERY_ID=
+
+# Cluster Config:
+# nildb-node 1:
+URL1='https://nildb-nx8v.nillion.network'
+DID1='did:nil:testnet:nillion1qfrl8nje3nvwh6cryj63mz2y6gsdptvn07nx8v'
+
+# nildb-node 2:
+URL2='https://nildb-p3mx.nillion.network'
+DID2='did:nil:testnet:nillion1uak7fgsp69kzfhdd6lfqv69fnzh3lprg2mp3mx'
+
+# nildb-node 3:
+URL3='https://nildb-rugk.nillion.network'
+DID3='did:nil:testnet:nillion1kfremrp2mryxrynx66etjl8s7wazxc3rssrugk'
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -5,9 +5,12 @@ on:
     branches: [ main ]
   pull_request:
     branches: [ main ]
+  workflow_dispatch:
 
 jobs:
   test:
+    permissions:
+      contents: write 
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -35,7 +38,7 @@ jobs:
       run: |
         uv pip install -e ".[dev]"
         uv pip install isort pylint
-
+    
     - name: Check code formatting with isort
       run: |
         uv run isort --check-only .
@@ -47,3 +50,33 @@ jobs:
     - name: Run tests
       run: |
         uv run -m unittest test.rag
+
+    - name: Set up environment
+      run: |
+        cp .github/workflows/.env.ci .env
+        # Replace Nillion credentials in .env (anchored with ^ so SCHEMA_ID= cannot match CLUSTERS_SCHEMA_ID=)
+        # Note: The .env.ci file contains a dataset of 1000 paragraphs for benchmarking.
+        # This benchmark specifically tests the non-clustered case of RAG execution
+        sed -i 's/^NILLION_ORG_DID=.*/NILLION_ORG_DID=${{ secrets.NILLION_ORG_DID }}/' .env
+        sed -i 's/^NILLION_ORG_SECRET_KEY=.*/NILLION_ORG_SECRET_KEY=${{ secrets.NILLION_ORG_SECRET_KEY }}/' .env
+        sed -i 's/^SCHEMA_ID=.*/SCHEMA_ID=${{ secrets.SCHEMA_ID }}/' .env
+        sed -i 's/^QUERY_ID=.*/QUERY_ID=${{ secrets.QUERY_ID }}/' .env
+
+    - name: Run benchmark  # writes the pytest-benchmark results to output.json for the next step
+      run: |
+        uv run pytest benchmarks/test_rag.py --benchmark-json output.json
+
+    - name: Store benchmark result
+      uses: benchmark-action/github-action-benchmark@v1
+      with:
+        name: Python Benchmark with pytest-benchmark
+        tool: 'pytest'
+        output-file-path: output.json
+        github-token: ${{ secrets.BENCHMARK_ACTION_BOT_TOKEN }} # Note: This token has a limited lifetime (30 days) and needs to be renewed periodically.
+        auto-push: true
+        # On a possible performance regression (threshold below): post a commit comment and fail the job
+        alert-threshold: '135%'
+        comment-on-alert: true
+        fail-on-alert: true
+        gh-pages-branch: gh-pages
+        benchmark-data-dir-path: .
diff --git a/benchmarks/test_rag.py b/benchmarks/test_rag.py
@@ -0,0 +1,76 @@
+"""
+Benchmarks for RAG performance using pytest-benchmark.
+
+This script:
+1. Sets up a RAG instance with nilDB configuration
+2. Performs a warm-up phase to ensure stable measurements
+3. Runs benchmark tests using pytest-benchmark's pedantic mode
+4. Measures execution time and performance metrics for RAG operations.
+"""
+
+import asyncio
+import os
+
+import pytest
+from dotenv import load_dotenv
+
+from nilrag.nildb.org_config import ORG_CONFIG
+from nilrag.rag_vault import RAGVault
+
+
+def test_rag_pedantic(benchmark):
+    """
+    Measure RAG query latency with pytest-benchmark's pedantic mode.
+
+    Environment variables are loaded from ``.env`` (overriding any already
+    set), a ``RAGVault`` is created from ``ORG_CONFIG`` plus the schema and
+    query IDs read from the environment, and one fixed prompt is queried
+    repeatedly: 10 untimed warm-up calls followed by 5 timed rounds of
+    10 iterations each. The test passes when the benchmarked call
+    returns a list.
+
+    Args:
+        benchmark: pytest-benchmark fixture for performance testing
+    """
+    load_dotenv(override=True)
+
+    # Build the vault once, outside the measured code path.
+    vault = asyncio.run(
+        RAGVault.create(
+            ORG_CONFIG["nodes"],
+            ORG_CONFIG["org_credentials"],
+            schema_id=os.getenv("SCHEMA_ID"),
+            clusters_schema_id=os.getenv("CLUSTERS_SCHEMA_ID"),
+            subtract_query_id=os.getenv("QUERY_ID"),
+        )
+    )
+
+    # Fixed query parameters shared by the warm-up and the timed runs.
+    question = "Who is Michelle Ross?"
+    chunk_count = 2
+    cluster_count = 1
+    warmup_calls = 10
+
+    def run_query():
+        """
+        Run one RAG query to completion from synchronous code.
+
+        pytest-benchmark times a plain callable, so each call drives the
+        async ``top_num_chunks_execute`` coroutine with ``asyncio.run``.
+
+        Returns:
+            The value produced by ``top_num_chunks_execute``.
+        """
+        pending = vault.top_num_chunks_execute(
+            question, chunk_count, False, cluster_count
+        )
+        return asyncio.run(pending)
+
+    # Untimed warm-up calls before measuring.
+    for _ in range(warmup_calls):
+        run_query()
+
+    # Timed measurement: 5 rounds of 10 iterations each.
+    timed_result = benchmark.pedantic(run_query, rounds=5, iterations=10)
+
+    assert isinstance(timed_result, list)
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,8 @@ dev = [
     "black>=24.10.0",
     "isort>=5.13.2",
     "pylint>=3.3.3",
+    "pytest>=8.3.5",
+    "pytest-benchmark>=5.1.0",
 ]
 
 [tool.setuptools.packages.find]