Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/.env.ci
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Your Credentials:
NILLION_ORG_DID="Add your ORG DID here"
NILLION_ORG_SECRET_KEY="Add your SECRET_KEY here"

# Update this with your own schema ID after creating a new schema
SCHEMA_ID=
CLUSTERS_SCHEMA_ID=
QUERY_ID=

# Cluster Config:
# nildb-node 1:
URL1='https://nildb-nx8v.nillion.network'
DID1='did:nil:testnet:nillion1qfrl8nje3nvwh6cryj63mz2y6gsdptvn07nx8v'

# nildb-node 2:
URL2='https://nildb-p3mx.nillion.network'
DID2='did:nil:testnet:nillion1uak7fgsp69kzfhdd6lfqv69fnzh3lprg2mp3mx'

# nildb-node 3:
URL3='https://nildb-rugk.nillion.network'
DID3='did:nil:testnet:nillion1kfremrp2mryxrynx66etjl8s7wazxc3rssrugk'
35 changes: 34 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ on:
branches: [ main ]
pull_request:
branches: [ main ]
workflow_dispatch:

jobs:
test:
permissions:
contents: write
runs-on: ubuntu-latest
strategy:
matrix:
Expand Down Expand Up @@ -35,7 +38,7 @@ jobs:
run: |
uv pip install -e ".[dev]"
uv pip install isort pylint

- name: Check code formatting with isort
run: |
uv run isort --check-only .
Expand All @@ -47,3 +50,33 @@ jobs:
- name: Run tests
run: |
uv run -m unittest test.rag

- name: Set up environment
  run: |
    # Start from the committed template, then inject the real values from
    # repository secrets.
    cp .github/workflows/.env.ci .env
    # Every pattern is anchored with ^ so that it only rewrites the variable
    # it names. Without the anchor, "SCHEMA_ID=.*" also matches the tail of
    # the "CLUSTERS_SCHEMA_ID=" line and overwrites it with the SCHEMA_ID
    # secret.
    # Note: the .env.ci template itself only holds placeholders and the
    # nilDB node configuration. Per the original note, the benchmark runs
    # against a dataset of 1000 paragraphs and specifically tests the
    # non-clustered case of RAG execution, so CLUSTERS_SCHEMA_ID is left
    # empty here.
    sed -i 's/^NILLION_ORG_DID=.*/NILLION_ORG_DID=${{ secrets.NILLION_ORG_DID }}/' .env
    sed -i 's/^NILLION_ORG_SECRET_KEY=.*/NILLION_ORG_SECRET_KEY=${{ secrets.NILLION_ORG_SECRET_KEY }}/' .env
    sed -i 's/^SCHEMA_ID=.*/SCHEMA_ID=${{ secrets.SCHEMA_ID }}/' .env
    sed -i 's/^QUERY_ID=.*/QUERY_ID=${{ secrets.QUERY_ID }}/' .env

- name: Run benchmark
run: |
uv run pytest benchmarks/test_rag.py --benchmark-json output.json

- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@v1
with:
name: Python Benchmark with pytest-benchmark
tool: 'pytest'
output-file-path: output.json
github-token: ${{ secrets.BENCHMARK_ACTION_BOT_TOKEN }} # Note: This token has a limited lifetime (30 days) and needs to be renewed periodically.
auto-push: true
# Show alert with commit comment on detecting possible performance regression
alert-threshold: '135%'
comment-on-alert: true
fail-on-alert: true
gh-pages-branch: gh-pages
benchmark-data-dir-path: .
76 changes: 76 additions & 0 deletions benchmarks/test_rag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
Benchmarks for RAG performance using pytest-benchmark.

This script:
1. Sets up a RAG instance with nilDB configuration
2. Performs a warm-up phase to ensure stable measurements
3. Runs benchmark tests using pytest-benchmark's pedantic mode
4. Measures execution time and performance metrics for RAG operations.
"""

import asyncio
import os

import pytest
from dotenv import load_dotenv

from nilrag.nildb.org_config import ORG_CONFIG
from nilrag.rag_vault import RAGVault


def test_rag_pedantic(benchmark):
    """
    Measure RAG query execution with pytest-benchmark's pedantic mode.

    The test proceeds as follows:
    1. Loads environment variables (overriding any already set) and reads
       the schema, clusters-schema and query IDs from them
    2. Builds a RAGVault from ORG_CONFIG and those IDs
    3. Runs the query ten times as a warm-up, outside of the measurement
    4. Benchmarks the same query for 5 rounds of 10 iterations each
    5. Checks that the value returned by the benchmark run is a list

    Args:
        benchmark: pytest-benchmark fixture for performance testing
    """
    load_dotenv(override=True)

    # Collected up front so the vault construction below reads as one call.
    vault_ids = {
        "schema_id": os.getenv("SCHEMA_ID"),
        "clusters_schema_id": os.getenv("CLUSTERS_SCHEMA_ID"),
        "subtract_query_id": os.getenv("QUERY_ID"),
    }

    # RAGVault.create is awaited, so it is driven to completion here.
    rag = asyncio.run(
        RAGVault.create(
            ORG_CONFIG["nodes"],
            ORG_CONFIG["org_credentials"],
            **vault_ids,
        )
    )

    # Query parameters shared by the warm-up and the measured runs.
    question = "Who is Michelle Ross?"
    chunk_count = 2
    cluster_count = 1
    warmup_calls = 10

    def run_query_once():
        """
        Execute a single RAG query synchronously.

        pytest-benchmark calls a plain function, so the coroutine returned by
        top_num_chunks_execute is run to completion with asyncio.run.
        The third positional argument (False) presumably selects the
        non-clustered path - confirm against RAGVault.

        Returns:
            Whatever top_num_chunks_execute resolves to (asserted to be a
            list by the enclosing test).
        """
        pending = rag.top_num_chunks_execute(
            question, chunk_count, False, cluster_count
        )
        return asyncio.run(pending)

    # Warm-up: unmeasured calls before the benchmark starts.
    for _ in range(warmup_calls):
        run_query_once()

    # Measured runs.
    outcome = benchmark.pedantic(run_query_once, iterations=10, rounds=5)

    assert isinstance(outcome, list)
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ dev = [
"black>=24.10.0",
"isort>=5.13.2",
"pylint>=3.3.3",
"pytest>=8.3.5",
"pytest-benchmark>=5.1.0",
]

[tool.setuptools.packages.find]
Expand Down
Loading