58 changes: 58 additions & 0 deletions bin/run-response.sh
@@ -0,0 +1,58 @@
#!/bin/bash
#
# Build prompts, optionally cull them against ground-truth answers, and
# collect model responses.

ROOT=$(git rev-parse --show-toplevel)

export PYTHONPATH="$ROOT"

_repetition=5
_default_model=gpt-4o-mini

while getopts 'n:p:d:g:m:e:h' option; do
    case $option in
        n) _repetition=$OPTARG ;;
        p) _prompts=$OPTARG ;;
        d) _documents=$OPTARG ;;
        g) _gt=$OPTARG ;;
        m) _models=( "${_models[@]}" --model "$OPTARG" ) ;;
        e) _extra=( "${_extra[@]}" --extra-info "$OPTARG" ) ;;
        h)
            cat <<EOF
Usage: $0
 -n Number of times to repeat each judgement (default $_repetition)
 -p Directory containing system and user prompts. The value provided
    is expected to contain "system" and "user" subdirectories
 -d Directory containing documents for the OpenAI vector store
 -g Directory containing reference responses. If this option is
    provided, only user prompts that have a corresponding
    ground-truth answer will be run
 -m OpenAI model. Specify multiple times to test multiple models
 -e Extra information passed to the prompt builder (--extra-info).
    Specify multiple times to provide multiple values
EOF
            exit 0
            ;;
        *)
            echo "Unrecognized option \"$option\"" >&2
            exit 1
            ;;
    esac
done

# Fall back to the default model if none was requested.
if [ ${#_models[@]} -eq 0 ]; then
    _models=( --model "$_default_model" )
fi

# Build the prompts, cull them to those with ground truth when -g is given,
# then collect responses from each requested model.
python "$ROOT/src/prompt/build.py" "${_extra[@]}" \
       --user-prompts "$_prompts/user" \
       --system-prompts "$_prompts/system" \
       --documents "$_documents" \
       --repetition "$_repetition" | \
    if [ -n "$_gt" ]; then
        python "$ROOT/src/prompt/cull.py" --ground-truth "$_gt"
    else
        cat
    fi | \
    python "$ROOT/src/prompt/response.py" "${_models[@]}" \
           --document-root "$_documents" \
           --prompt-root "$_prompts"
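A sketch of a typical invocation (the directory and model names below are illustrative, not part of this change):

    bin/run-response.sh \
        -p prompts \
        -d documents \
        -g ground-truth \
        -n 3 \
        -m gpt-4o-mini \
        -m gpt-4o

When -g is given, only user prompts with a matching ground-truth answer are passed on to the models; otherwise every built prompt is sent to each requested model (gpt-4o-mini by default).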



48 changes: 48 additions & 0 deletions src/analysis/response-scores/latency.py
@@ -0,0 +1,48 @@
import pandas as pd
import matplotlib.pyplot as plt
from argparse import ArgumentParser
from pathlib import Path

def load_and_flatten_jsonl(file_path: Path) -> pd.DataFrame:
    """Read a JSONL file and flatten the nested "response" field into columns."""
    df = pd.read_json(file_path, lines=True)
    # Each record's "response" is a list; take the first element (or an empty
    # dict) and normalize it into flat columns.
    response_data = pd.json_normalize(
        df['response'].apply(lambda r: r[0] if isinstance(r, list) and r else {})
    )
    df_flat = pd.concat([df.drop(columns='response'), response_data], axis=1)
    return df_flat

def compute_latency_stats(df: pd.DataFrame) -> pd.DataFrame:
    """Summarize the "latency" column as a two-column Metric/Value table."""
    latencies = df['latency']
    stats = {
        'Total Responses': len(latencies),
        'Total Time (minutes)': latencies.sum() / 60,
        'Min Latency (seconds)': latencies.min(),
        'Max Latency (seconds)': latencies.max(),
        'Average Latency (seconds)': latencies.mean(),
        'Median Latency (seconds)': latencies.median()
    }
    return pd.DataFrame(list(stats.items()), columns=['Metric', 'Value'])

def plot_latency_stats(stats_df: pd.DataFrame, output_path: Path):
    """Render the summary table as a horizontal bar chart and save it."""
    plt.figure(figsize=(10, 6))
    bars = plt.barh(stats_df['Metric'], stats_df['Value'], color='skyblue')
    plt.xlabel('Value')
    plt.title('Latency Summary')
    plt.grid(True, linestyle='dotted', axis='x', alpha=0.5)

    # Annotate each bar with its value.
    for bar in bars:
        width = bar.get_width()
        plt.text(width, bar.get_y() + bar.get_height() / 2, f'{width:.2f}', va='center')

    plt.tight_layout()
    plt.savefig(output_path)
    print(f"Saved plot to {output_path}")

if __name__ == '__main__':
    parser = ArgumentParser(description="Plot latency summary from a JSONL file.")
    parser.add_argument('--input', type=Path, required=True, help="Path to the .jsonl file")
    parser.add_argument('--output', type=Path, default=Path("latency_summary.png"),
                        help="Path to save the output plot")
    args = parser.parse_args()

    df = load_and_flatten_jsonl(args.input)
    stats_df = compute_latency_stats(df)
    plot_latency_stats(stats_df, args.output)
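A minimal sketch of running the analysis, assuming the collected responses have been written to a JSONL file (the file name is illustrative; the script expects records whose nested "response" list carries a "latency" field, in seconds):

    python src/analysis/response-scores/latency.py \
        --input responses.jsonl \
        --output latency_summary.png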