togethercomputer · VProv · Nov 17, 2025 · Dec 19, 2025 · Dec 19, 2025 · arnica-github-connector
diff --git a/Evals/Helpsteer_exercise.ipynb b/Evals/Helpsteer_exercise.ipynb
diff --git a/Evals/Optimizing_LLM_Judges.ipynb b/Evals/Optimizing_LLM_Judges.ipynb
diff --git a/Evals/install.sh b/Evals/install.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Set up the Python environment for the notebooks in this directory.
+#
+# Creates a virtual environment named env_cookbook_evals in the current
+# directory and installs the pinned packages from requirements.txt into it.
+# Both paths are relative, so run this from the directory that holds
+# requirements.txt.
+#
+# Usage:
+#   source install.sh   # environment stays active in the calling shell
+#   ./install.sh        # environment is created, activate it afterwards
+#
+# Every step is chained to the previous one, so a failing step stops the
+# setup and becomes the exit status. Without the chain a missing python3.10
+# would fall through to a pip install outside the virtual environment.
+# Neither exit nor set -e is used, because both would leak into the caller
+# when this file is sourced.
+
+# Create the virtual environment, activate it for the pip step, install.
+python3.10 -m venv env_cookbook_evals &&
+  source env_cookbook_evals/bin/activate &&
+  pip install -r requirements.txt &&
+  echo "Setup complete! Virtual environment 'env_cookbook_evals' is ready." &&
+  echo "If you ran this script instead of sourcing it, activate with: source env_cookbook_evals/bin/activate"
diff --git a/Evals/judge_dpo_data/rewardbench2_dpo_train.jsonl b/Evals/judge_dpo_data/rewardbench2_dpo_train.jsonl
diff --git a/Evals/judge_dpo_data/rewardbench2_dpo_val.jsonl b/Evals/judge_dpo_data/rewardbench2_dpo_val.jsonl
diff --git a/Evals/judge_results/baseline_test_DeepSeek_V3.1.jsonl b/Evals/judge_results/baseline_test_DeepSeek_V3.1.jsonl
diff --git a/Evals/judge_results/baseline_test_GPT-OSS_120B.jsonl b/Evals/judge_results/baseline_test_GPT-OSS_120B.jsonl
diff --git a/Evals/judge_results/baseline_test_Kimi_K2_Instruct.jsonl b/Evals/judge_results/baseline_test_Kimi_K2_Instruct.jsonl
diff --git a/Evals/judge_results/baseline_test_Qwen3_235B.jsonl b/Evals/judge_results/baseline_test_Qwen3_235B.jsonl
diff --git a/Evals/requirements.txt b/Evals/requirements.txt
@@ -0,0 +1,105 @@
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.2
+aiosignal==1.4.0
+annotated-types==0.7.0
+anyio==4.12.0
+appnope==0.1.4
+asttokens==3.0.1
+async-timeout==5.0.1
+attrs==25.4.0
+black==25.12.0
+certifi==2025.11.12
+charset-normalizer==3.4.4
+click==8.3.1
+comm==0.2.3
+contourpy==1.3.2
+cycler==0.12.1
+datasets==4.4.1
+debugpy==1.8.18
+decorator==5.2.1
+dill==0.4.0
+distro==1.9.0
+dotenv==0.9.9
+eval-type-backport==0.2.2
+exceptiongroup==1.3.1
+executing==2.2.1
+filelock==3.20.0
+fonttools==4.61.1
+frozenlist==1.8.0
+fsspec==2025.10.0
+h11==0.16.0
+hf-xet==1.2.0
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.36.0
+idna==3.11
+ipykernel==7.1.0
+ipython==8.37.0
+jedi==0.19.2
+jinja2==3.1.6
+jupyter-client==8.7.0
+jupyter-core==5.9.1
+kiwisolver==1.4.9
+markdown-it-py==4.0.0
+markupsafe==3.0.3
+matplotlib==3.10.8
+matplotlib-inline==0.2.1
+mdurl==0.1.2
+multidict==6.7.0
+multiprocess==0.70.18
+mypy-extensions==1.1.0
+nest-asyncio==1.6.0
+numpy==2.2.6
+packaging==25.0
+pandas==2.3.3
+parso==0.8.5
+pathspec==0.12.1
+pexpect==4.9.0
+pillow==11.3.0
+pip==24.0
+platformdirs==4.5.1
+prompt-toolkit==3.0.52
+propcache==0.4.1
+psutil==7.1.3
+ptyprocess==0.7.0
+pure-eval==0.2.3
+pyarrow==22.0.0
+pydantic==2.12.5
+pydantic-core==2.41.5
+pygments==2.19.2
+pyparsing==3.2.5
+python-dateutil==2.9.0.post0
+python-dotenv==1.2.1
+pytokens==0.3.0
+pytz==2025.2
+pyyaml==6.0.3
+pyzmq==27.1.0
+regex==2025.11.3
+requests==2.32.5
+rich==14.2.0
+safetensors==0.7.0
+setuptools==69.2.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+stack-data==0.6.3
+tabulate==0.9.0
+together==1.5.33
+tokenizers==0.22.1
+tomli==2.3.0
+tornado==6.5.3
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.57.3
+typer==0.19.2
+typer-slim==0.20.0
+types-requests==2.32.4.20250913
+types-tabulate==0.9.0.20241207
+types-tqdm==4.67.0.20250809
+typing-extensions==4.15.0
+typing-inspection==0.4.2
+tzdata==2025.3
+urllib3==2.6.1
+wcwidth==0.2.14
+xxhash==3.6.0
+yarl==1.22.0