diff --git a/pyproject.toml b/pyproject.toml
index 27bea0625..843eeace0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ include = ["*"]
[tool.setuptools.package-data]
"guidellm.data" = ["*.gz"]
"guidellm.benchmark.scenarios" = ["*.json", "**/*.json"]
+"guidellm.benchmark.outputs.html_outputs" = ["*.html"]
[[tool.uv.index]]
name = "pytorch-cpu"
@@ -70,11 +71,12 @@ dependencies = [
"transformers",
"uvloop>=0.18",
"torch",
+ "more-itertools>=10.8.0",
]
[project.optional-dependencies]
# Meta Extras
-all = ["guidellm[perf,tokenizers,audio,vision]"]
+all = ["guidellm[perf,tokenizers,audio,vision,embeddings]"]
recommended = ["guidellm[perf,tokenizers]"]
# Feature Extras
perf = ["orjson", "msgpack", "msgspec", "uvloop"]
@@ -90,6 +92,12 @@ vision = [
"datasets[vision]",
"pillow",
]
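+# Optional embeddings extras; install with: pip install "guidellm[embeddings]"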
+embeddings = [
+ # Quality validation with baseline models
+ "sentence-transformers>=2.2.0",
+ # MTEB benchmark integration
+ "mteb>=1.0.0",
+]
# Dev Tooling
dev = [
# Install all optional dependencies
@@ -179,7 +187,9 @@ module = [
"transformers.*",
"setuptools.*",
"setuptools_git_versioning.*",
- "torchcodec.*"
+ "torchcodec.*",
+ "sentence_transformers.*",
+ "mteb.*"
]
ignore_missing_imports = true
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
index 7e9dab87f..f11461b05 100644
--- a/src/guidellm/__main__.py
+++ b/src/guidellm/__main__.py
@@ -792,5 +792,226 @@ def mock_server(
server.run()
+@benchmark.command(
+ "embeddings",
+ help=(
+ "Run embeddings benchmark with optional quality validation. "
+ "Supports cosine similarity validation and MTEB benchmark evaluation."
+ ),
+ context_settings={"auto_envvar_prefix": "GUIDELLM"},
+)
+@click.option(
+ "--target",
+ type=str,
+ required=True,
+ help="Target backend URL (e.g., http://localhost:8000).",
+)
+@click.option(
+ "--data",
+ type=str,
+ multiple=True,
+ required=True,
+ help=(
+ "HuggingFace dataset ID, path to dataset, path to data file "
+ "(csv/json/jsonl/txt), or synthetic data config."
+ ),
+)
+@click.option(
+ "--profile",
+ default="sweep",
+ type=click.Choice(STRATEGY_PROFILE_CHOICES),
+ help=f"Benchmark profile type. Options: {', '.join(STRATEGY_PROFILE_CHOICES)}.",
+)
+@click.option(
+ "--rate",
+ callback=cli_tools.parse_list_floats,
+ multiple=True,
+ default=None,
+ help="Benchmark rate(s) to test. Meaning depends on profile.",
+)
+@click.option(
+ "--backend",
+ type=click.Choice(list(get_literal_vals(BackendType))),
+ default="openai_http",
+ help=f"Backend type. Options: {', '.join(get_literal_vals(BackendType))}.",
+)
+@click.option(
+ "--backend-kwargs",
+ callback=cli_tools.parse_json,
+ default=None,
+ help='JSON string of backend arguments. E.g., \'{"api_key": "key"}\'',
+)
+@click.option(
+ "--model",
+ default=None,
+ type=str,
+ help="Model ID to benchmark. If not provided, uses first available model.",
+)
+@click.option(
+ "--request-format",
+ default="embeddings",
+ help="Format to use for requests (default: embeddings).",
+)
+@click.option(
+ "--processor",
+ default=None,
+ type=str,
+ help="Processor or tokenizer for token counts. If not provided, loads from model.",
+)
+@click.option(
+ "--data-samples",
+ default=-1,
+ type=int,
+ help="Number of samples from dataset. -1 (default) uses all samples.",
+)
+@click.option(
+ "--outputs",
+ default=["json", "csv", "html"],
+ callback=cli_tools.parse_list,
+ help=(
+ "Comma-separated list of output formats: json,csv,html,console. "
+ "Default: json,csv,html"
+ ),
+)
+@click.option(
+ "--output-dir",
+ type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
+ default=Path.cwd(),
+ help="Directory to save output files. Default: current directory.",
+)
+@click.option(
+ "--max-requests",
+ default=None,
+ type=int,
+ help="Maximum number of requests to execute.",
+)
+@click.option(
+ "--max-errors",
+ default=None,
+ type=int,
+ help="Maximum number of errors before stopping benchmark.",
+)
+@click.option(
+ "--max-duration",
+ default=None,
+ type=float,
+ help="Maximum duration in seconds for benchmark execution.",
+)
+# Embeddings-specific quality validation options
+@click.option(
+ "--enable-quality-validation",
+ is_flag=True,
+ default=False,
+ help="Enable quality validation using cosine similarity against baseline model.",
+)
+@click.option(
+ "--baseline-model",
+ default=None,
+ type=str,
+ help=(
+ "HuggingFace model for baseline comparison. "
+ "E.g., 'sentence-transformers/all-MiniLM-L6-v2'. "
+ "Defaults to target model if not specified."
+ ),
+)
+@click.option(
+ "--quality-tolerance",
+ default=1e-2,
+ type=float,
+ help=(
+ "Cosine similarity tolerance threshold. "
+ "Default: 1e-2 (standard), use 5e-4 for MTEB-level validation."
+ ),
+)
+@click.option(
+ "--enable-mteb",
+ is_flag=True,
+ default=False,
+ help="Enable MTEB benchmark evaluation for standardized quality scoring.",
+)
+@click.option(
+ "--mteb-tasks",
+ callback=cli_tools.parse_list,
+ default=None,
+ help=(
+ "Comma-separated list of MTEB tasks. "
+ "Default: STS12,STS13,STSBenchmark. E.g., 'STS12,STS13,STS14'"
+ ),
+)
+@click.option(
+ "--encoding-format",
+ type=click.Choice(["float", "base64"]),
+ default="float",
+ help="Embedding encoding format. Options: float, base64. Default: float.",
+)
+@click.option(
+ "--disable-console",
+ is_flag=True,
+ default=False,
+ help="Disable all console output (including progress display).",
+)
+@click.option(
+ "--disable-console-interactive",
+ is_flag=True,
+ default=False,
+ help="Disable interactive console elements (progress bar, tables).",
+)
+@click.option(
+ "--random-seed",
+ default=42,
+ type=int,
+ help="Random seed for reproducibility. Default: 42.",
+)
+def embeddings(**kwargs):
+ """Run embeddings benchmark with optional quality validation."""
+ from guidellm.benchmark.embeddings_entrypoints import benchmark_embeddings
+ from guidellm.benchmark.schemas.embeddings import BenchmarkEmbeddingsArgs
+
+ # Only set CLI args that differ from click defaults
+ kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
+
+ # Handle console options
+ disable_console = kwargs.pop("disable_console", False)
+ disable_console_interactive = (
+ kwargs.pop("disable_console_interactive", False) or disable_console
+ )
+ console = Console() if not disable_console else None
+
+ envs = cli_tools.list_set_env()
+ if console and envs:
+ console.print_update(
+ title=(
+ "Note: the following environment variables "
+ "are set and **may** affect configuration"
+ ),
+ details=", ".join(envs),
+ status="warning",
+ )
+
+ try:
+ args = BenchmarkEmbeddingsArgs.create(scenario=None, **kwargs)
+ except ValidationError as err:
+ errs = err.errors(include_url=False, include_context=True, include_input=True)
+ param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
+ raise click.BadParameter(
+ errs[0]["msg"], ctx=click.get_current_context(), param_hint=param_name
+ ) from err
+
+ if uvloop is not None:
+ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
+
+ asyncio.run(
+ benchmark_embeddings(
+ args=args,
+ progress=(
+ GenerativeConsoleBenchmarkerProgress()
+ if not disable_console_interactive
+ else None
+ ),
+ console=console,
+ )
+ )
+
+
if __name__ == "__main__":
cli()
diff --git a/src/guidellm/backends/openai/http.py b/src/guidellm/backends/openai/http.py
index d94f30909..1f64fc9f7 100644
--- a/src/guidellm/backends/openai/http.py
+++ b/src/guidellm/backends/openai/http.py
@@ -38,6 +38,8 @@
"/v1/chat/completions": "v1/chat/completions",
"/v1/audio/transcriptions": "v1/audio/transcriptions",
"/v1/audio/translations": "v1/audio/translations",
+ "/v1/embeddings": "v1/embeddings",
+ "embeddings": "v1/embeddings", # Alias for convenience
}
DEFAULT_API = "/v1/chat/completions"
@@ -50,6 +52,9 @@
"audio_translations": "/v1/audio/translations",
}
+# NOTE: This value is taken from httpx's default
+FALLBACK_TIMEOUT = 5.0
+
@Backend.register("openai_http")
class OpenAIHTTPBackend(Backend):
@@ -83,7 +88,8 @@ def __init__(
api_key: str | None = None,
api_routes: dict[str, str] | None = None,
request_handlers: dict[str, Any] | None = None,
- timeout: float = 60.0,
+ timeout: float | None = None,
+ timeout_connect: float | None = FALLBACK_TIMEOUT,
http2: bool = True,
follow_redirects: bool = True,
verify: bool = False,
@@ -133,6 +139,7 @@ def __init__(
self.api_routes = api_routes or DEFAULT_API_PATHS
self.request_handlers = request_handlers
self.timeout = timeout
+ self.timeout_connect = timeout_connect
self.http2 = http2
self.follow_redirects = follow_redirects
self.verify = verify
@@ -162,6 +169,7 @@ def info(self) -> dict[str, Any]:
"target": self.target,
"model": self.model,
"timeout": self.timeout,
+ "timeout_connect": self.timeout_connect,
"http2": self.http2,
"follow_redirects": self.follow_redirects,
"verify": self.verify,
@@ -182,7 +190,11 @@ async def process_startup(self):
self._async_client = httpx.AsyncClient(
http2=self.http2,
- timeout=self.timeout,
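+            # The positional value is httpx's catch-all default (covers
+            # write/pool); read and connect are overridden independently,
+            # and None disables the limit for that phase entirely.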
+ timeout=httpx.Timeout(
+ FALLBACK_TIMEOUT,
+ read=self.timeout,
+ connect=self.timeout_connect,
+ ),
follow_redirects=self.follow_redirects,
verify=self.verify,
# Allow unlimited connections
diff --git a/src/guidellm/backends/openai/request_handlers.py b/src/guidellm/backends/openai/request_handlers.py
index da548894c..490208dcf 100644
--- a/src/guidellm/backends/openai/request_handlers.py
+++ b/src/guidellm/backends/openai/request_handlers.py
@@ -13,6 +13,8 @@
import base64
from typing import Any, Protocol, cast
+from more_itertools import roundrobin
+
from guidellm.schemas import GenerationRequest, GenerationResponse, UsageMetrics
from guidellm.schemas.request import GenerationRequestArguments
from guidellm.utils import RegistryMixin, json
@@ -20,6 +22,7 @@
__all__ = [
"AudioRequestHandler",
"ChatCompletionsRequestHandler",
+ "EmbeddingsRequestHandler",
"OpenAIRequestHandler",
"OpenAIRequestHandlerFactory",
"TextCompletionsRequestHandler",
@@ -363,7 +366,49 @@ class ChatCompletionsRequestHandler(TextCompletionsRequestHandler):
both streaming and non-streaming chat completion responses.
"""
- def format( # noqa: C901, PLR0912, PLR0915
+ def _format_prompts(
+ self, column_data: list[dict[str, Any]], column_type: str
+ ) -> list[dict[str, Any]]:
+ """
+ Helper method to format different types of data columns
+ into the appropriate structure for chat messages.
+ """
+ formatted_data = []
+        for item in column_data:
+            if not item:
+                continue
+ if column_type == "text_column":
+ formatted_data.append({"type": "text", "text": item})
+ elif column_type == "image_column":
+ formatted_data.append(
+ {
+ "type": "image_url",
+ "image_url": {"url": item.get("image")},
+ }
+ )
+ elif column_type == "video_column":
+ formatted_data.append(
+ {
+ "type": "video_url",
+ "video_url": {"url": item.get("video")},
+ }
+ )
+ elif column_type == "audio_column":
+ formatted_data.append(
+ {
+ "type": "input_audio",
+ "input_audio": {
+ "data": base64.b64encode(item.get("audio", b"")).decode(
+ "utf-8"
+ ),
+ "format": item.get("format"),
+ },
+ }
+ )
+ else:
+ raise ValueError(f"Unsupported column type: {column_type}")
+
+ return formatted_data
+
+ def format(
self,
data: GenerationRequest,
**kwargs,
@@ -410,71 +455,20 @@ def format( # noqa: C901, PLR0912, PLR0915
# Build messages
arguments.body["messages"] = []
- for prefix in data.columns.get("prefix_column", []):
- if not prefix:
- continue
-
+ # Build the system prompt
+ prefix = " ".join(data.columns.get("prefix_column", []))
+ if prefix:
arguments.body["messages"].append({"role": "system", "content": prefix})
- for text in data.columns.get("text_column", []):
- if not text:
- continue
-
+ # Build each prompt then combine into a single user message
+ prompts = [
+ self._format_prompts(data.columns.get(col, []), col)
+ for col in ("text_column", "image_column", "video_column", "audio_column")
+ ]
+    if any(prompts):
+ # Interleave prompt types
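+        # e.g. roundrobin([txt1, txt2], [img1]) -> txt1, img1, txt2; content
+        # parts from each modality column are interleaved in column order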
arguments.body["messages"].append(
- {"role": "user", "content": [{"type": "text", "text": text}]}
- )
-
- for image in data.columns.get("image_column", []):
- if not image:
- continue
-
- arguments.body["messages"].append(
- {
- "role": "user",
- "content": [
- {
- "type": "image_url",
- "image_url": {"url": image.get("image")},
- }
- ],
- }
- )
-
- for video in data.columns.get("video_column", []):
- if not video:
- continue
-
- arguments.body["messages"].append(
- {
- "role": "user",
- "content": [
- {
- "type": "video_url",
- "video_url": {"url": video.get("video")},
- }
- ],
- }
- )
-
- for audio in data.columns.get("audio_column", []):
- if not audio:
- continue
-
- arguments.body["messages"].append(
- {
- "role": "user",
- "content": [
- {
- "type": "input_audio",
- "input_audio": {
- "data": base64.b64encode(
- audio.get("audio", b"")
- ).decode("utf-8"),
- "format": audio.get("format"),
- },
- }
- ],
- }
+ {"role": "user", "content": list(roundrobin(*prompts))}
)
return arguments
@@ -667,3 +661,113 @@ def extract_metrics(
text_words=len(text.split()) if text else 0,
text_characters=len(text) if text else 0,
)
+
+
+@OpenAIRequestHandlerFactory.register("/v1/embeddings")
+class EmbeddingsRequestHandler(OpenAIRequestHandler):
+ """
+ Request handler for OpenAI-style embeddings endpoints.
+
+ Handles embeddings requests which do not support streaming and return
+ embedding vectors instead of generated text. Processes input text into
+ embeddings with optional quality validation support.
+ """
+
+ def format(
+ self,
+ data: GenerationRequest,
+ **kwargs,
+ ) -> GenerationRequestArguments:
+ """
+ Format the embeddings generation request.
+
+ :param data: The generation request to format
+ :param **kwargs: Additional keyword arguments (model, encoding_format, etc.)
+ :return: The formatted request arguments
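+
+        Example body produced (illustrative values)::
+
+            {"model": "my-embedding-model", "input": "hello world",
+             "encoding_format": "float"}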
+ """
+ arguments = GenerationRequestArguments()
+ arguments.body = {}
+ arguments.stream = False # Embeddings never stream
+
+ # Add model
+ if kwargs.get("model") is not None:
+ arguments.body["model"] = kwargs["model"]
+
+ # Build input from text columns
+ input_texts = []
+ for text in data.columns.get("text_column", []):
+ if text:
+ input_texts.append(text)
+
+ # Use single string if only one text, otherwise list
+ if len(input_texts) == 1:
+ arguments.body["input"] = input_texts[0]
+ else:
+ arguments.body["input"] = input_texts
+
+ # Add optional parameters
+ if kwargs.get("encoding_format"):
+ arguments.body["encoding_format"] = kwargs["encoding_format"]
+ if kwargs.get("dimensions"):
+ arguments.body["dimensions"] = kwargs["dimensions"]
+ if kwargs.get("truncate_prompt_tokens"):
+ arguments.body["truncate_prompt_tokens"] = kwargs["truncate_prompt_tokens"]
+
+ # Apply extra arguments
+ if kwargs.get("extras"):
+ arguments.body.update(kwargs["extras"])
+
+ return arguments
+
+ def compile_non_streaming(
+ self,
+ request: GenerationRequest,
+ arguments: GenerationRequestArguments,
+ response: Any,
+ ) -> GenerationResponse:
+ """
+ Process a complete non-streaming embeddings API response.
+
+ :param request: Original generation request
+ :param arguments: Request arguments used
+ :param response: Raw API response data
+ :return: GenerationResponse with embeddings data
+ """
+ # Extract usage data
+ usage = response.get("usage", {})
+
+ # Build response (no text output for embeddings)
+ return GenerationResponse(
+ request_id=request.request_id,
+ request_args=arguments.model_dump_json(),
+ text="", # Embeddings don't generate text
+ input_metrics=UsageMetrics(
+ text_tokens=usage.get("prompt_tokens", 0),
+ ),
+ output_metrics=UsageMetrics(
+ text_tokens=0, # No output tokens for embeddings
+ ),
+ )
+
+ def add_streaming_line(self, line: str) -> int | None:
+ """
+ Embeddings do not support streaming.
+
+ :param line: Streaming line (unused)
+ :raises NotImplementedError: Embeddings never stream
+ """
+ raise NotImplementedError("Embeddings do not support streaming")
+
+ def compile_streaming(
+ self, request: GenerationRequest, arguments: GenerationRequestArguments
+ ) -> GenerationResponse:
+ """
+ Embeddings do not support streaming.
+
+ :param request: Generation request (unused)
+ :param arguments: Request arguments (unused)
+ :return: Never returns
+ :raises NotImplementedError: Embeddings never stream
+ """
+ raise NotImplementedError("Embeddings do not support streaming")
diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py
index 56cdb9a72..c0caba404 100644
--- a/src/guidellm/benchmark/benchmarker.py
+++ b/src/guidellm/benchmark/benchmarker.py
@@ -64,7 +64,7 @@ async def run(
environment: Environment,
warmup: TransientPhaseConfig,
cooldown: TransientPhaseConfig,
- sample_requests: int | None = 20,
+ sample_requests: int | None = None,
prefer_response_metrics: bool = True,
progress: (
BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None
diff --git a/src/guidellm/benchmark/embeddings_entrypoints.py b/src/guidellm/benchmark/embeddings_entrypoints.py
new file mode 100644
index 000000000..a49dee801
--- /dev/null
+++ b/src/guidellm/benchmark/embeddings_entrypoints.py
@@ -0,0 +1,310 @@
+"""
+Primary interface for executing embeddings benchmarks.
+
+This module orchestrates embeddings benchmarking workflows by coordinating backend
+initialization, data loading, profile configuration, optional quality validation,
+and output generation. Provides the main entry point `benchmark_embeddings` for
+executing new embeddings benchmarks with comprehensive metric tracking.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, cast
+
+from guidellm.benchmark.benchmarker import Benchmarker
+from guidellm.benchmark.entrypoints import (
+ resolve_backend,
+ resolve_processor,
+ resolve_profile,
+ resolve_request_loader,
+)
+from guidellm.benchmark.outputs import (
+ EmbeddingsBenchmarkerConsole,
+ EmbeddingsBenchmarkerOutput,
+)
+from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress
+from guidellm.benchmark.schemas.base import TransientPhaseConfig
+from guidellm.benchmark.schemas.embeddings import (
+ BenchmarkEmbeddingsArgs,
+ EmbeddingsBenchmark,
+ EmbeddingsBenchmarkAccumulator,
+ EmbeddingsBenchmarksReport,
+)
+from guidellm.scheduler import ConstraintInitializer, NonDistributedEnvironment
+from guidellm.schemas import GenerationRequest, GenerationResponse
+from guidellm.utils import Console
+
+__all__ = ["benchmark_embeddings"]
+
+
+async def resolve_embeddings_output_formats(
+ outputs: list[str] | tuple[str],
+ output_dir: str | Path | None,
+ console: Console | None = None,
+) -> dict[str, EmbeddingsBenchmarkerOutput]:
+ """
+ Resolve output format specifications into configured embeddings output
+ handler instances.
+
+ :param outputs: Specification of desired output files/types
+ :param output_dir: Base path for output file generation, or None for
+ default
+ :param console: Console instance for progress reporting, or None
+ :return: Dictionary mapping format names to configured output handler
+ instances
+ """
+ console_step = (
+ console.print_update_step(title="Resolving output formats")
+ if console
+ else None
+ )
+
+ resolved = EmbeddingsBenchmarkerOutput.resolve(
+ outputs=outputs, output_dir=output_dir
+ )
+
+ if console_step:
+ console_step.finish(
+ title="Output formats resolved",
+ details={key: str(val) for key, val in resolved.items()},
+ status_level="success",
+ )
+
+ return resolved
+
+
+async def benchmark_embeddings( # noqa: C901, PLR0912, PLR0915
+ args: BenchmarkEmbeddingsArgs,
+ progress: GenerativeConsoleBenchmarkerProgress | None = None,
+ console: Console | None = None,
+ **constraints: str | ConstraintInitializer | Any,
+) -> tuple[EmbeddingsBenchmarksReport, dict[str, Any]]:
+ """
+ Execute a comprehensive embeddings benchmarking workflow.
+
+ Orchestrates the full embeddings benchmarking pipeline by resolving all
+ components from provided arguments, executing benchmark runs across
+ configured profiles, and finalizing results in specified output formats.
+ Optionally performs quality validation using cosine similarity and MTEB
+ benchmarks.
+
+ :param args: Configuration arguments for the embeddings benchmark
+ execution
+ :param progress: Progress tracker for benchmark execution, or None for
+ no tracking
+ :param console: Console instance for status reporting, or None for
+ silent operation
+ :param constraints: Additional constraint initializers for benchmark
+ limits
+ :return: Tuple of EmbeddingsBenchmarksReport and dictionary of output
+ format results
+
+ Example:
+ ::
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ data=["dataset.json"],
+ enable_quality_validation=True,
+ baseline_model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+ report, outputs = await benchmark_embeddings(args)
+ """
+ # Resolve backend
+ backend, model = await resolve_backend(
+ backend=args.backend,
+ target=args.target,
+ model=args.model,
+ request_format=args.request_format or "/v1/embeddings",
+ console=console,
+ **(args.backend_kwargs or {}),
+ )
+
+ # Resolve processor (tokenizer)
+ processor = await resolve_processor(
+ processor=args.processor, model=model, console=console
+ )
+
+ # Resolve request loader for embeddings data
+ request_loader = await resolve_request_loader(
+ data=args.data,
+ model=model,
+ data_args=args.data_args,
+ data_samples=args.data_samples,
+ processor=processor,
+ processor_args=args.processor_args,
+ data_column_mapper=args.data_column_mapper,
+ data_preprocessors=args.data_preprocessors,
+ data_preprocessors_kwargs=args.data_preprocessors_kwargs,
+ data_finalizer=args.data_finalizer,
+ data_collator=args.data_collator,
+ data_sampler=args.data_sampler,
+ data_num_workers=args.data_num_workers,
+ random_seed=args.random_seed,
+ console=console,
+ **(args.dataloader_kwargs or {}),
+ )
+
+ # Resolve transient phases
+ warmup = TransientPhaseConfig.create_from_value(args.warmup)
+ cooldown = TransientPhaseConfig.create_from_value(args.cooldown)
+ if console:
+ console.print_update(
+ title="Resolved transient phase configurations",
+ details="\n".join(
+ [
+ f"Warmup: {warmup}",
+ f"Cooldown: {cooldown}",
+ ]
+ ),
+ status="success",
+ )
+
+ # Resolve profile
+ profile = await resolve_profile(
+ profile=args.profile,
+ rate=args.rate,
+ random_seed=args.random_seed,
+ rampup=0.0, # Embeddings typically don't use rampup
+ constraints=constraints,
+ max_seconds=args.max_duration,
+ max_requests=args.max_requests,
+ max_errors=args.max_errors,
+ max_error_rate=None,
+ max_global_error_rate=None,
+ over_saturation=None,
+ console=console,
+ )
+
+ # Resolve output formats
+ output_formats = await resolve_embeddings_output_formats(
+ outputs=args.outputs, output_dir=args.output_dir, console=console
+ )
+
+ # Initialize quality validation if requested
+ if args.enable_quality_validation:
+ if console:
+ console.print_update(
+ title="Initializing quality validation",
+ details=f"Baseline model: {args.baseline_model or model}",
+ status="info",
+ )
+
+ try:
+ from guidellm.benchmark.quality import EmbeddingsQualityValidator
+
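+            # Instantiated eagerly (and discarded) so a missing optional
+            # dependency fails fast here rather than mid-benchmark.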
+ _ = EmbeddingsQualityValidator(
+ baseline_model=args.baseline_model or model,
+ tolerance=args.quality_tolerance,
+ )
+
+ if console:
+ console.print_update(
+ title="Quality validation initialized",
+ details=f"Tolerance: {args.quality_tolerance}",
+ status="success",
+ )
+ except ImportError:
+ if console:
+ console.print_update(
+ title="Quality validation unavailable",
+ details=(
+ "sentence-transformers not installed. "
+ "Install with: pip install sentence-transformers"
+ ),
+ status="warning",
+ )
+
+ # Run MTEB evaluation if requested (before main benchmark)
+ mteb_results = None
+ if args.enable_mteb:
+ if console:
+ console.print_update(
+ title="Running MTEB evaluation",
+ details=f"Tasks: {args.mteb_tasks or 'default'}",
+ status="info",
+ )
+
+ try:
+ from guidellm.benchmark.quality import MTEBValidator
+
+ mteb_validator = MTEBValidator(
+ model_name=args.baseline_model or model,
+ task_names=args.mteb_tasks,
+ )
+ mteb_results = mteb_validator.run_evaluation()
+
+ if console:
+ console.print_update(
+ title="MTEB evaluation complete",
+ details=f"Main score: {mteb_results['mteb_main_score']:.4f}",
+ status="success",
+ )
+ except ImportError:
+ if console:
+ console.print_update(
+ title="MTEB evaluation unavailable",
+ details="mteb not installed. Install with: pip install mteb",
+ status="warning",
+ )
+
+ # Create report
+ report = EmbeddingsBenchmarksReport(args=args)
+
+ if console:
+ console.print_update(
+ title="Setup complete, starting embeddings benchmarks...", status="success"
+ )
+ console.print("\n\n")
+
+ # Run benchmarks
+ benchmarker: Benchmarker[
+ EmbeddingsBenchmark, GenerationRequest, GenerationResponse
+ ] = Benchmarker()
+
+ async for benchmark in benchmarker.run(
+ accumulator_class=EmbeddingsBenchmarkAccumulator,
+ benchmark_class=EmbeddingsBenchmark,
+ requests=request_loader,
+ backend=backend,
+ profile=profile,
+ environment=NonDistributedEnvironment(),
+ progress=cast("Any", progress), # type: ignore[arg-type]
+        sample_requests=None,  # Embeddings don't need request sampling
+ warmup=warmup,
+ cooldown=cooldown,
+ prefer_response_metrics=True, # Prefer API-provided metrics
+ ):
+ if benchmark:
+ # Inject MTEB results if available
+ if mteb_results and benchmark.metrics.quality:
+ benchmark.metrics.quality.mteb_main_score = mteb_results[
+ "mteb_main_score"
+ ]
+ benchmark.metrics.quality.mteb_task_scores = mteb_results[
+ "mteb_task_scores"
+ ]
+
+ report.benchmarks.append(benchmark)
+
+ # Finalize outputs
+ output_format_results = {}
+ for key, output in output_formats.items():
+ output_result = await output.finalize(report)
+ output_format_results[key] = output_result
+
+ # Print console output
+ if console:
+ await EmbeddingsBenchmarkerConsole(console=console).finalize(report)
+ console.print("\n\n")
+ console.print_update(
+ title=(
+ "Embeddings benchmarking complete, generated "
+ f"{len(report.benchmarks)} benchmark(s)"
+ ),
+ status="success",
+ )
+ for key, value in output_format_results.items():
+ console.print_update(title=f" {key:<8}: {value}", status="debug")
+
+ return report, output_format_results
diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py
index dd634d9a5..89dd8c044 100644
--- a/src/guidellm/benchmark/entrypoints.py
+++ b/src/guidellm/benchmark/entrypoints.py
@@ -39,7 +39,6 @@
DatasetFinalizer,
DatasetPreprocessor,
FinalizerRegistry,
- GenerativeRequestCollator,
PreprocessorRegistry,
ProcessorFactory,
)
@@ -237,12 +236,12 @@ async def resolve_request_loader(
data_column_mapper: (
DatasetPreprocessor
| dict[str, str | list[str]]
- | Literal["generative_column_mapper"]
+ | Literal["generative_column_mapper", "embeddings_column_mapper"]
),
data_preprocessors: list[DatasetPreprocessor | dict[str, str | list[str]] | str],
data_preprocessors_kwargs: dict[str, Any],
data_finalizer: (DatasetFinalizer | dict[str, Any] | str),
- data_collator: Callable | Literal["generative"] | None,
+ data_collator: Callable | Literal["generative", "embeddings"] | None,
data_sampler: Sampler[int] | Literal["shuffle"] | None,
data_num_workers: int | None,
random_seed: int,
@@ -306,6 +305,16 @@ async def resolve_request_loader(
data_finalizer,
)
+ # Resolve collator from string or use provided callable
+ if callable(data_collator):
+ collator_instance = data_collator
+ elif data_collator == "embeddings":
+ from guidellm.data import EmbeddingsRequestCollator
+ collator_instance = EmbeddingsRequestCollator()
+ else: # default to "generative" or None
+ from guidellm.data import GenerativeRequestCollator
+ collator_instance = GenerativeRequestCollator()
+
request_loader: DataLoader[GenerationRequest] = DataLoader(
data=data,
data_args=data_args,
@@ -316,9 +325,7 @@ async def resolve_request_loader(
),
preprocessors=preprocessors_list,
finalizer=finalizer_instance,
- collator=(
- data_collator if callable(data_collator) else GenerativeRequestCollator()
- ),
+ collator=collator_instance,
sampler=data_sampler,
num_workers=data_num_workers,
random_seed=random_seed,
diff --git a/src/guidellm/benchmark/outputs/__init__.py b/src/guidellm/benchmark/outputs/__init__.py
index 2e321605d..75c4b6b88 100644
--- a/src/guidellm/benchmark/outputs/__init__.py
+++ b/src/guidellm/benchmark/outputs/__init__.py
@@ -11,11 +11,20 @@
from .console import GenerativeBenchmarkerConsole
from .csv import GenerativeBenchmarkerCSV
+from .embeddings_console import EmbeddingsBenchmarkerConsole
+from .embeddings_csv import EmbeddingsBenchmarkerCSV
+from .embeddings_html import EmbeddingsBenchmarkerHTML
+from .embeddings_serialized import EmbeddingsBenchmarkerSerialized
from .html import GenerativeBenchmarkerHTML
-from .output import GenerativeBenchmarkerOutput
+from .output import EmbeddingsBenchmarkerOutput, GenerativeBenchmarkerOutput
from .serialized import GenerativeBenchmarkerSerialized
__all__ = [
+ "EmbeddingsBenchmarkerCSV",
+ "EmbeddingsBenchmarkerConsole",
+ "EmbeddingsBenchmarkerHTML",
+ "EmbeddingsBenchmarkerOutput",
+ "EmbeddingsBenchmarkerSerialized",
"GenerativeBenchmarkerCSV",
"GenerativeBenchmarkerConsole",
"GenerativeBenchmarkerHTML",
diff --git a/src/guidellm/benchmark/outputs/console.py b/src/guidellm/benchmark/outputs/console.py
index 70070c425..d84e433f5 100644
--- a/src/guidellm/benchmark/outputs/console.py
+++ b/src/guidellm/benchmark/outputs/console.py
@@ -265,19 +265,31 @@ def print_run_summary_table(self, report: GenerativeBenchmarksReport):
(benchmark.metrics.output_token_count, "Output Tokens"),
]:
columns.add_value(
- token_metrics.successful.total_sum,
+ (
+ token_metrics.successful.total_sum
+ if token_metrics.successful is not None
+ else 0.0
+ ),
group=group,
name="Comp",
units="Tot",
)
columns.add_value(
- token_metrics.incomplete.total_sum,
+ (
+ token_metrics.incomplete.total_sum
+ if token_metrics.incomplete is not None
+ else 0.0
+ ),
group=group,
name="Inc",
units="Tot",
)
columns.add_value(
- token_metrics.errored.total_sum,
+ (
+ token_metrics.errored.total_sum
+ if token_metrics.errored is not None
+ else 0.0
+ ),
group=group,
name="Err",
units="Tot",
diff --git a/src/guidellm/benchmark/outputs/csv.py b/src/guidellm/benchmark/outputs/csv.py
index 081886cfd..eb4479d25 100644
--- a/src/guidellm/benchmark/outputs/csv.py
+++ b/src/guidellm/benchmark/outputs/csv.py
@@ -621,7 +621,7 @@ def _add_scheduler_metrics(
"""
metrics = benchmark.scheduler_metrics
- requests_made_fields: list[tuple[str, int]] = [
+ requests_made_fields: list[tuple[str, int | None]] = [
("Requests Made Successful", metrics.requests_made.successful),
("Requests Made Incomplete", metrics.requests_made.incomplete),
("Requests Made Errored", metrics.requests_made.errored),
diff --git a/src/guidellm/benchmark/outputs/embeddings_console.py b/src/guidellm/benchmark/outputs/embeddings_console.py
new file mode 100644
index 000000000..848439cc4
--- /dev/null
+++ b/src/guidellm/benchmark/outputs/embeddings_console.py
@@ -0,0 +1,284 @@
+"""
+Console output formatter for embeddings benchmarker results.
+
+Provides console-based output formatting for embeddings benchmark reports,
+organizing metrics into structured tables that display request statistics,
+latency measurements, throughput data, and optional quality validation metrics
+(cosine similarity, MTEB scores). Simplified compared to generative output since
+embeddings don't have output tokens or streaming behavior.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from pydantic import Field
+
+from guidellm.benchmark.outputs.console import ConsoleTableColumnsCollection
+from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
+from guidellm.benchmark.schemas.embeddings import EmbeddingsBenchmarksReport
+from guidellm.utils import Console
+
+__all__ = ["EmbeddingsBenchmarkerConsole"]
+
+
+@EmbeddingsBenchmarkerOutput.register(["console"])
+class EmbeddingsBenchmarkerConsole(EmbeddingsBenchmarkerOutput):
+ """
+ Console output formatter for embeddings benchmark reports.
+
+ Renders embeddings benchmark results as formatted tables in the terminal,
+ organizing metrics by category (run summary, request counts, latency,
+ throughput, quality validation) with proper alignment and type-specific
+ formatting for readability.
+ """
+
+ @classmethod
+ def validated_kwargs(cls, *_args, **_kwargs) -> dict[str, Any]:
+ """
+ Validate and return keyword arguments for initialization.
+
+ :return: Empty dict as no additional kwargs are required
+ """
+ return {}
+
+ console: Console = Field(
+ default_factory=Console,
+ description="Console utility for rendering formatted tables",
+ )
+
+ async def finalize(self, report: EmbeddingsBenchmarksReport) -> None:
+ """
+ Print the complete embeddings benchmark report to the console.
+
+ Renders all metric tables including run summary, request counts, latency,
+ throughput, and quality metrics to the console.
+
+ :param report: The completed embeddings benchmark report
+ :return: None (console output only)
+ """
+ self.print_run_summary_table(report)
+ self.print_request_counts_table(report)
+ self.print_request_latency_table(report)
+ self.print_server_throughput_table(report)
+ self.print_quality_metrics_table(report)
+
+ def print_run_summary_table(self, report: EmbeddingsBenchmarksReport):
+ """
+ Print the run summary table with timing and token information.
+
+ :param report: The embeddings benchmark report containing run metadata
+ """
+ columns = ConsoleTableColumnsCollection()
+
+ for benchmark in report.benchmarks:
+ columns.add_value(
+ benchmark.config.strategy.type_,
+ group="Benchmark",
+ name="Strategy",
+ type_="text",
+ )
+ columns.add_value(
+ benchmark.start_time, group="Timings", name="Start", type_="timestamp"
+ )
+ columns.add_value(
+ benchmark.end_time, group="Timings", name="End", type_="timestamp"
+ )
+ columns.add_value(
+ benchmark.duration, group="Timings", name="Dur", units="Sec"
+ )
+ columns.add_value(
+ benchmark.warmup_duration, group="Timings", name="Warm", units="Sec"
+ )
+ columns.add_value(
+ benchmark.cooldown_duration, group="Timings", name="Cool", units="Sec"
+ )
+
+ # Only input tokens for embeddings (no output tokens)
+ token_metrics = benchmark.metrics.input_tokens_count
+ columns.add_value(
+ token_metrics.successful,
+ group="Input Tokens",
+ name="Comp",
+ units="Tot",
+ )
+ columns.add_value(
+ token_metrics.incomplete,
+ group="Input Tokens",
+ name="Inc",
+ units="Tot",
+ )
+ columns.add_value(
+ token_metrics.errored,
+ group="Input Tokens",
+ name="Err",
+ units="Tot",
+ )
+
+ headers, values = columns.get_table_data()
+ self.console.print("\n")
+ self.console.print_table(headers, values, title="Run Summary")
+
+ def print_request_counts_table(self, report: EmbeddingsBenchmarksReport):
+ """
+ Print the request counts table.
+
+ :param report: The embeddings benchmark report
+ """
+ columns = ConsoleTableColumnsCollection()
+
+ for benchmark in report.benchmarks:
+ columns.add_value(
+ benchmark.config.strategy.type_,
+ group="Benchmark",
+ name="Strategy",
+ type_="text",
+ )
+
+ for status in ["successful", "incomplete", "errored", "total"]:
+ count = getattr(benchmark.metrics.request_totals, status)
+ columns.add_value(
+ count,
+ group="Request Counts",
+ name=status.capitalize(),
+ units="Reqs",
+ )
+
+ headers, values = columns.get_table_data()
+ self.console.print("\n")
+ self.console.print_table(headers, values, title="Request Counts")
+
+ def print_request_latency_table(self, report: EmbeddingsBenchmarksReport):
+ """
+ Print the request latency table.
+
+ :param report: The embeddings benchmark report
+ """
+ columns = ConsoleTableColumnsCollection()
+
+ for benchmark in report.benchmarks:
+ columns.add_value(
+ benchmark.config.strategy.type_,
+ group="Benchmark",
+ name="Strategy",
+ type_="text",
+ )
+
+ # Request latency stats
+ columns.add_stats(
+ benchmark.metrics.request_latency,
+ status="successful",
+ group="Request Latency",
+ name="Latency",
+ precision=3,
+ )
+
+ # Request concurrency
+ columns.add_stats(
+ benchmark.metrics.request_concurrency,
+ status="successful",
+ group="Concurrency",
+ name="Concurrent",
+ precision=1,
+ )
+
+ headers, values = columns.get_table_data()
+ self.console.print("\n")
+ self.console.print_table(headers, values, title="Request Latency")
+
+ def print_server_throughput_table(self, report: EmbeddingsBenchmarksReport):
+ """
+ Print the server throughput table.
+
+ :param report: The embeddings benchmark report
+ """
+ columns = ConsoleTableColumnsCollection()
+
+ for benchmark in report.benchmarks:
+ columns.add_value(
+ benchmark.config.strategy.type_,
+ group="Benchmark",
+ name="Strategy",
+ type_="text",
+ )
+
+ # Requests per second
+ columns.add_stats(
+ benchmark.metrics.requests_per_second,
+ status="successful",
+ group="Request Throughput",
+ name="Reqs",
+ precision=2,
+ )
+
+ # Input tokens per second
+ columns.add_stats(
+ benchmark.metrics.input_tokens_per_second,
+ status="successful",
+ group="Token Throughput",
+ name="Input Tok",
+ precision=1,
+ )
+
+ headers, values = columns.get_table_data()
+ self.console.print("\n")
+ self.console.print_table(headers, values, title="Server Throughput")
+
+ def print_quality_metrics_table(self, report: EmbeddingsBenchmarksReport):
+ """
+ Print the quality metrics table (if quality validation was enabled).
+
+ :param report: The embeddings benchmark report
+ """
+ # Check if any benchmark has quality metrics
+ has_quality = any(
+ benchmark.metrics.quality is not None for benchmark in report.benchmarks
+ )
+
+ if not has_quality:
+ return
+
+ columns = ConsoleTableColumnsCollection()
+
+ for benchmark in report.benchmarks:
+ columns.add_value(
+ benchmark.config.strategy.type_,
+ group="Benchmark",
+ name="Strategy",
+ type_="text",
+ )
+
+ if benchmark.metrics.quality:
+ # Cosine similarity
+ if benchmark.metrics.quality.baseline_cosine_similarity:
+ columns.add_stats(
+ benchmark.metrics.quality.baseline_cosine_similarity,
+ status="successful",
+ group="Cosine Similarity",
+ name="Baseline",
+ precision=4,
+ )
+
+ # Self-consistency
+ if benchmark.metrics.quality.self_consistency_score:
+ columns.add_stats(
+ benchmark.metrics.quality.self_consistency_score,
+ status="successful",
+ group="Consistency",
+ name="Self",
+ precision=4,
+ )
+
+ # MTEB main score
+ if benchmark.metrics.quality.mteb_main_score is not None:
+ columns.add_value(
+ benchmark.metrics.quality.mteb_main_score,
+ group="MTEB",
+ name="Main",
+ units="Score",
+ precision=4,
+ )
+
+ headers, values = columns.get_table_data()
+ self.console.print("\n")
+ self.console.print_table(headers, values, title="Quality Metrics")
diff --git a/src/guidellm/benchmark/outputs/embeddings_csv.py b/src/guidellm/benchmark/outputs/embeddings_csv.py
new file mode 100644
index 000000000..c83f3f718
--- /dev/null
+++ b/src/guidellm/benchmark/outputs/embeddings_csv.py
@@ -0,0 +1,393 @@
+"""
+CSV output formatter for embeddings benchmark results.
+
+Provides CSV export functionality for embeddings benchmark reports with comprehensive
+metrics including timing, throughput, latency, input token data, and optional quality
+validation metrics (cosine similarity, MTEB scores). Uses multi-row headers to organize
+metrics hierarchically without output tokens or streaming behavior.
+"""
+
+from __future__ import annotations
+
+import csv
+from pathlib import Path
+from typing import TYPE_CHECKING, Annotated, Any, ClassVar
+
+from pydantic import Field
+
+if TYPE_CHECKING:
+ from _csv import _writer
+
+from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
+from guidellm.benchmark.schemas.embeddings import (
+ EmbeddingsBenchmark,
+ EmbeddingsBenchmarksReport,
+)
+from guidellm.schemas import DistributionSummary, StatusDistributionSummary
+from guidellm.utils import safe_format_timestamp
+
+__all__ = ["EmbeddingsBenchmarkerCSV"]
+
+TIMESTAMP_FORMAT: Annotated[str, "Format string for timestamp output in CSV files"] = (
+ "%Y-%m-%d %H:%M:%S"
+)
+
+
+@EmbeddingsBenchmarkerOutput.register("csv")
+class EmbeddingsBenchmarkerCSV(EmbeddingsBenchmarkerOutput):
+ """
+ CSV output formatter for embeddings benchmark results.
+
+ Exports comprehensive embeddings benchmark data to CSV format with
+ multi-row headers organizing metrics into categories including run
+ information, timing, request counts, latency, throughput, input token
+ data, quality validation metrics, and scheduler state. Each benchmark run
+ becomes a row with statistical distributions represented as mean, median,
+ standard deviation, and percentiles.
+
+ :cvar DEFAULT_FILE: Default filename for CSV output
+ """
+
+ DEFAULT_FILE: ClassVar[str] = "embeddings_benchmarks.csv"
+
+ @classmethod
+ def validated_kwargs(
+ cls, output_path: str | Path | None, **_kwargs
+ ) -> dict[str, Any]:
+ """
+ Validate and normalize constructor keyword arguments.
+
+ :param output_path: Path for CSV output file or directory
+ :param _kwargs: Additional keyword arguments (ignored)
+ :return: Normalized keyword arguments dictionary
+ """
+ new_kwargs = {}
+ if output_path is not None:
+ new_kwargs["output_path"] = (
+ Path(output_path) if not isinstance(output_path, Path) else output_path
+ )
+ return new_kwargs
+
+ output_path: Path = Field(
+ default_factory=lambda: Path.cwd(),
+ description=(
+ "Path where the CSV file will be saved, defaults to current "
+ "directory"
+ ),
+ )
+
+ async def finalize(self, report: EmbeddingsBenchmarksReport) -> Path:
+ """
+ Save the embeddings benchmark report as a CSV file.
+
+ :param report: The completed embeddings benchmark report
+ :return: Path to the saved CSV file
+ """
+ output_path = self.output_path
+ if output_path.is_dir():
+ output_path = output_path / EmbeddingsBenchmarkerCSV.DEFAULT_FILE
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+
+ with output_path.open("w", newline="") as file:
+ writer = csv.writer(file)
+ headers: list[list[str]] = []
+ rows: list[list[str | int | float]] = []
+
+ for benchmark in report.benchmarks:
+ benchmark_headers: list[list[str]] = []
+ benchmark_values: list[str | int | float] = []
+
+ self._add_run_info(benchmark, benchmark_headers, benchmark_values)
+ self._add_benchmark_info(benchmark, benchmark_headers, benchmark_values)
+ self._add_timing_info(benchmark, benchmark_headers, benchmark_values)
+ self._add_request_counts(benchmark, benchmark_headers, benchmark_values)
+ self._add_request_latency_metrics(
+ benchmark, benchmark_headers, benchmark_values
+ )
+ self._add_server_throughput_metrics(
+ benchmark, benchmark_headers, benchmark_values
+ )
+ self._add_input_token_metrics(
+ benchmark, benchmark_headers, benchmark_values
+ )
+ self._add_quality_metrics(
+ benchmark, benchmark_headers, benchmark_values
+ )
+ self._add_scheduler_info(
+ benchmark, benchmark_headers, benchmark_values
+ )
+ self._add_runtime_info(report, benchmark_headers, benchmark_values)
+
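+                # Column layout is identical across benchmarks; keep the
+                # first benchmark's headers and append one value row each.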
+ if not headers:
+ headers = benchmark_headers
+ rows.append(benchmark_values)
+
+ self._write_multirow_header(writer, headers)
+ for row in rows:
+ writer.writerow(row)
+
+ return output_path
+
+ def _write_multirow_header(
+ self, writer: _writer, headers: list[list[str]]
+ ) -> None:
+ """
+ Write multi-row header to CSV file.
+
+ Transposes column-wise headers into row-wise header rows with proper
+ alignment for hierarchical metric organization.
+
+ :param writer: CSV writer instance
+ :param headers: List of header columns, each column is [group, name, units]
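+
+        Example (illustrative)::
+
+            [["Timings", "Start", ""], ["Timings", "Duration", "Sec"]]
+            # -> rows: "Timings,Timings", "Start,Duration", ",Sec"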
+ """
+ if not headers:
+ return
+
+ num_rows = max(len(header) for header in headers)
+ header_rows: list[list[str]] = [[] for _ in range(num_rows)]
+
+ for header in headers:
+ for i in range(num_rows):
+ header_rows[i].append(header[i] if i < len(header) else "")
+
+ for row in header_rows:
+ writer.writerow(row)
+
+ def _add_run_info(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add run identification information."""
+ headers.append(["Run Info", "Model", ""])
+ model = (
+ benchmark.config.requests.get("model", "N/A")
+ if isinstance(benchmark.config.requests, dict)
+ else "N/A"
+ )
+ values.append(model)
+
+ headers.append(["Run Info", "Backend", ""])
+ backend = (
+ benchmark.config.backend.get("type", "N/A")
+ if isinstance(benchmark.config.backend, dict)
+ else "N/A"
+ )
+ values.append(backend)
+
+ def _add_benchmark_info(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add benchmark configuration information."""
+ headers.append(["Benchmark", "Strategy", ""])
+ values.append(benchmark.config.strategy.type_)
+
+ if hasattr(benchmark.config.strategy, "rate"):
+ headers.append(["Benchmark", "Rate", "Req/s"])
+ values.append(benchmark.config.strategy.rate or 0)
+
+ def _add_timing_info(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add timing information."""
+ headers.append(["Timings", "Start", ""])
+ values.append(safe_format_timestamp(benchmark.start_time, TIMESTAMP_FORMAT))
+
+ headers.append(["Timings", "End", ""])
+ values.append(safe_format_timestamp(benchmark.end_time, TIMESTAMP_FORMAT))
+
+ headers.append(["Timings", "Duration", "Sec"])
+ values.append(benchmark.duration)
+
+ headers.append(["Timings", "Warmup", "Sec"])
+ values.append(benchmark.warmup_duration)
+
+ headers.append(["Timings", "Cooldown", "Sec"])
+ values.append(benchmark.cooldown_duration)
+
+ def _add_request_counts(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add request count information."""
+ for status in ["successful", "incomplete", "errored", "total"]:
+ count = getattr(benchmark.metrics.request_totals, status)
+ headers.append(["Request Counts", status.capitalize(), "Reqs"])
+ values.append(count)
+
+ def _add_request_latency_metrics(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add request latency metrics."""
+ self._add_stats_for_metric(
+ headers,
+ values,
+ benchmark.metrics.request_latency,
+ "Request Latency",
+ "Latency (s)",
+ )
+
+ self._add_stats_for_metric(
+ headers,
+ values,
+ benchmark.metrics.request_concurrency,
+ "Concurrency",
+ "Concurrent Reqs",
+ )
+
+ def _add_server_throughput_metrics(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add server throughput metrics."""
+ self._add_stats_for_metric(
+ headers,
+ values,
+ benchmark.metrics.requests_per_second,
+ "Request Throughput",
+ "Reqs/s",
+ )
+
+ self._add_stats_for_metric(
+ headers,
+ values,
+ benchmark.metrics.input_tokens_per_second,
+ "Token Throughput",
+ "Input Tok/s",
+ )
+
+ def _add_input_token_metrics(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add input token count metrics (no output tokens for embeddings)."""
+ for status in ["successful", "incomplete", "errored", "total"]:
+ count = getattr(benchmark.metrics.input_tokens_count, status)
+ headers.append(["Input Tokens", status.capitalize(), "Tokens"])
+ values.append(count)
+
+ def _add_quality_metrics(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add quality validation metrics if available."""
+ if not benchmark.metrics.quality:
+ return
+
+ # Cosine similarity
+ if benchmark.metrics.quality.baseline_cosine_similarity:
+ self._add_stats_for_metric(
+ headers,
+ values,
+ benchmark.metrics.quality.baseline_cosine_similarity,
+ "Quality Validation",
+ "Cosine Sim",
+ )
+
+ # Self-consistency
+ if benchmark.metrics.quality.self_consistency_score:
+ self._add_stats_for_metric(
+ headers,
+ values,
+ benchmark.metrics.quality.self_consistency_score,
+ "Quality Validation",
+ "Consistency",
+ )
+
+ # MTEB main score
+ if benchmark.metrics.quality.mteb_main_score is not None:
+ headers.append(["MTEB", "Main Score", ""])
+ values.append(benchmark.metrics.quality.mteb_main_score)
+
+ # MTEB task scores
+ if benchmark.metrics.quality.mteb_task_scores:
+ for task, score in benchmark.metrics.quality.mteb_task_scores.items():
+ headers.append(["MTEB Tasks", task, "Score"])
+ values.append(score)
+
+ def _add_scheduler_info(
+ self,
+ benchmark: EmbeddingsBenchmark,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add scheduler state information."""
+ headers.append(["Scheduler", "Queued Avg", "Sec"])
+ values.append(benchmark.scheduler_metrics.queued_time_avg)
+
+ headers.append(["Scheduler", "Resolve Avg", "Sec"])
+ values.append(benchmark.scheduler_metrics.resolve_time_avg)
+
+ def _add_runtime_info(
+ self,
+ report: EmbeddingsBenchmarksReport,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ ) -> None:
+ """Add runtime environment information."""
+ headers.append(["Runtime", "GuideLLM Ver", ""])
+ values.append(report.metadata.guidellm_version)
+
+ headers.append(["Runtime", "Python Ver", ""])
+ values.append(report.metadata.python_version)
+
+ def _add_stats_for_metric(
+ self,
+ headers: list[list[str]],
+ values: list[str | int | float],
+ stats: StatusDistributionSummary,
+ group: str,
+ metric_name: str,
+ ) -> None:
+ """
+ Add statistical columns for a metric with mean, median, stddev, and percentiles.
+
+ :param headers: Headers list to append to
+ :param values: Values list to append to
+ :param stats: Status distribution summary containing statistics
+ :param group: Metric group name for header
+ :param metric_name: Metric display name
+ """
+ successful_stats: DistributionSummary | None = stats.successful
+
+ # Mean
+ headers.append([group, metric_name, "Mean"])
+ values.append(successful_stats.mean if successful_stats else 0)
+
+ # Median
+ headers.append([group, metric_name, "Median"])
+ values.append(successful_stats.median if successful_stats else 0)
+
+ # Std Dev
+ headers.append([group, metric_name, "StdDev"])
+ values.append(successful_stats.std_dev if successful_stats else 0)
+
+ # P95
+ headers.append([group, metric_name, "P95"])
+ values.append(
+ successful_stats.percentiles.p95 if successful_stats else 0
+ )
+
+ # P99
+ headers.append([group, metric_name, "P99"])
+ values.append(
+ successful_stats.percentiles.p99 if successful_stats else 0
+ )
diff --git a/src/guidellm/benchmark/outputs/embeddings_html.py b/src/guidellm/benchmark/outputs/embeddings_html.py
new file mode 100644
index 000000000..06ffc7390
--- /dev/null
+++ b/src/guidellm/benchmark/outputs/embeddings_html.py
@@ -0,0 +1,347 @@
+"""
+HTML output formatter for embeddings benchmark results.
+
+Transforms embeddings benchmark data into interactive web-based reports by
+building UI data structures, converting keys to camelCase for JavaScript
+compatibility, and injecting formatted data into HTML templates. Simplified
+compared to generative output since embeddings don't have output tokens,
+streaming behavior, or multi-modality support.
+"""
+
+from __future__ import annotations
+
+import json
+from copy import deepcopy
+from pathlib import Path
+from typing import Any, ClassVar
+
+from pydantic import Field
+
+from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
+from guidellm.benchmark.schemas.embeddings import (
+ BenchmarkEmbeddingsArgs,
+ EmbeddingsBenchmark,
+ EmbeddingsBenchmarksReport,
+)
+from guidellm.utils import camelize_str, recursive_key_update
+
+__all__ = ["EmbeddingsBenchmarkerHTML"]
+
+
+@EmbeddingsBenchmarkerOutput.register("html")
+class EmbeddingsBenchmarkerHTML(EmbeddingsBenchmarkerOutput):
+ """
+ HTML output formatter for embeddings benchmark results.
+
+ Generates interactive HTML reports from embeddings benchmark data by
+ transforming results into camelCase JSON structures and injecting them into
+ HTML templates. The formatter processes benchmark metrics, creates
+ distribution visualizations, and embeds all data into a pre-built HTML
+ template for browser-based display.
+
+ :cvar DEFAULT_FILE: Default filename for HTML output when a directory is
+ provided
+ """
+
+ DEFAULT_FILE: ClassVar[str] = "embeddings_benchmarks.html"
+
+ output_path: Path = Field(
+ default_factory=lambda: Path.cwd(),
+ description="Directory or file path for saving the HTML report",
+ )
+
+ @classmethod
+ def validated_kwargs(
+ cls, output_path: str | Path | None, **_kwargs
+ ) -> dict[str, Any]:
+ """
+ Validate and normalize output path argument.
+
+ :param output_path: Output file or directory path for the HTML report
+ :return: Dictionary containing validated output_path if provided
+ """
+ validated: dict[str, Any] = {}
+ if output_path is not None:
+ validated["output_path"] = (
+ Path(output_path) if not isinstance(output_path, Path) else output_path
+ )
+ return validated
+
+ async def finalize(self, report: EmbeddingsBenchmarksReport) -> Path:
+ """
+ Generate and save the HTML embeddings benchmark report.
+
+ :param report: Completed embeddings benchmark report
+ :return: Path to the saved HTML report file
+ """
+ output_path = self.output_path
+ if output_path.is_dir():
+ output_path = output_path / self.DEFAULT_FILE
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+
+ data = self._build_ui_data(report.benchmarks, report.args)
+ camel_data = recursive_key_update(deepcopy(data), camelize_str)
+
+ ui_api_data = {
+ "data": camel_data,
+ "guidelLmVersion": report.metadata.guidellm_version,
+ }
+
+ # Load HTML template from package resources
+        import importlib.resources
+
+        template_content = (
+ importlib.resources.files("guidellm.benchmark.outputs")
+ .joinpath("html_outputs/embeddings_template.html")
+ .read_text()
+ )
+
+ # Inject data into template
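+        # (assumes embeddings_template.html defines this exact placeholder)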
+ html_content = template_content.replace(
+ "const uiApiData = {};",
+ f"const uiApiData = {json.dumps(ui_api_data, indent=2)};",
+ )
+
+ output_path.write_text(html_content)
+ return output_path
+
+ def _build_ui_data(
+ self,
+ benchmarks: list[EmbeddingsBenchmark],
+ args: BenchmarkEmbeddingsArgs,
+ ) -> dict[str, Any]:
+ """
+ Build UI data structure from benchmarks and arguments.
+
+ :param benchmarks: List of completed benchmarks
+ :param args: Benchmark arguments
+ :return: Dictionary containing all UI data
+ """
+ return {
+ "run_info": {
+ "model": args.model or "N/A",
+ "backend": str(args.backend),
+ "task": "embeddings",
+ "target": args.target,
+ },
+ "workload_details": self._build_workload_details(benchmarks),
+ "benchmarks": self._build_benchmarks_data(benchmarks),
+ }
+
+ def _build_workload_details(
+ self, benchmarks: list[EmbeddingsBenchmark]
+ ) -> dict[str, Any]:
+ """
+ Build workload details section.
+
+ :param benchmarks: List of completed benchmarks
+ :return: Workload details dictionary
+ """
+ if not benchmarks:
+ return {}
+
+ # Sample from first benchmark
+ first_benchmark = benchmarks[0]
+
+ # Build input text statistics
+ input_texts = []
+ if first_benchmark.requests.successful is not None:
+ for req in first_benchmark.requests.successful[:10]: # Sample first 10
+ if req.input_metrics.text_tokens:
+ input_texts.append(
+ {
+ "tokens": req.input_metrics.text_tokens,
+ "sample": f"Sample request {req.request_id[:8]}...",
+ }
+ )
+
+ successful_count = first_benchmark.metrics.request_totals.successful or 0
+ successful_tokens = first_benchmark.metrics.input_tokens_count.successful or 0
+ return {
+ "prompts": {
+ "samples": input_texts,
+ "token_statistics": {
+ "mean": (
+ successful_tokens / successful_count
+ if successful_count > 0
+ else 0
+ ),
+ },
+ },
+ "quality_validation": self._build_quality_section(first_benchmark)
+ if first_benchmark.metrics.quality
+ else None,
+ }
+
+ def _build_quality_section(
+ self, benchmark: EmbeddingsBenchmark
+ ) -> dict[str, Any] | None:
+ """
+ Build quality validation section.
+
+ :param benchmark: Benchmark with quality metrics
+ :return: Quality section dictionary or None
+ """
+ if not benchmark.metrics.quality:
+ return None
+
+ quality = benchmark.metrics.quality
+ section: dict[str, Any] = {}
+
+ # Cosine similarity distribution
+ if (
+ quality.baseline_cosine_similarity
+ and quality.baseline_cosine_similarity.successful
+ ):
+ section["cosine_similarity"] = {
+ "mean": (
+ quality.baseline_cosine_similarity.successful.mean
+ ),
+ "median": (
+ quality.baseline_cosine_similarity.successful.median
+ ),
+ "std_dev": (
+ quality.baseline_cosine_similarity.successful.std_dev
+ ),
+ "p95": (
+ quality.baseline_cosine_similarity.successful
+ .percentiles.p95
+ ),
+ }
+
+ # MTEB scores
+ if quality.mteb_main_score is not None:
+ section["mteb"] = {
+ "main_score": quality.mteb_main_score,
+ "task_scores": quality.mteb_task_scores or {},
+ }
+
+ return section if section else None
+
+ def _build_benchmarks_data(
+ self, benchmarks: list[EmbeddingsBenchmark]
+ ) -> list[dict[str, Any]]:
+ """
+ Build benchmarks data for visualization.
+
+ :param benchmarks: List of completed benchmarks
+ :return: List of benchmark data dictionaries
+ """
+ results = []
+
+ for benchmark in benchmarks:
+ metrics = benchmark.metrics
+
+ benchmark_data = {
+ "strategy": benchmark.config.strategy.type_,
+ "rate": getattr(benchmark.config.strategy, "rate", None),
+ "duration": benchmark.duration,
+ "warmup_duration": benchmark.warmup_duration,
+ "cooldown_duration": benchmark.cooldown_duration,
+ # Request counts
+ "request_counts": {
+ "successful": metrics.request_totals.successful,
+ "incomplete": metrics.request_totals.incomplete,
+ "errored": metrics.request_totals.errored,
+ "total": metrics.request_totals.total,
+ },
+ # Request metrics
+ "request_latency": self._distribution_to_dict(
+ metrics.request_latency.successful
+ ),
+ "request_concurrency": self._distribution_to_dict(
+ metrics.request_concurrency.successful
+ ),
+ "requests_per_second": self._distribution_to_dict(
+ metrics.requests_per_second.successful
+ ),
+ # Token metrics (input only)
+ "input_tokens": {
+ "total": metrics.input_tokens_count.successful,
+ "per_second": self._distribution_to_dict(
+ metrics.input_tokens_per_second.successful
+ ),
+ },
+ # Quality metrics (if available)
+ "quality": (
+ self._build_quality_data(benchmark)
+ if metrics.quality
+ else None
+ ),
+ }
+
+ results.append(benchmark_data)
+
+ return results
+
+ def _build_quality_data(
+ self, benchmark: EmbeddingsBenchmark
+ ) -> dict[str, Any] | None:
+ """
+ Build quality metrics data.
+
+ :param benchmark: Benchmark with quality metrics
+ :return: Quality data dictionary or None
+ """
+ if not benchmark.metrics.quality:
+ return None
+
+ quality = benchmark.metrics.quality
+ data: dict[str, Any] = {}
+
+ if (
+ quality.baseline_cosine_similarity
+ and quality.baseline_cosine_similarity.successful
+ ):
+ data["cosine_similarity"] = self._distribution_to_dict(
+ quality.baseline_cosine_similarity.successful
+ )
+
+ if quality.self_consistency_score and quality.self_consistency_score.successful:
+ data["self_consistency"] = self._distribution_to_dict(
+ quality.self_consistency_score.successful
+ )
+
+ if quality.mteb_main_score is not None:
+ data["mteb_main_score"] = quality.mteb_main_score
+
+ if quality.mteb_task_scores:
+ data["mteb_task_scores"] = quality.mteb_task_scores
+
+ return data if data else None
+
+ def _distribution_to_dict(
+ self, dist: Any
+ ) -> dict[str, float | None]:
+ """
+ Convert distribution summary to dictionary.
+
+ :param dist: Distribution summary object
+ :return: Dictionary with mean, median, std_dev, and
+ percentiles
+ """
+ if dist is None:
+ return {
+ "mean": None,
+ "median": None,
+ "std_dev": None,
+ "p50": None,
+ "p95": None,
+ "p99": None,
+ }
+
+ return {
+ "mean": dist.mean,
+ "median": dist.median,
+ "std_dev": dist.std_dev,
+ "p50": (
+ dist.percentiles.p50
+ if hasattr(dist, "percentiles")
+ else dist.median
+ ),
+ "p95": (
+ dist.percentiles.p95 if hasattr(dist, "percentiles") else None
+ ),
+ "p99": (
+ dist.percentiles.p99 if hasattr(dist, "percentiles") else None
+ ),
+ }
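
To make the injection step above concrete, here is a toy round trip under the same convention: keys are camelized before serialization, then the JSON payload is substituted for the placeholder statement (`runInfo` is an illustrative key, not a schema guarantee):

    import json

    template = "<script>const uiApiData = {};</script>"
    ui_api_data = {"runInfo": {"task": "embeddings"}}

    # Same substitution the output handler performs on the packaged template
    html = template.replace(
        "const uiApiData = {};",
        f"const uiApiData = {json.dumps(ui_api_data)};",
    )
    print(html)  # <script>const uiApiData = {"runInfo": {"task": "embeddings"}};</script>
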
diff --git a/src/guidellm/benchmark/outputs/embeddings_serialized.py b/src/guidellm/benchmark/outputs/embeddings_serialized.py
new file mode 100644
index 000000000..6378f0fd4
--- /dev/null
+++ b/src/guidellm/benchmark/outputs/embeddings_serialized.py
@@ -0,0 +1,70 @@
+"""
+Serialized output handler for embeddings benchmark reports.
+
+Provides a serialized output implementation that saves embeddings benchmark reports
+to JSON or YAML file formats. Extends the base EmbeddingsBenchmarkerOutput to handle
+file-based persistence of benchmark results.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from pydantic import Field
+
+from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
+from guidellm.benchmark.schemas.embeddings import EmbeddingsBenchmarksReport
+
+__all__ = ["EmbeddingsBenchmarkerSerialized"]
+
+
+@EmbeddingsBenchmarkerOutput.register(["json", "yaml"])
+class EmbeddingsBenchmarkerSerialized(EmbeddingsBenchmarkerOutput):
+ """
+ Serialized output handler for embeddings benchmark reports in JSON or YAML formats.
+
+ Persists embeddings benchmark reports to the file system in either JSON or YAML
+ format. Supports flexible path specification, allowing users to provide either
+ a directory (where a default filename will be generated) or an explicit file path.
+
+ Example:
+ ::
+ output = EmbeddingsBenchmarkerSerialized(
+ output_path="/path/to/embeddings_output.json"
+ )
+ result_path = await output.finalize(report)
+ """
+
+ output_path: Path = Field(
+ default_factory=lambda: Path.cwd(),
+ description="Directory or file path for saving the serialized report",
+ )
+
+ @classmethod
+ def validated_kwargs(
+ cls, output_path: str | Path | None, **_kwargs
+ ) -> dict[str, Any]:
+ """
+ Validate and normalize output path keyword arguments.
+
+ :param output_path: Directory or file path for serialization output
+ :param _kwargs: Additional keyword arguments (ignored)
+ :return: Dictionary of validated keyword arguments for class initialization
+ """
+ validated: dict[str, Any] = {}
+ if output_path is not None:
+ validated["output_path"] = (
+ Path(output_path) if not isinstance(output_path, Path) else output_path
+ )
+ return validated
+
+ async def finalize(self, report: EmbeddingsBenchmarksReport) -> Path:
+ """
+ Serialize and save the embeddings benchmark report to the configured
+ output path.
+
+ :param report: The embeddings benchmarks report to serialize
+ :return: Path to the saved report file
+ """
+ return report.save_file(self.output_path)
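
A minimal usage sketch for this handler, assuming `report.save_file` infers the format from the file suffix and generates a default filename when handed a directory; the path is illustrative:

    import asyncio

    from guidellm.benchmark.outputs.embeddings_serialized import (
        EmbeddingsBenchmarkerSerialized,
    )

    async def save_report(report):
        output = EmbeddingsBenchmarkerSerialized(output_path="results/report.yaml")
        saved_path = await output.finalize(report)
        print(f"Report written to {saved_path}")

    # asyncio.run(save_report(report)) once a report object is in hand
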
diff --git a/src/guidellm/benchmark/outputs/html.py b/src/guidellm/benchmark/outputs/html.py
index 318d9d4de..084cad611 100644
--- a/src/guidellm/benchmark/outputs/html.py
+++ b/src/guidellm/benchmark/outputs/html.py
@@ -357,7 +357,12 @@ def _build_workload_details(
"""
target = args.target
rate_type = benchmarks[0].config.strategy.type_
- successful_requests = [req for bm in benchmarks for req in bm.requests.successful]
+ successful_requests = [
+ req
+ for bm in benchmarks
+ if bm.requests.successful is not None
+ for req in bm.requests.successful
+ ]
sample_indices = random.sample(
range(len(successful_requests)), min(5, len(successful_requests))
@@ -378,11 +383,13 @@ def _build_workload_details(
prompt_tokens = [
float(req.prompt_tokens) if req.prompt_tokens is not None else -1
for bm in benchmarks
+ if bm.requests.successful is not None
for req in bm.requests.successful
]
output_tokens = [
float(req.output_tokens) if req.output_tokens is not None else -1
for bm in benchmarks
+ if bm.requests.successful is not None
for req in bm.requests.successful
]
@@ -396,6 +403,7 @@ def _build_workload_details(
all_req_times = [
req.info.timings.request_start - min_start_time
for bm in benchmarks
+ if bm.requests.successful is not None
for req in bm.requests.successful
if req.info.timings.request_start is not None
]
@@ -451,22 +459,30 @@ def _build_benchmarks(benchmarks: list[GenerativeBenchmark]) -> list[dict[str, A
"""
result = []
for bm in benchmarks:
+ # Helper to safely get distribution summary or None
+ def get_dist_summary(dist: DistributionSummary | None) -> dict | None:
+ if dist is not None:
+ return _TabularDistributionSummary.from_distribution_summary(
+ dist
+ ).model_dump()
+ return None
+
result.append(
{
- "requests_per_second": bm.metrics.requests_per_second.successful.mean,
- "itl": _TabularDistributionSummary.from_distribution_summary(
- bm.metrics.inter_token_latency_ms.successful
- ).model_dump(),
- "ttft": _TabularDistributionSummary.from_distribution_summary(
+ "requests_per_second": (
+ bm.metrics.requests_per_second.successful.mean
+ if bm.metrics.requests_per_second.successful is not None
+ else 0.0
+ ),
+ "itl": get_dist_summary(bm.metrics.inter_token_latency_ms.successful),
+ "ttft": get_dist_summary(
bm.metrics.time_to_first_token_ms.successful
- ).model_dump(),
- "throughput": _TabularDistributionSummary.from_distribution_summary(
+ ),
+ "throughput": get_dist_summary(
bm.metrics.output_tokens_per_second.successful
- ).model_dump(),
- "time_per_request": (
- _TabularDistributionSummary.from_distribution_summary(
- bm.metrics.request_latency.successful
- ).model_dump()
+ ),
+ "time_per_request": get_dist_summary(
+ bm.metrics.request_latency.successful
),
}
)
diff --git a/src/guidellm/benchmark/outputs/html_outputs/__init__.py b/src/guidellm/benchmark/outputs/html_outputs/__init__.py
new file mode 100644
index 000000000..4a5840cdc
--- /dev/null
+++ b/src/guidellm/benchmark/outputs/html_outputs/__init__.py
@@ -0,0 +1 @@
+"""HTML template resources for benchmark outputs."""
diff --git a/src/guidellm/benchmark/outputs/html_outputs/embeddings_template.html b/src/guidellm/benchmark/outputs/html_outputs/embeddings_template.html
new file mode 100644
index 000000000..5f3012364
--- /dev/null
+++ b/src/guidellm/benchmark/outputs/html_outputs/embeddings_template.html
@@ -0,0 +1,156 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <title>GuideLLM Embeddings Benchmark Report</title>
+  </head>
+  <body>
+    <h1>GuideLLM Embeddings Benchmark Report</h1>
+    <section id="summary">
+      <h2>Summary</h2>
+    </section>
+    <section id="metrics">
+      <h2>Metrics</h2>
+    </section>
+    <section id="details">
+      <h2>Details</h2>
+    </section>
+    <!-- Styling and chart-rendering script omitted from this excerpt.
+         The placeholder below is rewritten by the Python output handler. -->
+    <script>
+      const uiApiData = {};
+    </script>
+  </body>
+</html>
diff --git a/src/guidellm/benchmark/outputs/output.py b/src/guidellm/benchmark/outputs/output.py
index f6ec6e708..dba4f17f0 100644
--- a/src/guidellm/benchmark/outputs/output.py
+++ b/src/guidellm/benchmark/outputs/output.py
@@ -18,9 +18,10 @@
from pydantic import BaseModel, ConfigDict
from guidellm.benchmark.schemas import GenerativeBenchmarksReport
+from guidellm.benchmark.schemas.embeddings import EmbeddingsBenchmarksReport
from guidellm.utils import RegistryMixin
-__all__ = ["GenerativeBenchmarkerOutput"]
+__all__ = ["EmbeddingsBenchmarkerOutput", "GenerativeBenchmarkerOutput"]
class GenerativeBenchmarkerOutput(
@@ -167,3 +168,131 @@ async def finalize(self, report: GenerativeBenchmarksReport) -> Any:
:raises NotImplementedError: Must be implemented by subclasses
"""
...
+
+
+class EmbeddingsBenchmarkerOutput(
+ BaseModel, RegistryMixin[type["EmbeddingsBenchmarkerOutput"]], ABC
+):
+ """
+ Abstract base for embeddings benchmark output formatters with registry support.
+
+ Defines the interface for transforming embeddings benchmark reports into various
+ output formats. Similar to GenerativeBenchmarkerOutput but adapted for embeddings
+ which lack output tokens, streaming metrics, and multi-modality support.
+
+ Example:
+ ::
+ # Register and resolve output formats
+ outputs = EmbeddingsBenchmarkerOutput.resolve(
+ output_formats=["json", "csv"],
+ output_path="./results"
+ )
+
+ # Finalize outputs with benchmark report
+ for output in outputs.values():
+ await output.finalize(report)
+ """
+
+ model_config = ConfigDict(
+ extra="ignore",
+ arbitrary_types_allowed=True,
+ validate_assignment=True,
+ from_attributes=True,
+ use_enum_values=True,
+ )
+
+ @classmethod
+ @abstractmethod
+ def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
+ """
+ Validate and normalize initialization arguments for output formatter.
+
+ :param args: Positional arguments for formatter configuration
+ :param kwargs: Keyword arguments for formatter configuration
+ :return: Validated dictionary of parameters for formatter creation
+ :raises NotImplementedError: Must be implemented by subclasses
+ """
+ ...
+
+ @classmethod
+ def resolve(
+ cls,
+ outputs: (
+ Sequence[str | EmbeddingsBenchmarkerOutput]
+ | Mapping[str, str | dict[str, Any] | EmbeddingsBenchmarkerOutput]
+ | None
+ ),
+ output_dir: str | Path | None,
+ ) -> dict[str, EmbeddingsBenchmarkerOutput]:
+ """
+ Resolve output format specifications into formatter instances.
+
+ :param outputs: Format specifications
+ :param output_dir: Default output directory path
+ :return: Dictionary mapping format keys to instantiated formatter instances
+ :raises TypeError: If format specification type is invalid
+ :raises ValueError: If format resolution or validation fails
+ """
+ if not outputs:
+ return {}
+
+        keys: list[str]
+        values: list[dict[str, Any] | EmbeddingsBenchmarkerOutput]
+        if isinstance(outputs, Mapping):
+            keys = list(outputs.keys())
+            values = list(outputs.values())  # type: ignore[arg-type]
+        else:
+            # Iterate only when given a sequence; iterating a Mapping here
+            # would walk its keys again and duplicate every entry
+            keys = []
+            values = []
+            for out in outputs:
+                if isinstance(out, str) and "." in out:
+                    ext = Path(out).suffix[1:].lower()
+                    keys.append(ext)
+                    values.append(
+                        {"output_path": Path(output_dir or Path.cwd()) / out}
+                    )
+                elif isinstance(out, str):
+                    keys.append(out)
+                    values.append({})
+                elif isinstance(out, EmbeddingsBenchmarkerOutput):
+                    keys.append(out.__class__.__name__)
+                    values.append(out)
+                else:
+                    raise TypeError(
+                        "outputs must be a sequence of strings or "
+                        "EmbeddingsBenchmarkerOutput instances, or a mapping."
+                    )
+
+ resolved: dict[str, EmbeddingsBenchmarkerOutput] = {}
+ for key, val in zip(keys, values, strict=True):
+ if isinstance(val, EmbeddingsBenchmarkerOutput):
+ resolved[key] = val
+ else:
+ output_class = cls.get_registered_object(key)
+ if output_class is None:
+ available_formats = (
+ list(cls.registry.keys()) if cls.registry else []
+ )
+ raise ValueError(
+ f"Output format '{key}' is not registered. "
+ f"Available formats: {available_formats}"
+ )
+ kwargs = output_class.validated_kwargs(
+ **{"output_path": output_dir, **val} # type: ignore[dict-item]
+ )
+ resolved[key] = output_class(**kwargs)
+
+ return resolved
+
+ @abstractmethod
+ async def finalize(self, report: EmbeddingsBenchmarksReport) -> Any:
+ """
+ Process and persist embeddings benchmark report in the formatter's
+ output format.
+
+ :param report: Embeddings benchmark report containing results to
+ format
+ :return: Format-specific output result (file path, response object,
+ etc.)
+ :raises NotImplementedError: Must be implemented by subclasses
+ """
+ ...
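
Because the registry mirrors the generative side, third-party formats can plug in the same way as the built-in json/yaml handler. A minimal sketch, assuming the report exposes a `benchmarks` list (that attribute name is an assumption, not confirmed by this diff):

    from __future__ import annotations

    from pathlib import Path
    from typing import Any

    from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
    from guidellm.benchmark.schemas.embeddings import EmbeddingsBenchmarksReport


    @EmbeddingsBenchmarkerOutput.register(["txt"])
    class EmbeddingsBenchmarkerText(EmbeddingsBenchmarkerOutput):
        output_path: Path = Path.cwd()

        @classmethod
        def validated_kwargs(cls, output_path=None, **_kwargs) -> dict[str, Any]:
            return {"output_path": Path(output_path)} if output_path else {}

        async def finalize(self, report: EmbeddingsBenchmarksReport) -> Path:
            path = self.output_path / "embeddings_report.txt"
            # report.benchmarks assumed; adjust to the real report schema
            path.write_text(f"{len(report.benchmarks)} benchmark(s) completed\n")
            return path

With that in place, `EmbeddingsBenchmarkerOutput.resolve(["txt"], output_dir="./results")` resolves the class by its registered key and routes `output_dir` through `validated_kwargs`.
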
diff --git a/src/guidellm/benchmark/progress.py b/src/guidellm/benchmark/progress.py
index 289e367c0..25eb41308 100644
--- a/src/guidellm/benchmark/progress.py
+++ b/src/guidellm/benchmark/progress.py
@@ -32,6 +32,8 @@
from guidellm.benchmark.schemas import (
BenchmarkAccumulatorT,
BenchmarkT,
+ EmbeddingsBenchmark,
+ EmbeddingsBenchmarkAccumulator,
GenerativeBenchmark,
GenerativeBenchmarkAccumulator,
)
@@ -181,7 +183,7 @@ async def on_benchmark_start(self, strategy: SchedulingStrategy):
async def on_benchmark_update(
self,
- accumulator: GenerativeBenchmarkAccumulator,
+ accumulator: GenerativeBenchmarkAccumulator | EmbeddingsBenchmarkAccumulator,
scheduler_state: SchedulerState,
):
"""
@@ -307,7 +309,7 @@ def start_benchmark(self, strategy: SchedulingStrategy):
def update_benchmark(
self,
- accumulator: GenerativeBenchmarkAccumulator,
+ accumulator: GenerativeBenchmarkAccumulator | EmbeddingsBenchmarkAccumulator,
scheduler_state: SchedulerState,
):
current_state = self.benchmark_task_states[self.current_index]
@@ -356,6 +358,7 @@ class _GenerativeProgressTaskState:
queued_time: float = 0.0
request_targeted_start_delay: float = 0.0
scheduler_overheads_time: float = 0.0
+ is_embeddings: bool = False # Track if this is an embeddings benchmark
@property
def current(self) -> dict[str, Any]:
@@ -473,6 +476,28 @@ def formatted_tokens_summary(self) -> str:
if self.benchmark_status == "pending":
return " "
+ # Show simplified metrics for embeddings (no output tokens, TTFT, ITL)
+ if self.is_embeddings:
+ return (
+ f"[{Colors.info}]Tok:[/{Colors.info}] "
+ + format_value_display(
+ value=self.total_tokens_rate,
+ label="inp/s",
+ total_characters=12,
+ digits_places=4,
+ decimal_places=1,
+ )
+ + ", "
+ + format_value_display(
+ value=self.prompt_tokens,
+ label="Input",
+ total_characters=12,
+ digits_places=4,
+ decimal_places=0,
+ )
+ )
+
+ # Full metrics for generative models
return (
f"[{Colors.info}]Tok:[/{Colors.info}] "
+ format_value_display(
@@ -566,7 +591,7 @@ def start(self, strategy: SchedulingStrategy):
def update(
self,
- accumulator: GenerativeBenchmarkAccumulator,
+ accumulator: GenerativeBenchmarkAccumulator | EmbeddingsBenchmarkAccumulator,
scheduler_state: SchedulerState,
):
self.progress = (
@@ -586,15 +611,33 @@ def update(
requests_per_second=accumulator.completed_metrics.requests.rate_per_second,
request_latency=accumulator.completed_metrics.request_latency.mean,
)
- self._update_token_stats(
- output_tokens=accumulator.completed_metrics.total_tokens.mean,
- output_tokens_rate=accumulator.completed_metrics.output_tokens.rate_per_second,
- prompt_tokens=accumulator.completed_metrics.prompt_tokens.mean,
- total_tokens_rate=accumulator.completed_metrics.total_tokens.rate_per_second,
- time_to_first_token=accumulator.completed_metrics.time_to_first_token_ms.mean,
- inter_token_latency=accumulator.completed_metrics.inter_token_latency_ms.mean,
- converted=True,
- )
+
+ # Handle token stats differently for embeddings vs generative
+ if isinstance(accumulator, EmbeddingsBenchmarkAccumulator):
+ # Mark as embeddings benchmark
+ self.is_embeddings = True
+ # For embeddings: no output tokens, TTFT, or ITL
+ self._update_token_stats(
+ output_tokens=0.0,
+ output_tokens_rate=0.0,
+ prompt_tokens=accumulator.completed_metrics.prompt_tokens.mean,
+ total_tokens_rate=accumulator.completed_metrics.prompt_tokens.rate_per_second,
+ time_to_first_token=0.0,
+ inter_token_latency=0.0,
+ converted=True,
+ )
+ else:
+ # For generative: full token stats
+ self._update_token_stats(
+ output_tokens=accumulator.completed_metrics.total_tokens.mean,
+ output_tokens_rate=accumulator.completed_metrics.output_tokens.rate_per_second,
+ prompt_tokens=accumulator.completed_metrics.prompt_tokens.mean,
+ total_tokens_rate=accumulator.completed_metrics.total_tokens.rate_per_second,
+ time_to_first_token=accumulator.completed_metrics.time_to_first_token_ms.mean,
+ inter_token_latency=accumulator.completed_metrics.inter_token_latency_ms.mean,
+ converted=True,
+ )
+
self._update_system_stats(
request_targeted_start_delay=accumulator.scheduler_metrics.request_targeted_start_delay.mean,
queued_time=accumulator.scheduler_metrics.queued_time.mean,
@@ -602,7 +645,7 @@ def update(
converted=False,
)
- def complete(self, benchmark: GenerativeBenchmark):
+ def complete(self, benchmark: GenerativeBenchmark | EmbeddingsBenchmark):
self._update_processing_states(
benchmark_status="completed",
start_time=benchmark.start_time,
@@ -611,24 +654,89 @@ def complete(self, benchmark: GenerativeBenchmark):
errored_requests=benchmark.metrics.request_totals.errored,
)
self._update_request_stats(
- request_concurrency=benchmark.metrics.request_concurrency.successful.mean,
- requests_per_second=benchmark.metrics.requests_per_second.successful.mean,
- request_latency=benchmark.metrics.request_latency.successful.mean,
- )
- self._update_token_stats(
- output_tokens=benchmark.metrics.output_token_count.successful.mean,
- output_tokens_rate=benchmark.metrics.output_tokens_per_second.successful.mean,
- prompt_tokens=benchmark.metrics.prompt_token_count.successful.mean,
- total_tokens_rate=benchmark.metrics.tokens_per_second.successful.mean,
- time_to_first_token=(
- benchmark.metrics.time_to_first_token_ms.successful.mean
+ request_concurrency=(
+ benchmark.metrics.request_concurrency.successful.mean
+ if benchmark.metrics.request_concurrency.successful is not None
+ else 0.0
),
- inter_token_latency=(
- benchmark.metrics.inter_token_latency_ms.successful.mean
+ requests_per_second=(
+ benchmark.metrics.requests_per_second.successful.mean
+ if benchmark.metrics.requests_per_second.successful is not None
+ else 0.0
+ ),
+ request_latency=(
+ benchmark.metrics.request_latency.successful.mean
+ if benchmark.metrics.request_latency.successful is not None
+ else 0.0
),
- converted=True,
)
+ # Handle token stats differently for embeddings vs generative benchmarks
+ if isinstance(benchmark, EmbeddingsBenchmark):
+ # Mark as embeddings benchmark
+ self.is_embeddings = True
+            # For embeddings: token counts are plain StatusBreakdown[int]
+            # values, not distribution summaries, so read them directly
+ prompt_tokens: int
+ if hasattr(benchmark.metrics, "input_tokens_count"):
+ prompt_tokens = benchmark.metrics.input_tokens_count.successful or 0
+ else:
+ prompt_tokens = (
+ benchmark.metrics.prompt_token_count.successful
+ if benchmark.metrics.prompt_token_count is not None
+ and benchmark.metrics.prompt_token_count.successful is not None
+ else 0
+ )
+
+ self._update_token_stats(
+ output_tokens=0.0, # Embeddings have no output tokens
+ output_tokens_rate=0.0,
+ prompt_tokens=prompt_tokens,
+ total_tokens_rate=(
+ benchmark.metrics.input_tokens_per_second.successful.mean
+ if benchmark.metrics.input_tokens_per_second.successful is not None
+ else 0.0
+ ),
+ time_to_first_token=0.0, # No TTFT for embeddings
+ inter_token_latency=0.0, # No ITL for embeddings
+ converted=True,
+ )
+ else:
+ # For generative: output_token_count is StatusDistributionSummary
+ self._update_token_stats(
+ output_tokens=(
+ benchmark.metrics.output_token_count.successful.mean
+ if benchmark.metrics.output_token_count.successful is not None
+ else 0.0
+ ),
+ output_tokens_rate=(
+ benchmark.metrics.output_tokens_per_second.successful.mean
+ if benchmark.metrics.output_tokens_per_second.successful is not None
+ else 0.0
+ ),
+ prompt_tokens=(
+ benchmark.metrics.prompt_token_count.successful.mean
+ if benchmark.metrics.prompt_token_count.successful is not None
+ else 0.0
+ ),
+ total_tokens_rate=(
+ benchmark.metrics.tokens_per_second.successful.mean
+ if benchmark.metrics.tokens_per_second.successful is not None
+ else 0.0
+ ),
+ time_to_first_token=(
+ benchmark.metrics.time_to_first_token_ms.successful.mean
+ if benchmark.metrics.time_to_first_token_ms.successful is not None
+ else 0.0
+ ),
+ inter_token_latency=(
+ benchmark.metrics.inter_token_latency_ms.successful.mean
+ if benchmark.metrics.inter_token_latency_ms.successful is not None
+ else 0.0
+ ),
+ converted=True,
+ )
+
@staticmethod
def _map_status(
status: Literal["pending", "warmup", "active", "cooldown", "completed"],
diff --git a/src/guidellm/benchmark/quality/__init__.py b/src/guidellm/benchmark/quality/__init__.py
new file mode 100644
index 000000000..e4d22e08c
--- /dev/null
+++ b/src/guidellm/benchmark/quality/__init__.py
@@ -0,0 +1,19 @@
+"""
+Quality validation and benchmarking tools for embeddings.
+
+This module provides comprehensive quality validation capabilities for embeddings
+including cosine similarity validation against baseline models and MTEB (Massive
+Text Embedding Benchmark) integration for standardized quality evaluation.
+"""
+
+from __future__ import annotations
+
+from .mteb_integration import DEFAULT_MTEB_TASKS, MTEBValidator
+from .validators import EmbeddingsQualityValidator, compute_cosine_similarity
+
+__all__ = [
+ "DEFAULT_MTEB_TASKS",
+ "EmbeddingsQualityValidator",
+ "MTEBValidator",
+ "compute_cosine_similarity",
+]
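
A worked example for the exported similarity helper (defined in validators.py below). Cosine similarity is scale-invariant, so a vector and any positive multiple of it score 1.0, while orthogonal vectors score 0.0:

    import numpy as np

    from guidellm.benchmark.quality import compute_cosine_similarity

    a = np.array([1.0, 2.0, 2.0], dtype=np.float32)  # norm = 3.0
    b = 2.0 * a  # same direction, norm = 6.0
    c = np.array([2.0, -1.0, 0.0], dtype=np.float32)  # dot(a, c) = 0

    print(compute_cosine_similarity(a, b))  # 1.0
    print(compute_cosine_similarity(a, c))  # 0.0
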
diff --git a/src/guidellm/benchmark/quality/mteb_integration.py b/src/guidellm/benchmark/quality/mteb_integration.py
new file mode 100644
index 000000000..b328dce09
--- /dev/null
+++ b/src/guidellm/benchmark/quality/mteb_integration.py
@@ -0,0 +1,274 @@
+"""
+MTEB (Massive Text Embedding Benchmark) integration for embeddings quality evaluation.
+
+Provides standardized benchmark evaluation using MTEB tasks like STS (Semantic Textual
+Similarity) to measure embedding quality across multiple standardized datasets. Follows
+vLLM patterns for MTEB evaluation with configurable task selection and lightweight
+defaults suitable for CI/CD environments.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+
+__all__ = [
+ "DEFAULT_MTEB_TASKS",
+ "MTEBValidator",
+]
+
+DEFAULT_MTEB_TASKS = ["STS12", "STS13", "STSBenchmark"]
+"""Default MTEB tasks for lightweight evaluation (Semantic Textual Similarity)."""
+
+
+class MTEBValidator:
+ """
+ MTEB benchmark integration for standardized quality evaluation.
+
+ Runs MTEB evaluation tasks on embedding models to produce standardized quality
+ scores. Supports configurable task selection with defaults focused on lightweight
+ STS (Semantic Textual Similarity) tasks suitable for regular benchmarking.
+
+ Example:
+ ::
+ validator = MTEBValidator(
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
+ task_names=["STS12", "STS13"]
+ )
+
+ results = validator.run_evaluation()
+ print(f"MTEB Main Score: {results['mteb_main_score']:.4f}")
+ for task, score in results['mteb_task_scores'].items():
+ print(f"{task}: {score:.4f}")
+ """
+
+ def __init__(
+ self,
+ model_name: str,
+ task_names: list[str] | None = None,
+ device: str | None = None,
+ batch_size: int = 32,
+ ):
+ """
+ Initialize MTEB validator with model and task configuration.
+
+ :param model_name: HuggingFace model name or path for evaluation
+ :param task_names: List of MTEB tasks to evaluate (uses
+ DEFAULT_MTEB_TASKS if None)
+ :param device: Device for model inference ("cpu", "cuda", "mps", or
+ None for auto)
+ :param batch_size: Batch size for encoding during evaluation
+ :raises ImportError: If mteb or sentence-transformers is not
+ installed
+ """
+ try:
+ from sentence_transformers import SentenceTransformer
+ except ImportError as e:
+ raise ImportError(
+ "sentence-transformers is required for MTEB evaluation. "
+ "Install with: pip install sentence-transformers"
+ ) from e
+
+ try:
+ import mteb
+ except ImportError as e:
+ raise ImportError(
+ "mteb is required for MTEB evaluation. "
+ "Install with: pip install mteb"
+ ) from e
+
+ self.model_name = model_name
+ self.task_names = task_names if task_names is not None else DEFAULT_MTEB_TASKS
+ self.device = device
+ self.batch_size = batch_size
+
+ # Load model
+ self.model = SentenceTransformer(model_name, device=device)
+
+ # Store mteb module reference
+ self.mteb = mteb
+
+ def run_evaluation( # noqa: C901
+ self,
+ output_folder: str | None = None,
+ verbosity: int = 1,
+ ) -> dict[str, Any]:
+ """
+ Run MTEB evaluation on configured tasks.
+
+ Executes MTEB benchmark tasks and computes standardized quality scores.
+ Returns both individual task scores and an aggregated main score.
+
+ :param output_folder: Optional folder to save detailed results
+ :param verbosity: Verbosity level (0=silent, 1=progress, 2=detailed)
+ :return: Dictionary with 'mteb_main_score' and 'mteb_task_scores'
+
+ Example:
+ ::
+ results = validator.run_evaluation()
+
+ # Access main score (average across tasks)
+ main_score = results['mteb_main_score']
+
+ # Access individual task scores
+ for task, score in results['mteb_task_scores'].items():
+ print(f"{task}: {score:.4f}")
+ """
+ # Get MTEB task objects
+ tasks = self.mteb.get_tasks(tasks=self.task_names)
+
+ # Create MTEB evaluation object
+ evaluation = self.mteb.MTEB(tasks=tasks)
+
+ # Run evaluation
+ results = evaluation.run(
+ self.model,
+ output_folder=output_folder,
+ verbosity=verbosity,
+ encode_kwargs={"batch_size": self.batch_size},
+ )
+
+ # Extract scores from results
+ task_scores = {}
+ for task_name in self.task_names:
+ if task_name in results:
+ # MTEB results structure varies by task type
+ # Try to extract main_score or test score
+ task_result = results[task_name]
+
+ if isinstance(task_result, dict):
+ # Look for main_score in various possible locations
+ if "main_score" in task_result:
+ task_scores[task_name] = float(
+ task_result["main_score"]
+ )
+ elif "test" in task_result and isinstance(
+ task_result["test"], dict
+ ):
+ # Some tasks have test split with scores
+ test_result = task_result["test"]
+ if "main_score" in test_result:
+ task_scores[task_name] = float(
+ test_result["main_score"]
+ )
+ elif "cosine_spearman" in test_result:
+ # STS tasks use cosine_spearman as primary
+ task_scores[task_name] = float(
+ test_result["cosine_spearman"]
+ )
+ elif "scores" in task_result:
+ # Fallback to scores field
+ scores = task_result["scores"]
+ if isinstance(scores, list) and scores:
+ task_scores[task_name] = float(np.mean(scores))
+ elif isinstance(scores, int | float):
+ task_scores[task_name] = float(scores)
+
+ # Compute main score as average across tasks
+ main_score = (
+ float(np.mean(list(task_scores.values())))
+ if task_scores
+ else 0.0
+ )
+
+ return {
+ "mteb_main_score": main_score,
+ "mteb_task_scores": task_scores,
+ }
+
+ def get_available_tasks(self) -> list[str]:
+ """
+ Get list of all available MTEB tasks.
+
+ :return: List of available task names
+
+ Example:
+ ::
+ validator = MTEBValidator(model_name="...")
+ tasks = validator.get_available_tasks()
+ print(f"Available tasks: {tasks}")
+ """
+ all_tasks = self.mteb.get_tasks()
+ return [task.metadata.name for task in all_tasks]
+
+ def get_task_info(self, task_name: str) -> dict[str, Any]:
+ """
+ Get metadata information about a specific MTEB task.
+
+ :param task_name: Name of the MTEB task
+ :return: Dictionary with task metadata
+ :raises ValueError: If task is not found
+
+ Example:
+ ::
+ info = validator.get_task_info("STS12")
+ print(f"Task: {info['name']}")
+ print(f"Description: {info['description']}")
+ """
+ tasks = self.mteb.get_tasks(tasks=[task_name])
+
+ if not tasks:
+ raise ValueError(f"MTEB task '{task_name}' not found")
+
+ task = tasks[0]
+ metadata = task.metadata
+
+ return {
+ "name": metadata.name,
+ "description": getattr(metadata, "description", ""),
+ "type": getattr(metadata, "type", ""),
+ "category": getattr(metadata, "category", ""),
+ "eval_splits": getattr(metadata, "eval_splits", []),
+ "main_score": getattr(metadata, "main_score", ""),
+ }
+
+ @staticmethod
+ def get_recommended_tasks(category: str = "sts") -> list[str]:
+ """
+ Get recommended MTEB tasks for specific evaluation categories.
+
+ :param category: Evaluation category ("sts", "classification",
+ "retrieval", etc.)
+ :return: List of recommended task names
+
+ Example:
+ ::
+ sts_tasks = MTEBValidator.get_recommended_tasks("sts")
+ # Returns: ["STS12", "STS13", "STS14", "STS15", "STS16", "STSBenchmark"]
+ """
+ recommendations = {
+ "sts": [
+ "STS12",
+ "STS13",
+ "STS14",
+ "STS15",
+ "STS16",
+ "STSBenchmark",
+ "SICKRelatedness",
+ ],
+ "classification": [
+ "AmazonCounterfactualClassification",
+ "AmazonPolarityClassification",
+ "AmazonReviewsClassification",
+ "Banking77Classification",
+ "EmotionClassification",
+ ],
+ "clustering": [
+ "ArxivClusteringP2P",
+ "ArxivClusteringS2S",
+ "BiorxivClusteringP2P",
+ "BiorxivClusteringS2S",
+ "MedrxivClusteringP2P",
+ ],
+ "retrieval": [
+ "ArguAna",
+ "ClimateFEVER",
+ "CQADupstackRetrieval",
+ "DBPedia",
+ "FEVER",
+ ],
+ "lightweight": DEFAULT_MTEB_TASKS, # Fastest tasks for CI/CD
+ }
+
+ return recommendations.get(category.lower(), DEFAULT_MTEB_TASKS)
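
Putting the validator together, a lightweight evaluation run might look like the sketch below; the MiniLM model is illustrative, and any SentenceTransformers-compatible model works:

    from guidellm.benchmark.quality import MTEBValidator

    validator = MTEBValidator(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        task_names=MTEBValidator.get_recommended_tasks("lightweight"),
    )
    results = validator.run_evaluation(verbosity=0)
    print(f"MTEB main score: {results['mteb_main_score']:.4f}")
    for task, score in results["mteb_task_scores"].items():
        print(f"  {task}: {score:.4f}")
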
diff --git a/src/guidellm/benchmark/quality/validators.py b/src/guidellm/benchmark/quality/validators.py
new file mode 100644
index 000000000..508951e91
--- /dev/null
+++ b/src/guidellm/benchmark/quality/validators.py
@@ -0,0 +1,329 @@
+"""
+Quality validation for embeddings benchmarks.
+
+Provides tools for validating embedding quality through cosine similarity
+comparison against baseline models. Supports HuggingFace SentenceTransformers
+models as baselines and implements tolerance-based validation following vLLM
+patterns (1e-2 standard, 5e-4 MTEB).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+if TYPE_CHECKING:
+ from numpy.typing import NDArray
+
+__all__ = [
+ "EmbeddingsQualityValidator",
+ "compute_cosine_similarity",
+]
+
+
+def compute_cosine_similarity(
+ emb1: NDArray[np.float32] | list[float],
+ emb2: NDArray[np.float32] | list[float],
+) -> float:
+ """
+ Compute cosine similarity between two embedding vectors.
+
+ Cosine similarity measures the cosine of the angle between two vectors,
+ ranging from -1 (opposite) to 1 (identical direction). For normalized
+ embeddings, this is equivalent to the dot product.
+
+ Formula: cos_sim = dot(emb1, emb2) / (||emb1|| * ||emb2||)
+
+ :param emb1: First embedding vector (numpy array or list)
+ :param emb2: Second embedding vector (numpy array or list)
+ :return: Cosine similarity score between -1.0 and 1.0
+ :raises ValueError: If embeddings have different dimensions or are empty
+
+ Example:
+ ::
+ emb1 = np.array([1.0, 0.0, 0.0])
+ emb2 = np.array([1.0, 0.0, 0.0])
+ similarity = compute_cosine_similarity(emb1, emb2) # Returns 1.0
+
+ emb3 = np.array([0.0, 1.0, 0.0])
+ similarity = compute_cosine_similarity(emb1, emb3) # Returns 0.0
+ """
+ # Convert to numpy arrays if needed
+ vec1 = np.array(emb1, dtype=np.float32)
+ vec2 = np.array(emb2, dtype=np.float32)
+
+ # Validate dimensions
+ if vec1.shape != vec2.shape:
+ raise ValueError(
+ f"Embedding dimensions must match: {vec1.shape} vs {vec2.shape}"
+ )
+
+ if vec1.size == 0:
+ raise ValueError("Embeddings cannot be empty")
+
+ # Compute norms
+ norm1 = np.linalg.norm(vec1)
+ norm2 = np.linalg.norm(vec2)
+
+ # Handle zero vectors
+ if norm1 == 0.0 or norm2 == 0.0:
+ return 0.0
+
+ # Compute cosine similarity
+ dot_product = np.dot(vec1, vec2)
+ cosine_sim = dot_product / (norm1 * norm2)
+
+ return float(cosine_sim)
+
+
+class EmbeddingsQualityValidator:
+ """
+ Validates embedding quality against baseline models.
+
+ Loads a HuggingFace SentenceTransformers model as a baseline and compares
+ target embeddings against baseline outputs using cosine similarity. Supports
+ configurable tolerance thresholds following vLLM patterns.
+
+ Example:
+ ::
+ validator = EmbeddingsQualityValidator(
+ baseline_model="sentence-transformers/all-MiniLM-L6-v2",
+ tolerance=1e-2
+ )
+
+ text = "This is a test sentence"
+ target_embedding = [0.1, 0.2, 0.3, ...] # From target model
+
+ similarity = validator.validate_against_baseline(text, target_embedding)
+ is_valid = validator.check_tolerance(similarity)
+ """
+
+ def __init__(
+ self,
+ baseline_model: str,
+ tolerance: float = 1e-2,
+ device: str | None = None,
+ ):
+ """
+ Initialize quality validator with baseline model.
+
+ :param baseline_model: HuggingFace model name or path
+ (e.g., "sentence-transformers/all-MiniLM-L6-v2")
+ :param tolerance: Cosine similarity tolerance threshold
+ (1e-2 for standard, 5e-4 for MTEB-level validation)
+ :param device: Device for model inference ("cpu", "cuda", "mps", or
+ None for auto)
+ :raises ImportError: If sentence-transformers is not installed
+ """
+ try:
+ from sentence_transformers import SentenceTransformer
+ except ImportError as e:
+ raise ImportError(
+ "sentence-transformers is required for quality validation. "
+ "Install with: pip install sentence-transformers"
+ ) from e
+
+ self.baseline_model_name = baseline_model
+ self.tolerance = tolerance
+ self.device = device
+
+ # Load baseline model
+ self.baseline_model = SentenceTransformer(baseline_model, device=device)
+
+ def encode_baseline(
+ self,
+ texts: str | list[str],
+ normalize: bool = True,
+ batch_size: int = 32,
+ ) -> NDArray[np.float32]:
+ """
+ Generate embeddings using the baseline model.
+
+ :param texts: Single text or list of texts to encode
+ :param normalize: Whether to normalize embeddings to unit length
+ :param batch_size: Batch size for encoding
+ :return: Embeddings as a float32 numpy array (shape [n_texts, embedding_dim],
+ or [embedding_dim] for a single input string)
+ """
+ embeddings = self.baseline_model.encode(
+ texts,
+ normalize_embeddings=normalize,
+ batch_size=batch_size,
+ show_progress_bar=False,
+ )
+
+ # encode() may return a list or tensor; normalize to a float32 array
+ # for both single-string and batched inputs
+ return np.array(embeddings, dtype=np.float32)
+
+ def validate_against_baseline(
+ self,
+ text: str,
+ target_embedding: NDArray[np.float32] | list[float],
+ normalize: bool = True,
+ ) -> float:
+ """
+ Compare target embedding against baseline model output.
+
+ :param text: Input text that was embedded
+ :param target_embedding: Embedding from target model to validate
+ :param normalize: Whether to normalize embeddings before comparison
+ :return: Cosine similarity score (0.0 to 1.0)
+
+ Example:
+ ::
+ text = "Example sentence"
+ target_emb = model.encode(text) # From target model
+ similarity = validator.validate_against_baseline(text, target_emb)
+ # High similarity (>0.95) indicates good quality
+ """
+ # Generate baseline embedding
+ baseline_embedding = self.encode_baseline(text, normalize=normalize)
+
+ # Convert target to numpy if needed
+ target_array = np.array(target_embedding, dtype=np.float32)
+
+ # Normalize target if requested
+ if normalize:
+ norm = np.linalg.norm(target_array)
+ if norm > 0:
+ target_array = target_array / norm
+
+ # Compute similarity
+ return compute_cosine_similarity(baseline_embedding, target_array)
+
+ def validate_batch(
+ self,
+ texts: list[str],
+ target_embeddings: NDArray[np.float32] | list[list[float]],
+ normalize: bool = True,
+ ) -> list[float]:
+ """
+ Validate multiple embeddings against baseline model.
+
+ :param texts: List of input texts
+ :param target_embeddings: Embeddings from target model (shape: [n, dim])
+ :param normalize: Whether to normalize embeddings before comparison
+ :return: List of cosine similarity scores
+
+ Example:
+ ::
+ texts = ["Text 1", "Text 2", "Text 3"]
+ target_embs = model.encode(texts)
+ similarities = validator.validate_batch(texts, target_embs)
+ mean_similarity = np.mean(similarities)
+ """
+ # Generate baseline embeddings for all texts
+ baseline_embeddings = self.encode_baseline(texts, normalize=normalize)
+
+ # Convert target to numpy if needed
+ target_array = np.array(target_embeddings, dtype=np.float32)
+
+ # Normalize targets if requested
+ if normalize:
+ norms = np.linalg.norm(target_array, axis=1, keepdims=True)
+ target_array = np.where(norms > 0, target_array / norms, target_array)
+
+ # Compute similarities
+ similarities = []
+ for baseline_emb, target_emb in zip(
+ baseline_embeddings, target_array, strict=False
+ ):
+ sim = compute_cosine_similarity(baseline_emb, target_emb)
+ similarities.append(sim)
+
+ return similarities
+
+ def check_tolerance(self, similarity: float) -> bool:
+ """
+ Check if similarity meets tolerance threshold.
+
+ :param similarity: Cosine similarity score to validate
+ :return: True if similarity is within tolerance (similarity >= 1.0 - tolerance)
+
+ Example:
+ ::
+ # With tolerance=1e-2 (0.01)
+ validator.check_tolerance(0.99) # True (within 1% of perfect)
+ validator.check_tolerance(0.985) # False (outside tolerance)
+ """
+ return similarity >= (1.0 - self.tolerance)
+
+ def check_self_consistency(
+ self,
+ _text: str,
+ embeddings: list[NDArray[np.float32] | list[float]],
+ tolerance: float | None = None,
+ ) -> tuple[float, bool]:
+ """
+ Verify that same input produces consistent embeddings.
+
+ Self-consistency check ensures the model produces identical (or nearly
+ identical) embeddings for the same input text across multiple inferences.
+
+ :param _text: Input text (unused; retained for interface symmetry)
+ :param embeddings: List of embeddings from repeated encodings of the same text
+ :param tolerance: Optional tolerance override (uses instance tolerance if None)
+ :return: Tuple of (mean_similarity, is_consistent)
+
+ Example:
+ ::
+ text = "Consistency test"
+ embeddings = [model.encode(text) for _ in range(5)]
+ mean_sim, is_consistent = validator.check_self_consistency(text, embeddings)
+ # Should be near 1.0 for deterministic models
+ """
+ if len(embeddings) < 2: # noqa: PLR2004
+ # Need at least 2 embeddings to compare
+ return 1.0, True
+
+ tolerance_threshold = tolerance if tolerance is not None else self.tolerance
+
+ # Compute pairwise similarities
+ similarities = []
+ for i in range(len(embeddings)):
+ for j in range(i + 1, len(embeddings)):
+ sim = compute_cosine_similarity(embeddings[i], embeddings[j])
+ similarities.append(sim)
+
+ # Compute mean similarity
+ mean_similarity = float(np.mean(similarities))
+
+ # Check if all comparisons meet tolerance
+ is_consistent = mean_similarity >= (1.0 - tolerance_threshold)
+
+ return mean_similarity, is_consistent
+
+ def get_embedding_stats(
+ self, embeddings: NDArray[np.float32] | list[list[float]]
+ ) -> dict[str, float]:
+ """
+ Compute statistical properties of embeddings.
+
+ :param embeddings: Embeddings array (shape: [n, dim])
+ :return: Dictionary with statistics (mean_norm, std_norm, mean_value,
+ std_value, min_value, max_value)
+
+ Example:
+ ::
+ embeddings = model.encode(texts)
+ stats = validator.get_embedding_stats(embeddings)
+ print(f"Mean norm: {stats['mean_norm']:.4f}")
+ """
+ emb_array = np.array(embeddings, dtype=np.float32)
+
+ # Compute norms
+ norms = np.linalg.norm(emb_array, axis=1)
+
+ # Compute value statistics
+ mean_value = float(np.mean(emb_array))
+ std_value = float(np.std(emb_array))
+
+ return {
+ "mean_norm": float(np.mean(norms)),
+ "std_norm": float(np.std(norms)),
+ "mean_value": mean_value,
+ "std_value": std_value,
+ "min_value": float(np.min(emb_array)),
+ "max_value": float(np.max(emb_array)),
+ }
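
End to end, batch validation against the baseline reduces to a few calls. In the sketch below, random vectors stand in for the benchmarked backend's embeddings purely to exercise the API shape (384 is the all-MiniLM-L6-v2 dimension), so the tolerance check is expected to fail:

    import numpy as np

    from guidellm.benchmark.quality import EmbeddingsQualityValidator

    validator = EmbeddingsQualityValidator(
        baseline_model="sentence-transformers/all-MiniLM-L6-v2",
        tolerance=1e-2,
    )
    texts = ["The cat sat on the mat.", "Embeddings encode meaning."]
    targets = np.random.rand(len(texts), 384).astype(np.float32)

    similarities = validator.validate_batch(texts, targets)
    print([validator.check_tolerance(s) for s in similarities])
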
diff --git a/src/guidellm/benchmark/schemas/__init__.py b/src/guidellm/benchmark/schemas/__init__.py
index 0b9fd0a9c..13cc4a0bc 100644
--- a/src/guidellm/benchmark/schemas/__init__.py
+++ b/src/guidellm/benchmark/schemas/__init__.py
@@ -20,6 +20,16 @@
BenchmarkConfig,
BenchmarkT,
)
+from .embeddings import (
+ BenchmarkEmbeddingsArgs,
+ EmbeddingsBenchmark,
+ EmbeddingsBenchmarkAccumulator,
+ EmbeddingsBenchmarkMetadata,
+ EmbeddingsBenchmarksReport,
+ EmbeddingsBenchmarkTimings,
+ EmbeddingsMetrics,
+ EmbeddingsQualityMetrics,
+)
from .generative import (
BenchmarkGenerativeTextArgs,
GenerativeAudioMetricsSummary,
@@ -45,8 +55,16 @@
"BenchmarkAccumulator",
"BenchmarkAccumulatorT",
"BenchmarkConfig",
+ "BenchmarkEmbeddingsArgs",
"BenchmarkGenerativeTextArgs",
"BenchmarkT",
+ "EmbeddingsBenchmark",
+ "EmbeddingsBenchmarkAccumulator",
+ "EmbeddingsBenchmarkMetadata",
+ "EmbeddingsBenchmarkTimings",
+ "EmbeddingsBenchmarksReport",
+ "EmbeddingsMetrics",
+ "EmbeddingsQualityMetrics",
"GenerativeAudioMetricsSummary",
"GenerativeBenchmark",
"GenerativeBenchmarkAccumulator",
diff --git a/src/guidellm/benchmark/schemas/base.py b/src/guidellm/benchmark/schemas/base.py
index 9a41171f0..9370c215b 100644
--- a/src/guidellm/benchmark/schemas/base.py
+++ b/src/guidellm/benchmark/schemas/base.py
@@ -273,7 +273,7 @@ class BenchmarkConfig(StandardBaseDict):
description="Constraint definitions applied to scheduler strategy execution",
)
sample_requests: int | None = Field(
- default=20,
+ default=None,
description="Request count for statistical sampling in final metrics",
)
warmup: TransientPhaseConfig = Field(
diff --git a/src/guidellm/benchmark/schemas/embeddings/__init__.py b/src/guidellm/benchmark/schemas/embeddings/__init__.py
new file mode 100644
index 000000000..6f62128df
--- /dev/null
+++ b/src/guidellm/benchmark/schemas/embeddings/__init__.py
@@ -0,0 +1,47 @@
+"""
+Embeddings benchmark schemas for performance measurement and analysis.
+
+This module provides the complete schema ecosystem for executing, tracking, and
+analyzing embeddings benchmarks. It encompasses configuration entrypoints for
+benchmark setup, real-time metric accumulators for execution monitoring,
+comprehensive result containers with statistical summaries, multi-benchmark
+reporting capabilities, and optional quality validation metrics including cosine
+similarity and MTEB benchmarks.
+"""
+
+from __future__ import annotations
+
+from .accumulator import (
+ EmbeddingsBenchmarkAccumulator,
+ EmbeddingsBenchmarkTimings,
+ EmbeddingsMetricsAccumulator,
+ EmbeddingsQualityMetricsAccumulator,
+ EmbeddingsRequestsAccumulator,
+ RunningMetricStats,
+ SchedulerMetricsAccumulator,
+)
+from .benchmark import EmbeddingsBenchmark
+from .entrypoints import BenchmarkEmbeddingsArgs
+from .metrics import (
+ EmbeddingsMetrics,
+ EmbeddingsQualityMetrics,
+ SchedulerMetrics,
+)
+from .report import EmbeddingsBenchmarkMetadata, EmbeddingsBenchmarksReport
+
+__all__ = [
+ "BenchmarkEmbeddingsArgs",
+ "EmbeddingsBenchmark",
+ "EmbeddingsBenchmarkAccumulator",
+ "EmbeddingsBenchmarkMetadata",
+ "EmbeddingsBenchmarkTimings",
+ "EmbeddingsBenchmarksReport",
+ "EmbeddingsMetrics",
+ "EmbeddingsMetricsAccumulator",
+ "EmbeddingsQualityMetrics",
+ "EmbeddingsQualityMetricsAccumulator",
+ "EmbeddingsRequestsAccumulator",
+ "RunningMetricStats",
+ "SchedulerMetrics",
+ "SchedulerMetricsAccumulator",
+]
diff --git a/src/guidellm/benchmark/schemas/embeddings/accumulator.py b/src/guidellm/benchmark/schemas/embeddings/accumulator.py
new file mode 100644
index 000000000..74eeb4ba5
--- /dev/null
+++ b/src/guidellm/benchmark/schemas/embeddings/accumulator.py
@@ -0,0 +1,680 @@
+"""
+Real-time metric accumulation for embeddings benchmark execution.
+
+Captures and computes performance metrics during embeddings benchmark runs, tracking
+timing phases, request statistics, input token throughput, and latency distributions.
+Unlike generative workloads, embeddings do not have output tokens or streaming behavior,
+so this accumulator focuses on input processing metrics and optional quality validation
+metrics like cosine similarity.
+"""
+
+from __future__ import annotations
+
+import random
+from typing import Literal
+
+from pydantic import Field
+
+from guidellm.benchmark.schemas.base import BenchmarkAccumulator, BenchmarkConfig
+from guidellm.scheduler import MultiTurnRequestT, SchedulerState
+from guidellm.schemas import (
+ EmbeddingsRequestStats,
+ GenerationRequest,
+ GenerationResponse,
+ RequestInfo,
+ StandardBaseModel,
+ StatusBreakdown,
+ StatusDistributionSummary,
+)
+
+__all__ = [
+ "EmbeddingsBenchmarkAccumulator",
+ "EmbeddingsBenchmarkTimings",
+ "EmbeddingsMetricsAccumulator",
+ "EmbeddingsQualityMetricsAccumulator",
+ "EmbeddingsRequestsAccumulator",
+ "RunningMetricStats",
+ "SchedulerMetricsAccumulator",
+]
+
+
+class EmbeddingsBenchmarkTimings(StandardBaseModel):
+ """
+ Tracks timing phases and transitions during embeddings benchmark execution.
+
+ Monitors timestamps throughout benchmark execution including request submission,
+ measurement period boundaries (warmup/active/cooldown), and completion events.
+ """
+
+ request_start: float | None = Field(
+ description="Timestamp when the first request was sent", default=None
+ )
+ measure_start: float | None = Field(
+ description="Timestamp when measurement period started", default=None
+ )
+ measure_end: float | None = Field(
+ description="Timestamp when measurement period ended", default=None
+ )
+ request_end: float | None = Field(
+ description="Timestamp when the last request was completed", default=None
+ )
+ current_update: float | None = Field(
+ description="Most recent timestamp observed during execution", default=None
+ )
+ current_request: float | None = Field(
+ description="Most recent request completion timestamp observed", default=None
+ )
+ last_update: float | None = Field(
+ description="Previous timestamp observed before the current one", default=None
+ )
+ last_request: float | None = Field(
+ description="Previous request completion timestamp before the current one",
+ default=None,
+ )
+
+ @property
+ def status(self) -> Literal["pending", "warmup", "active", "cooldown"]:
+ """
+ :return: Current execution phase based on timing thresholds
+ """
+ if self.request_start is None or self.current_update is None:
+ return "pending"
+
+ if self.measure_start is None or self.current_update <= self.measure_start:
+ return "warmup"
+
+ if self.measure_end is not None and self.current_update >= self.measure_end:
+ return "cooldown"
+
+ return "active"
+
+ @property
+ def duration(self) -> float:
+ """
+ :return: Elapsed time since measurement or request start in seconds
+ """
+ if self.request_start is None or self.current_update is None:
+ return 0.0
+
+ return self.current_update - self.request_start
+
+ @property
+ def elapsed_time_last_update(self) -> float:
+ """
+ :return: Time elapsed since last update
+ """
+ if self.current_update is None or self.last_update is None:
+ return 0.0
+
+ return self.current_update - self.last_update
+
+ @property
+ def finalized_request_start(self) -> float:
+ """
+ :return: Finalized timestamp for when requests started
+ """
+ return self.request_start or -1.0
+
+ @property
+ def finalized_measure_start(self) -> float:
+ """
+ :return: Finalized timestamp for when measurement started
+ """
+ return self.measure_start or self.finalized_request_start
+
+ @property
+ def finalized_measure_end(self) -> float:
+ """
+ :return: Finalized timestamp for when measurement ended
+ """
+ return self.measure_end or self.finalized_request_end
+
+ @property
+ def finalized_request_end(self) -> float:
+ """
+ :return: Finalized timestamp for when requests ended
+ """
+ return self.request_end or self.current_request or -1.0
+
+ def update_estimate(
+ self,
+ info: RequestInfo,
+ scheduler_state: SchedulerState,
+ config: BenchmarkConfig,
+ ):
+ """
+ Update timing estimates based on request info and scheduler state.
+
+ :param info: Request information containing timing data
+ :param scheduler_state: Current scheduler state with progress metrics
+ :param config: Benchmark configuration with warmup/cooldown settings
+ """
+ # Update non-terminal timestamps
+ self.request_start = scheduler_state.start_requests_time
+ self.last_update = self.current_update
+ if (current_time := info.timings.last_reported) is not None:
+ self.current_update = (
+ current_time
+ if self.current_update is None
+ else max(self.current_update, current_time)
+ )
+
+ # Update measurement period timestamps
+ warmup_active, measure_start = config.warmup.compute_transition_time(
+ info=info, state=scheduler_state, period="start"
+ )
+ if not warmup_active:
+ self.measure_start = self.request_start
+ elif measure_start is not None:
+ self.measure_start = measure_start
+
+ cooldown_active, measure_end = config.cooldown.compute_transition_time(
+ info=info, state=scheduler_state, period="end"
+ )
+ if cooldown_active and measure_end is not None:
+ self.measure_end = measure_end
+
+ # Update terminal timestamps for completed requests
+ if info.status in {"completed", "errored", "cancelled"}:
+ self.last_request = self.current_request
+ if info.completed_at is not None and (
+ self.current_request is None or info.completed_at > self.current_request
+ ):
+ self.current_request = info.completed_at
+
+ # Update request stop timestamps
+ if scheduler_state.end_processing_time is not None and self.request_end is None:
+ self.request_end = (
+ scheduler_state.progress.stop_time
+ or self.current_request
+ or scheduler_state.end_processing_time
+ )
+ if self.measure_end is None:
+ self.measure_end = self.request_end
+
+
+class RunningMetricStats(StandardBaseModel):
+ """
+ Maintains running statistics for a metric stream without storing all samples.
+
+ Accumulates count, sum, time-weighted sum, and duration for efficient
+ real-time metric tracking during long-running benchmarks.
+ """
+
+ count: int = Field(description="Number of samples accumulated", default=0)
+ value_sum: float = Field(description="Total sum of accumulated values", default=0.0)
+ time_weighted_sum: float = Field(
+ description="Time-weighted sum of accumulated values", default=0.0
+ )
+ duration: float = Field(
+ description="Total duration over which values were accumulated", default=0.0
+ )
+ last_value: float | None = Field(
+ description="Most recent value added to the accumulator", default=None
+ )
+
+ @property
+ def mean(self) -> float | None:
+ """
+ :return: Arithmetic mean of accumulated values, or None if no samples
+ """
+ if self.count <= 0:
+ return None
+ return self.value_sum / self.count
+
+ @property
+ def time_weighted_mean(self) -> float | None:
+ """
+ :return: Time-weighted mean considering duration between samples, or None
+ """
+ if self.duration <= 0.0:
+ return None
+ return self.time_weighted_sum / self.duration
+
+ @property
+ def rate_per_item(self) -> float | None:
+ """
+ :return: Average value per accumulated item, or None if no samples
+ """
+ if self.count <= 0:
+ return None
+ return self.value_sum / self.count
+
+ @property
+ def rate_per_second(self) -> float | None:
+ """
+ :return: Average value per second of duration, or None if no duration
+ """
+ if self.duration <= 0.0:
+ return None
+ return self.value_sum / self.duration
+
+ def update_estimate(
+ self,
+ value: float | None,
+ count: int = 1,
+ duration: float | None = None,
+ elapsed: float | None = None,
+ ):
+ """
+ Incorporate a new metric value into running statistics.
+
+ Updates count, sum, and time-weighted statistics using the new value and timing
+ information. Time-weighted calculations use the previous value over the elapsed
+ interval to capture sustained metric behavior.
+
+ :param value: New metric value to accumulate
+ :param count: Number of occurrences this value represents
+ :param duration: Total duration to set, overriding incremental elapsed updates
+ :param elapsed: Time elapsed since last update for time-weighted calculations
+ """
+ self.count += count
+ self.value_sum += (value or 0.0) * count
+
+ if elapsed is not None:
+ self.time_weighted_sum += (self.last_value or 0.0) * elapsed
+
+ self.duration = (
+ duration if duration is not None else (self.duration + (elapsed or 0.0))
+ )
+ self.last_value = value
+
+
+class SchedulerMetricsAccumulator(StandardBaseModel):
+ """
+ Tracks scheduler-level timing and overhead metrics during execution.
+ """
+
+ start_time: float = Field(description="Scheduler start timestamp", default=0.0)
+ request_start_time: float = Field(
+ description="First request timestamp", default=0.0
+ )
+ measure_start_time: float = Field(
+ description="Measurement start timestamp", default=0.0
+ )
+ measure_end_time: float = Field(
+ description="Measurement end timestamp", default=0.0
+ )
+ request_end_time: float = Field(description="Last request timestamp", default=0.0)
+ end_time: float = Field(description="Scheduler end timestamp", default=0.0)
+
+ requests_made: StatusBreakdown[int, int, int, int] = Field(
+ description="Request counts by status",
+ default_factory=lambda: StatusBreakdown[int, int, int, int](
+ successful=0, errored=0, incomplete=0, total=0
+ ),
+ )
+
+ # Running metrics for progress tracking (compatible with generative)
+ queued_time: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Running stats for time requests spent in the queue",
+ )
+ resolve_start_delay: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Running stats for delay before worker starts resolving",
+ )
+ resolve_targeted_start_delay: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Running stats for delay to targeted resolve start",
+ )
+ request_start_delay: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Running stats for delay from resolve to request start",
+ )
+ request_targeted_start_delay: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Running stats for delay to targeted request start",
+ )
+ resolve_end_delay: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Running stats for delay after request end till worker resolves",
+ )
+
+ # Sum fields for final compilation
+ queued_time_sum: float = Field(
+ description="Total time requests spent in queue", default=0.0
+ )
+ resolve_start_delay_sum: float = Field(
+ description="Total delay before worker starts resolving", default=0.0
+ )
+ resolve_targeted_start_delay_sum: float = Field(
+ description="Total delay to targeted resolve start", default=0.0
+ )
+ request_start_delay_sum: float = Field(
+ description="Total delay from resolve to request start", default=0.0
+ )
+ resolve_time_sum: float = Field(
+ description="Total resolution time", default=0.0
+ )
+
+ def update_estimate(
+ self, scheduler_state: SchedulerState, stats: EmbeddingsRequestStats
+ ):
+ """
+ Update scheduler metrics with completed request timing data.
+
+ :param scheduler_state: Current scheduler state
+ :param stats: Completed request statistics
+ """
+ # Update request counts
+ self.requests_made.successful = scheduler_state.successful_requests
+ self.requests_made.errored = scheduler_state.errored_requests
+ self.requests_made.incomplete = scheduler_state.cancelled_requests
+ self.requests_made.total = (
+ scheduler_state.successful_requests
+ + scheduler_state.errored_requests
+ + scheduler_state.cancelled_requests
+ )
+
+ # Update timing sums and running stats
+ timings = stats.info.timings
+ if timings.queued is not None and timings.dequeued is not None:
+ queued_time_val = timings.dequeued - timings.queued
+ self.queued_time_sum += queued_time_val
+ self.queued_time.update_estimate(value=queued_time_val)
+
+ if timings.dequeued is not None and timings.resolve_start is not None:
+ resolve_start_delay_val = timings.resolve_start - timings.dequeued
+ self.resolve_start_delay_sum += resolve_start_delay_val
+ self.resolve_start_delay.update_estimate(value=resolve_start_delay_val)
+
+ if timings.targeted_start is not None and timings.resolve_start is not None:
+ resolve_targeted_delay_val = timings.resolve_start - timings.targeted_start
+ self.resolve_targeted_start_delay_sum += resolve_targeted_delay_val
+ self.resolve_targeted_start_delay.update_estimate(
+ value=resolve_targeted_delay_val
+ )
+
+ if timings.resolve_start is not None and timings.request_start is not None:
+ request_start_delay_val = timings.request_start - timings.resolve_start
+ self.request_start_delay_sum += request_start_delay_val
+ self.request_start_delay.update_estimate(value=request_start_delay_val)
+
+ if timings.targeted_start is not None and timings.request_start is not None:
+ request_targeted_delay_val = (
+ timings.request_start - timings.targeted_start
+ )
+ self.request_targeted_start_delay.update_estimate(
+ value=request_targeted_delay_val
+ )
+
+ if timings.request_end is not None and timings.resolve_end is not None:
+ resolve_end_delay_val = timings.resolve_end - timings.request_end
+ self.resolve_end_delay.update_estimate(value=resolve_end_delay_val)
+
+ if timings.resolve_start is not None and timings.resolve_end is not None:
+ resolve_time_val = timings.resolve_end - timings.resolve_start
+ self.resolve_time_sum += resolve_time_val
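+
+    # Timing chain tracked above (each delta guarded by None checks):
+    #   queued -> dequeued -> resolve_start -> request_start
+    #       -> request_end -> resolve_end
+    # For example, queued_time = dequeued - queued and
+    # resolve_time = resolve_end - resolve_start.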
+
+
+class EmbeddingsQualityMetricsAccumulator(StandardBaseModel):
+ """
+ Accumulates quality validation metrics for embeddings.
+
+ Tracks cosine similarity scores and MTEB benchmark results when quality
+ validation is enabled.
+ """
+
+ cosine_similarities: list[float] = Field(
+ default_factory=list,
+ description="Cosine similarity scores against baseline",
+ )
+ baseline_cosine_similarity: StatusDistributionSummary | None = Field(
+ default=None,
+ description="Compiled cosine similarity distribution",
+ )
+ self_consistency_score: StatusDistributionSummary | None = Field(
+ default=None,
+ description="Compiled self-consistency scores",
+ )
+ mteb_main_score: float | None = Field(
+ default=None,
+ description="MTEB main score (if evaluated)",
+ )
+ mteb_task_scores: dict[str, float] | None = Field(
+ default=None,
+ description="Individual MTEB task scores",
+ )
+
+
+class EmbeddingsCompletedMetricsAccumulator(StandardBaseModel):
+ """
+ Tracks real-time metrics for completed embeddings requests.
+
+ Used for progress tracking during benchmark execution.
+ """
+
+ requests: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Requests completion metrics",
+ )
+ request_latency: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Request latency running stats",
+ )
+ prompt_tokens: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Input tokens running stats",
+ )
+ total_tokens: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Total tokens (same as prompt for embeddings)",
+ )
+
+
+class EmbeddingsMetricsAccumulator(StandardBaseModel):
+ """
+ Accumulates performance metrics during embeddings benchmark execution.
+
+ Tracks request latency, throughput, and input token metrics. Does not track
+ output tokens or streaming metrics (no TTFT/ITL for embeddings).
+ """
+
+ requests_per_second: StatusDistributionSummary = Field(
+ default_factory=StatusDistributionSummary,
+ description="Requests per second distribution",
+ )
+ request_concurrency: StatusDistributionSummary = Field(
+ default_factory=StatusDistributionSummary,
+ description="Request concurrency distribution",
+ )
+ request_latency: StatusDistributionSummary = Field(
+ default_factory=StatusDistributionSummary,
+ description="Request latency distribution",
+ )
+ input_tokens_per_second: StatusDistributionSummary = Field(
+ default_factory=StatusDistributionSummary,
+ description="Input tokens per second distribution",
+ )
+
+
+class EmbeddingsRequestsAccumulator(StandardBaseModel):
+ """
+ Accumulates embeddings request statistics during benchmark execution.
+
+ Uses reservoir sampling to maintain a representative sample of requests
+ across different status categories.
+ """
+
+ successful: list[EmbeddingsRequestStats] = Field(
+ default_factory=list,
+ description="Sample of successful embeddings requests",
+ )
+ incomplete: list[EmbeddingsRequestStats] = Field(
+ default_factory=list,
+ description="Sample of incomplete embeddings requests",
+ )
+ errored: list[EmbeddingsRequestStats] = Field(
+ default_factory=list,
+ description="Sample of errored embeddings requests",
+ )
+
+
+class EmbeddingsBenchmarkAccumulator(
+ BenchmarkAccumulator[GenerationRequest, GenerationResponse]
+):
+ """
+ Accumulates metrics during embeddings benchmark execution.
+
+ Extends BenchmarkAccumulator with embeddings-specific metric tracking including
+ input token processing, request latency, and optional quality validation metrics.
+ Does not track output tokens or streaming behavior.
+ """
+
+ type_: Literal["embeddings_benchmark_accumulator"] = (
+ "embeddings_benchmark_accumulator"
+ )
+
+ # Core accumulators
+ timings: EmbeddingsBenchmarkTimings = Field(
+ default_factory=EmbeddingsBenchmarkTimings,
+ description="Timing phase tracking",
+ )
+ scheduler_metrics: SchedulerMetricsAccumulator = Field(
+ default_factory=SchedulerMetricsAccumulator,
+ description="Scheduler metrics accumulation",
+ )
+ concurrency_metric: RunningMetricStats = Field(
+ default_factory=RunningMetricStats,
+ description="Time-weighted concurrency statistics",
+ )
+ completed_metrics: EmbeddingsCompletedMetricsAccumulator = Field(
+ default_factory=EmbeddingsCompletedMetricsAccumulator,
+ description="Real-time metrics for completed requests",
+ )
+ metrics: EmbeddingsMetricsAccumulator = Field(
+ default_factory=EmbeddingsMetricsAccumulator,
+ description="Performance metrics accumulation",
+ )
+ requests: EmbeddingsRequestsAccumulator = Field(
+ default_factory=EmbeddingsRequestsAccumulator,
+ description="Request statistics accumulation",
+ )
+
+ # Quality validation (optional)
+ quality_enabled: bool = Field(
+ default=False,
+ description="Whether quality validation is enabled",
+ )
+ quality: EmbeddingsQualityMetricsAccumulator | None = Field(
+ default=None,
+ description="Quality metrics accumulation (when enabled)",
+ )
+
+ # Encoding format tracking
+ encoding_format_breakdown: dict[str, int] = Field(
+ default_factory=dict,
+ description="Request count by encoding format",
+ )
+
+ # Reservoir sampling parameters
+ _sampling_counts: dict[str, int] = {}
+ _max_samples: int = 1000
+
+ def update_estimate( # noqa: C901, PLR0912
+ self,
+ response: GenerationResponse | None,
+ request: GenerationRequest | MultiTurnRequestT[GenerationRequest],
+ info: RequestInfo,
+ scheduler_state: SchedulerState,
+ ):
+ """
+ Update accumulated metrics with a new request completion.
+
+ :param response: Response from the backend (if successful)
+ :param request: Original generation request
+ :param info: Request metadata and timing information
+ :param scheduler_state: Current scheduler state
+ """
+ # Update timing state
+ self.timings.update_estimate(info, scheduler_state, self.config)
+ duration = self.timings.duration
+ self.concurrency_metric.update_estimate(
+ value=scheduler_state.processing_requests,
+ duration=duration,
+ )
+
+ # Determine request status and target accumulator
+ if info.status == "completed":
+ status_key = "completed"
+ status_list = self.requests.successful
+ elif info.status == "errored":
+ status_key = "errored"
+ status_list = self.requests.errored
+ elif info.status == "cancelled" and info.timings.resolve_start is not None:
+ status_key = "incomplete"
+ status_list = self.requests.incomplete
+ else:
+ # Not a terminal status or cancelled before starting
+ # Do not include in requests or metrics
+ return
+
+ # Build request stats
+ # Use response metrics if available (has actual token counts from server),
+ # otherwise fall back to request metrics (word/char counts only)
+ if isinstance(request, GenerationRequest):
+ request_input_metrics = request.input_metrics
+ else:
+ # For multi-turn requests, extract the first request
+ first_req = request[0] if isinstance(request, list | tuple) else None
+ if isinstance(first_req, tuple):
+ request_input_metrics = first_req[0].input_metrics
+ elif isinstance(first_req, GenerationRequest):
+ request_input_metrics = first_req.input_metrics
+ else:
+ request_input_metrics = None
+
+ input_metrics = (
+ response.input_metrics if response is not None else request_input_metrics
+ )
+ stats = EmbeddingsRequestStats(
+ request_id=info.request_id,
+ info=info,
+ input_metrics=input_metrics,
+ )
+
+ # Track encoding format if available
+ if isinstance(request, GenerationRequest) and hasattr(
+ request, "encoding_format"
+ ):
+ format_key = request.encoding_format or "float"
+ self.encoding_format_breakdown[format_key] = (
+ self.encoding_format_breakdown.get(format_key, 0) + 1
+ )
+
+ # Update scheduler metrics
+ self.scheduler_metrics.update_estimate(scheduler_state, stats)
+
+ # Update completed metrics for progress tracking (only for completed requests)
+ if status_key == "completed":
+ self.completed_metrics.requests.update_estimate(
+ value=1.0,
+ count=1,
+ duration=self.timings.duration,
+ )
+ if stats.request_latency is not None:
+ self.completed_metrics.request_latency.update_estimate(
+ value=stats.request_latency,
+ count=1,
+ )
+ if stats.prompt_tokens is not None:
+ self.completed_metrics.prompt_tokens.update_estimate(
+ value=float(stats.prompt_tokens),
+ count=1,
+ )
+ self.completed_metrics.total_tokens.update_estimate(
+ value=float(stats.prompt_tokens),
+ count=1,
+ )
+
+ # Reservoir sampling
+ sample_count = self._sampling_counts.get(status_key, 0)
+ if len(status_list) < self._max_samples:
+ status_list.append(stats)
+ else:
+ # Replace with decreasing probability
+ j = random.randint(0, sample_count)
+ if j < self._max_samples:
+ status_list[j] = stats
+ self._sampling_counts[status_key] = sample_count + 1
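+
+
+def _reservoir_add_sketch(sample: list, item, seen: int, k: int = 1000) -> int:
+    """
+    Illustrative sketch, not used by the accumulator: the reservoir update at
+    the end of ``update_estimate`` above, isolated for review. The sample list
+    never grows past ``k`` entries, and every item seen so far is retained
+    with equal probability k/n (the class uses ``_max_samples = 1000``).
+
+    :param sample: Bounded sample list, mutated in place
+    :param item: New candidate item
+    :param seen: Number of items seen so far for this status
+    :param k: Reservoir capacity
+    :return: Updated seen count to store back in ``_sampling_counts``
+    """
+    if len(sample) < k:
+        sample.append(item)
+    else:
+        j = random.randint(0, seen)  # uniform over 0..seen inclusive
+        if j < k:  # replace with probability k / (seen + 1)
+            sample[j] = item
+    return seen + 1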
diff --git a/src/guidellm/benchmark/schemas/embeddings/benchmark.py b/src/guidellm/benchmark/schemas/embeddings/benchmark.py
new file mode 100644
index 000000000..7991ea56b
--- /dev/null
+++ b/src/guidellm/benchmark/schemas/embeddings/benchmark.py
@@ -0,0 +1,160 @@
+"""
+Benchmark data models and metrics for embeddings performance measurement.
+
+Provides comprehensive data structures for capturing, storing, and analyzing
+benchmark results from scheduler-driven embeddings workload executions. Core
+abstractions include embeddings-specific metrics (no output-token or streaming
+statistics), request-level statistics tracking, and multi-benchmark reporting.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import Field, computed_field
+
+from guidellm.benchmark.schemas.base import Benchmark, BenchmarkConfig
+from guidellm.benchmark.schemas.embeddings.accumulator import (
+ EmbeddingsBenchmarkAccumulator,
+)
+from guidellm.benchmark.schemas.embeddings.metrics import (
+ EmbeddingsMetrics,
+ SchedulerMetrics,
+)
+from guidellm.scheduler import SchedulerState
+from guidellm.schemas import (
+ EmbeddingsRequestStats,
+ StatusBreakdown,
+ StatusDistributionSummary,
+)
+
+__all__ = ["EmbeddingsBenchmark"]
+
+
+class EmbeddingsBenchmark(Benchmark[EmbeddingsBenchmarkAccumulator]):
+ """
+ Complete embeddings benchmark results with specialized metrics.
+
+ Encapsulates comprehensive performance data from scheduler-driven embeddings
+ workload executions including request-level statistics, input token metrics,
+ latency distributions, and optional quality validation metrics. Unlike generative
+ benchmarks, does not track output tokens or streaming behavior.
+ """
+
+ type_: Literal["embeddings_benchmark"] = "embeddings_benchmark" # type: ignore[assignment]
+
+ config: BenchmarkConfig = Field(
+ description="Configuration parameters for this benchmark execution",
+ )
+ scheduler_state: SchedulerState = Field(
+ description="Final state of the scheduler after benchmark completion",
+ )
+ scheduler_metrics: SchedulerMetrics = Field(
+ description="Scheduler timing and performance statistics",
+ )
+ metrics: EmbeddingsMetrics = Field(
+ description="Performance metrics and statistical distributions",
+ )
+ requests: StatusBreakdown[
+ list[EmbeddingsRequestStats],
+ list[EmbeddingsRequestStats],
+ list[EmbeddingsRequestStats],
+ None,
+ ] = Field(
+ description=(
+ "Request details grouped by status: successful, incomplete, errored"
+ ),
+ )
+
+ @computed_field # type: ignore[prop-decorator]
+ @property
+ def start_time(self) -> float:
+ """
+ :return: Benchmark start time in seconds since epoch
+ """
+ return self.scheduler_metrics.measure_start_time
+
+ @computed_field # type: ignore[prop-decorator]
+ @property
+ def end_time(self) -> float:
+ """
+ :return: Benchmark end time in seconds since epoch
+ """
+ return self.scheduler_metrics.measure_end_time
+
+ @computed_field # type: ignore[prop-decorator]
+ @property
+ def duration(self) -> float:
+ """
+ :return: Total benchmark execution duration in seconds
+ """
+ return self.end_time - self.start_time
+
+ @computed_field # type: ignore[prop-decorator]
+ @property
+ def warmup_duration(self) -> float:
+ """
+ :return: Warmup phase duration in seconds
+ """
+ return (
+ self.scheduler_metrics.measure_start_time
+ - self.scheduler_metrics.request_start_time
+ )
+
+ @computed_field # type: ignore[prop-decorator]
+ @property
+ def cooldown_duration(self) -> float:
+ """
+ :return: Cooldown phase duration in seconds
+ """
+ return (
+ self.scheduler_metrics.request_end_time
+ - self.scheduler_metrics.measure_end_time
+ )
+
+ @property
+ def request_latency(self) -> StatusDistributionSummary:
+ """
+ :return: Statistical distribution of request latencies across all requests
+ """
+ return self.metrics.request_latency
+
+ @property
+ def request_throughput(self) -> StatusDistributionSummary:
+ """
+ :return: Statistical distribution of throughput measured in requests per second
+ """
+ return self.metrics.requests_per_second
+
+ @property
+ def request_concurrency(self) -> StatusDistributionSummary:
+ """
+ :return: Statistical distribution of concurrent requests throughout execution
+ """
+ return self.metrics.request_concurrency
+
+ @classmethod
+ def compile(
+ cls,
+ accumulator: EmbeddingsBenchmarkAccumulator,
+ scheduler_state: SchedulerState,
+ ) -> EmbeddingsBenchmark:
+ """
+ Compile final benchmark results from accumulated execution state.
+
+ :param accumulator: Accumulated benchmark state with request statistics
+ :param scheduler_state: Final scheduler state after execution completion
+ :return: Compiled embeddings benchmark instance with complete metrics
+ """
+ return EmbeddingsBenchmark(
+ config=accumulator.config,
+ scheduler_state=scheduler_state,
+ scheduler_metrics=SchedulerMetrics.compile(accumulator, scheduler_state),
+ metrics=EmbeddingsMetrics.compile(accumulator, scheduler_state),
+ requests=StatusBreakdown(
+ successful=accumulator.requests.successful,
+ incomplete=accumulator.requests.incomplete,
+ errored=accumulator.requests.errored,
+ total=None,
+ ),
+ )
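+
+
+def _phase_durations_sketch(metrics: SchedulerMetrics) -> dict[str, float]:
+    """
+    Illustrative sketch, not used by the benchmark class: how the computed
+    properties above partition a run. Warmup spans from the first request to
+    the start of measurement, and cooldown spans from the end of measurement
+    to the last request.
+    """
+    return {
+        "warmup": metrics.measure_start_time - metrics.request_start_time,
+        "measured": metrics.measure_end_time - metrics.measure_start_time,
+        "cooldown": metrics.request_end_time - metrics.measure_end_time,
+    }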
diff --git a/src/guidellm/benchmark/schemas/embeddings/entrypoints.py b/src/guidellm/benchmark/schemas/embeddings/entrypoints.py
new file mode 100644
index 000000000..f205e09eb
--- /dev/null
+++ b/src/guidellm/benchmark/schemas/embeddings/entrypoints.py
@@ -0,0 +1,311 @@
+"""
+Configuration entrypoints for embeddings benchmark execution.
+
+Defines parameter schemas for creating embeddings benchmark runs from scenario files
+or runtime arguments. Extends standard benchmark configuration with embeddings-specific
+options including quality validation settings (baseline model, cosine similarity
+tolerance) and MTEB benchmark integration.
+"""
+
+from __future__ import annotations
+
+import inspect
+import json
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any, Literal
+
+import yaml
+from pydantic import (
+ AliasChoices,
+ AliasGenerator,
+ ConfigDict,
+ Field,
+ field_serializer,
+)
+from torch.utils.data import Sampler
+from transformers import PreTrainedTokenizerBase
+
+from guidellm.backends import Backend, BackendType
+from guidellm.benchmark.profiles import Profile, ProfileType
+from guidellm.benchmark.scenarios import get_builtin_scenarios
+from guidellm.benchmark.schemas.base import TransientPhaseConfig
+from guidellm.data import DatasetFinalizer, DatasetPreprocessor
+from guidellm.scheduler import StrategyType
+from guidellm.schemas import StandardBaseModel
+
+__all__ = ["BenchmarkEmbeddingsArgs"]
+
+
+class BenchmarkEmbeddingsArgs(StandardBaseModel):
+ """
+ Configuration arguments for embeddings benchmark execution.
+
+ Defines all parameters for embeddings benchmark setup including target endpoint,
+ data sources, backend configuration, processing pipeline, output formatting,
+ execution constraints, and embeddings-specific quality validation options.
+
+ Example::
+
+ # Basic embeddings benchmark
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000/v1",
+ data=["path/to/texts.json"],
+ profile="sweep"
+ )
+
+ # With quality validation
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000/v1",
+ data=["path/to/texts.json"],
+ enable_quality_validation=True,
+ baseline_model="sentence-transformers/all-MiniLM-L6-v2",
+ quality_tolerance=1e-2
+ )
+
+ # With MTEB benchmarking
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000/v1",
+ data=["path/to/texts.json"],
+ enable_mteb=True,
+ mteb_tasks=["STS12", "STS13"]
+ )
+ """
+
+ @classmethod
+ def create(
+ cls, scenario: Path | str | None, **kwargs: dict[str, Any]
+ ) -> BenchmarkEmbeddingsArgs:
+ """
+ Create benchmark args from scenario file and keyword arguments.
+
+ :param scenario: Path to scenario file, built-in scenario name, or None
+ :param kwargs: Keyword arguments to override scenario values
+ :return: Configured benchmark args instance
+ :raises ValueError: If scenario is not found or file format is unsupported
+ """
+ constructor_kwargs = {}
+
+ if scenario is not None:
+ if isinstance(scenario, str) and scenario in (
+ builtin_scenarios := get_builtin_scenarios()
+ ):
+ scenario_path = builtin_scenarios[scenario]
+ elif Path(scenario).exists() and Path(scenario).is_file():
+ scenario_path = Path(scenario)
+ else:
+ raise ValueError(f"Scenario '{scenario}' not found.")
+
+ with scenario_path.open() as file:
+ if scenario_path.suffix == ".json":
+ scenario_data = json.load(file)
+ elif scenario_path.suffix in {".yaml", ".yml"}:
+ scenario_data = yaml.safe_load(file)
+ else:
+ raise ValueError(
+ f"Unsupported scenario file format: {scenario_path.suffix}"
+ )
+ if "args" in scenario_data:
+ scenario_data = scenario_data["args"]
+ constructor_kwargs.update(scenario_data)
+
+ constructor_kwargs.update(kwargs)
+ return cls.model_validate(constructor_kwargs)
+
+ @classmethod
+ def get_default(cls: type[BenchmarkEmbeddingsArgs], field: str) -> Any:
+ """
+ Retrieve default value for a model field.
+
+ :param field: Field name to retrieve default value for
+ :return: Default value for the field
+ :raises ValueError: If field does not exist
+ """
+ if field not in cls.model_fields:
+ raise ValueError(f"Field '{field}' not found in {cls.__name__}")
+
+ field_info = cls.model_fields[field]
+ factory = field_info.default_factory
+
+ if factory is None:
+ return field_info.default
+
+ if len(inspect.signature(factory).parameters) == 0:
+ return factory() # type: ignore[call-arg]
+ else:
+ return factory({}) # type: ignore[call-arg]
+
+ model_config = ConfigDict(
+ extra="ignore",
+ use_enum_values=True,
+ from_attributes=True,
+ arbitrary_types_allowed=True,
+ validate_by_alias=True,
+ validate_by_name=True,
+ alias_generator=AliasGenerator(
+ validation_alias=lambda field_name: AliasChoices(
+ field_name, field_name.replace("_", "-")
+ ),
+ ),
+ )
+
+ # Required
+ target: str = Field(description="Target endpoint URL for benchmark execution")
+ data: list[Any] = Field(
+ description="List of dataset sources or data files",
+ default_factory=list,
+ min_length=1,
+ )
+
+ # Benchmark configuration
+ profile: StrategyType | ProfileType | Profile = Field(
+ default="sweep", description="Benchmark profile or scheduling strategy type"
+ )
+ rate: list[float] | None = Field(
+ default=None, description="Request rate(s) for rate-based scheduling"
+ )
+
+ # Backend configuration
+ backend: BackendType | Backend = Field(
+ default="openai_http", description="Backend type or instance for execution"
+ )
+ backend_kwargs: dict[str, Any] | None = Field(
+ default=None, description="Additional backend configuration arguments"
+ )
+ request_format: str | None = Field(
+ default=None,
+ description="Query format for backend operations"
+ )
+ model: str | None = Field(default=None, description="Model identifier for backend")
+
+ # Data configuration
+ processor: str | Path | PreTrainedTokenizerBase | None = Field(
+ default=None, description="Tokenizer path, name, or instance for processing"
+ )
+ processor_args: dict[str, Any] | None = Field(
+ default=None, description="Additional tokenizer configuration arguments"
+ )
+ data_args: list[dict[str, Any]] | None = Field(
+ default_factory=list, # type: ignore[arg-type]
+ description="Per-dataset configuration arguments",
+ )
+ data_samples: int = Field(
+ default=-1, description="Number of samples to use from datasets (-1 for all)"
+ )
+ data_column_mapper: (
+ DatasetPreprocessor
+ | dict[str, str | list[str]]
+ | Literal["embeddings_column_mapper"]
+ ) = Field(
+ default="embeddings_column_mapper",
+ description="Column mapping preprocessor for dataset fields",
+ )
+ data_preprocessors: list[DatasetPreprocessor | dict[str, str | list[str]] | str] = (
+ Field(
+ default_factory=list, # type: ignore[arg-type]
+ description="List of dataset preprocessors to apply in order",
+ )
+ )
+ data_preprocessors_kwargs: dict[str, Any] = Field(
+ default_factory=dict,
+ description="Global arguments for data preprocessors",
+ )
+ data_finalizer: DatasetFinalizer | str | dict[str, Any] = Field(
+ default="embeddings",
+ description="Finalizer for preparing data samples into requests",
+ )
+ data_collator: Callable | Literal["embeddings"] | None = Field(
+ default="embeddings", description="Data collator for batch processing"
+ )
+ data_sampler: Sampler[int] | Literal["shuffle"] | None = Field(
+ default=None, description="Data sampler for request ordering"
+ )
+ data_num_workers: int | None = Field(
+ default=0, description="Number of workers for data loading"
+ )
+ dataloader_kwargs: dict[str, Any] | None = Field(
+ default=None, description="Additional dataloader configuration arguments"
+ )
+ random_seed: int = Field(default=42, description="Random seed for reproducibility")
+
+ # Output configuration
+    outputs: list[str] | tuple[str, ...] = Field(
+ default_factory=lambda: ["json", "csv", "html"],
+ description="Output types to create (json, csv, html)",
+ )
+ output_dir: str | Path = Field(
+ default_factory=Path.cwd,
+ description="Directory for saving output files",
+ )
+ output_kwargs: dict[str, Any] | None = Field(
+ default=None, description="Additional output formatter arguments"
+ )
+
+ # Constraint configuration
+ max_requests: int | None = Field(
+ default=None, description="Maximum number of requests to execute"
+ )
+ max_errors: int | None = Field(
+ default=None, description="Maximum allowed errors before stopping"
+ )
+ max_duration: float | None = Field(
+ default=None, description="Maximum duration in seconds"
+ )
+ warmup: TransientPhaseConfig | float | int | dict | None = Field(
+ default=None, description="Warmup phase configuration"
+ )
+ cooldown: TransientPhaseConfig | float | int | dict | None = Field(
+ default=None, description="Cooldown phase configuration"
+ )
+
+ # EMBEDDINGS-SPECIFIC: Quality validation options
+ enable_quality_validation: bool = Field(
+ default=False,
+ description="Enable quality validation against baseline model",
+ )
+ baseline_model: str | None = Field(
+ default=None,
+ description=(
+ "HuggingFace model for baseline comparison "
+ "(e.g., 'sentence-transformers/all-MiniLM-L6-v2')"
+ ),
+ )
+ quality_tolerance: float = Field(
+ default=1e-2,
+ description=(
+ "Cosine similarity tolerance threshold (1e-2 standard, 5e-4 MTEB-level)"
+ ),
+ )
+
+ # EMBEDDINGS-SPECIFIC: MTEB benchmark options
+ enable_mteb: bool = Field(
+ default=False,
+ description="Enable MTEB benchmark evaluation",
+ )
+ mteb_tasks: list[str] | None = Field(
+ default=None,
+ description=(
+ "MTEB tasks to evaluate (default: ['STS12', 'STS13', 'STSBenchmark'])"
+ ),
+ )
+
+ # EMBEDDINGS-SPECIFIC: Encoding format
+ encoding_format: Literal["float", "base64"] = Field(
+ default="float",
+ description="Embedding encoding format (float or base64)",
+ )
+
+ @field_serializer("output_dir")
+ def serialize_output_dir(self, value: Path) -> str:
+ """Serialize Path to string for JSON/YAML."""
+ return str(value)
+
+ @field_serializer("processor")
+ def serialize_processor(self, value: Any) -> str | None:
+ """Serialize processor to string representation."""
+ if value is None:
+ return None
+ if isinstance(value, str | Path):
+ return str(value)
+ # For PreTrainedTokenizer instances, return name_or_path
+ return getattr(value, "name_or_path", str(value))
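+
+
+# Illustrative usage sketch (hypothetical file name, for review context only):
+# values from the scenario file are applied first, then keyword overrides win,
+# mirroring the precedence implemented in ``create`` above.
+#
+#     args = BenchmarkEmbeddingsArgs.create(
+#         scenario="embeddings_sweep.yaml",
+#         target="http://localhost:8000/v1",
+#         enable_quality_validation=True,
+#         baseline_model="sentence-transformers/all-MiniLM-L6-v2",
+#     )
+#     assert args.quality_tolerance == 1e-2  # default kept unless overridden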
diff --git a/src/guidellm/benchmark/schemas/embeddings/metrics.py b/src/guidellm/benchmark/schemas/embeddings/metrics.py
new file mode 100644
index 000000000..e6bf8a2ea
--- /dev/null
+++ b/src/guidellm/benchmark/schemas/embeddings/metrics.py
@@ -0,0 +1,412 @@
+"""
+Metrics schemas for embeddings benchmark results and performance analysis.
+
+This module defines comprehensive metric structures for tracking and analyzing
+embeddings benchmark performance including request statistics, input token metrics,
+and optional quality validation metrics such as cosine similarity and MTEB scores.
+It provides statistical summaries with distribution analysis across successful,
+incomplete, and errored requests, along with scheduler-level performance metrics
+for request processing and queueing behavior.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import Field
+
+from guidellm.benchmark.schemas.embeddings.accumulator import (
+ EmbeddingsBenchmarkAccumulator,
+)
+from guidellm.scheduler import SchedulerState
+from guidellm.schemas import (
+ StandardBaseDict,
+ StatusBreakdown,
+ StatusDistributionSummary,
+)
+
+__all__ = [
+ "EmbeddingsMetrics",
+ "EmbeddingsQualityMetrics",
+ "SchedulerMetrics",
+ "StatusTypes",
+ "TimedMetricTypeAlias",
+]
+
+
+TimedMetricTypeAlias = (
+ tuple[float, float, int | float | None, int | float | None] | None
+)
+"""Timed metric tuple containing start_time, end_time, input_value, and output_value."""
+
+StatusTypes = Literal["successful", "incomplete", "errored"]
+"""Request status category for metric compilation."""
+
+# Constants for tuple indexing
+_TIMED_METRIC_START_TIME_INDEX = 0
+_TIMED_METRIC_END_TIME_INDEX = 1
+_TIMED_METRIC_INPUT_VALUE_INDEX = 2
+_TIMED_METRIC_OUTPUT_VALUE_INDEX = 3
+
+
+class SchedulerMetrics(StandardBaseDict):
+ """
+ Scheduler timing and performance statistics.
+
+ Tracks overall benchmark timing, request counts by status, and detailed internal
+ scheduler performance metrics including queue times, processing delays, and
+ request execution statistics. Used to analyze scheduler efficiency and identify
+ bottlenecks in request processing pipelines.
+ """
+
+ # Overall timings for the scheduler
+ start_time: float = Field(
+ description="Unix timestamp when the benchmark run started"
+ )
+ request_start_time: float = Field(
+ description="Unix timestamp when first request was made"
+ )
+ measure_start_time: float = Field(
+ description="Unix timestamp when measurement period started"
+ )
+ measure_end_time: float = Field(
+ description="Unix timestamp when measurement period ended"
+ )
+ request_end_time: float = Field(
+ description="Unix timestamp when last request completed"
+ )
+ end_time: float = Field(description="Unix timestamp when the benchmark run ended")
+
+ # Request details tracked by the scheduler
+ requests_made: StatusBreakdown[int, int, int, int] = Field(
+ description="Request counts by status: successful, incomplete, errored, total"
+ )
+
+ # Scheduler internal performance timings
+ queued_time_avg: float = Field(
+ description="Avg time requests spent in the queue (seconds)"
+ )
+    resolve_start_delay_avg: float = Field(
+        description=(
+            "Avg delay before the worker begins resolving a request after "
+            "dequeue (seconds)"
+        )
+    )
+ resolve_targeted_start_delay_avg: float = Field(
+ description="Avg delay to targeted resolve start time (seconds)"
+ )
+ request_start_delay_avg: float = Field(
+ description="Avg delay from resolve start to actual request start (seconds)"
+ )
+ resolve_time_avg: float = Field(
+ description="Avg total resolution time per request (seconds)"
+ )
+
+ @classmethod
+ def compile(
+ cls,
+ accumulator: EmbeddingsBenchmarkAccumulator,
+ scheduler_state: SchedulerState,
+ ) -> SchedulerMetrics:
+ """
+ Compile scheduler metrics from accumulator and scheduler state.
+
+ :param accumulator: Accumulator containing scheduler timing and request data
+ :param scheduler_state: Scheduler state with execution timing information
+ :return: Compiled SchedulerMetrics instance with timing statistics
+ """
+ num_requests = accumulator.scheduler_metrics.requests_made.total
+
+        # Avoid division by zero; use -1.0 to indicate no requests were processed
+ if num_requests is None or num_requests == 0:
+ queued_time_avg = -1.0
+ resolve_start_delay_avg = -1.0
+ resolve_targeted_start_delay_avg = -1.0
+ request_start_delay_avg = -1.0
+ resolve_time_avg = -1.0
+ else:
+ queued_time_avg = (
+ accumulator.scheduler_metrics.queued_time_sum / num_requests
+ )
+            resolve_start_delay_avg = (
+                accumulator.scheduler_metrics.resolve_start_delay_sum / num_requests
+            )
+            resolve_targeted_start_delay_avg = (
+                accumulator.scheduler_metrics.resolve_targeted_start_delay_sum
+                / num_requests
+            )
+            request_start_delay_avg = (
+                accumulator.scheduler_metrics.request_start_delay_sum / num_requests
+            )
+ resolve_time_avg = (
+ accumulator.scheduler_metrics.resolve_time_sum / num_requests
+ )
+
+ return SchedulerMetrics(
+ start_time=scheduler_state.start_time,
+ request_start_time=accumulator.timings.finalized_request_start,
+ measure_start_time=accumulator.timings.finalized_measure_start,
+ measure_end_time=accumulator.timings.finalized_measure_end,
+ request_end_time=accumulator.timings.finalized_request_end,
+ end_time=scheduler_state.end_time or -1.0,
+ requests_made=accumulator.scheduler_metrics.requests_made,
+ queued_time_avg=queued_time_avg,
+ resolve_start_delay_avg=resolve_start_delay_avg,
+ resolve_targeted_start_delay_avg=resolve_targeted_start_delay_avg,
+ request_start_delay_avg=request_start_delay_avg,
+ resolve_time_avg=resolve_time_avg,
+ )
+
+
+class EmbeddingsQualityMetrics(StandardBaseDict):
+ """
+ Quality validation metrics for embeddings.
+
+ Tracks cosine similarity scores against baseline models and MTEB benchmark
+ performance. These metrics provide insights into embedding quality beyond
+ raw performance measurements.
+ """
+
+ baseline_cosine_similarity: StatusDistributionSummary | None = Field(
+ default=None,
+ description="Cosine similarity distribution against baseline model (0.0-1.0)",
+ )
+ self_consistency_score: StatusDistributionSummary | None = Field(
+ default=None,
+ description="Self-consistency scores (same input → same embedding)",
+ )
+ mteb_main_score: float | None = Field(
+ default=None,
+ description="MTEB benchmark main score (average across tasks)",
+ )
+ mteb_task_scores: dict[str, float] | None = Field(
+ default=None,
+ description="Individual MTEB task scores (e.g., STS12, STS13)",
+ )
+
+
+class EmbeddingsMetrics(StandardBaseDict):
+ """
+ Performance and quality metrics for embeddings benchmarks.
+
+ Encapsulates comprehensive performance data from embeddings workload executions
+ including request-level statistics, input token metrics, and optional quality
+ validation metrics. Unlike generative metrics, embeddings metrics do not track
+ output tokens or streaming behavior (TTFT, ITL).
+ """
+
+ # Request statistics
+ request_totals: StatusBreakdown[int, int, int, int] = Field(
+ description="Total requests by status: successful, incomplete, errored, total"
+ )
+    requests_per_second: StatusDistributionSummary = Field(
+        description="Requests per second distribution across the measurement period"
+    )
+    request_concurrency: StatusDistributionSummary = Field(
+        description="Concurrent requests distribution throughout execution"
+    )
+ request_latency: StatusDistributionSummary = Field(
+ description="Request latency distribution (seconds)"
+ )
+
+ # Input token metrics (no output tokens for embeddings)
+ input_tokens_count: StatusBreakdown[int, int, int, int] = Field(
+ description=(
+ "Total input tokens by status: successful, incomplete, "
+ "errored, total"
+ )
+ )
+ input_tokens_per_second: StatusDistributionSummary = Field(
+ description="Input tokens per second distribution"
+ )
+
+ # Dummy output token fields for progress tracker compatibility (always zero)
+ output_token_count: StatusBreakdown[int, int, int, int] = Field(
+ default_factory=lambda: StatusBreakdown[int, int, int, int](
+ successful=0, incomplete=0, errored=0, total=0
+ ),
+ description="Output tokens (always 0 for embeddings)",
+ )
+ output_tokens_per_second: StatusDistributionSummary = Field(
+ default_factory=StatusDistributionSummary,
+ description="Output tokens per second (always 0 for embeddings)",
+ )
+ prompt_token_count: StatusBreakdown[int, int, int, int] | None = Field(
+ default=None,
+ description="Same as input_tokens_count (for compatibility)",
+ )
+ tokens_per_second: StatusDistributionSummary | None = Field(
+ default=None,
+ description="Same as input_tokens_per_second (for compatibility)",
+ )
+
+ # Quality validation metrics (optional)
+ quality: EmbeddingsQualityMetrics | None = Field(
+ default=None,
+ description="Quality validation metrics (when enabled)",
+ )
+
+ # Encoding format breakdown
+ encoding_format_breakdown: dict[str, int] = Field(
+ default_factory=dict,
+ description=(
+ "Request count by encoding format (e.g., "
+ "{'float': 50, 'base64': 0})"
+ ),
+ )
+
+ @classmethod
+ def compile(
+ cls,
+ accumulator: EmbeddingsBenchmarkAccumulator,
+ _scheduler_state: SchedulerState,
+ ) -> EmbeddingsMetrics:
+ """
+ Compile final embeddings metrics from accumulated execution state.
+
+ :param accumulator: Accumulated benchmark state with request statistics
+        :param _scheduler_state: Final scheduler state (unused during compilation)
+ :return: Compiled embeddings metrics instance with complete statistics
+ """
+ # Compile request counts
+ request_totals = StatusBreakdown[int, int, int, int](
+ successful=len(accumulator.requests.successful),
+ incomplete=len(accumulator.requests.incomplete),
+ errored=len(accumulator.requests.errored),
+ total=(
+ len(accumulator.requests.successful)
+ + len(accumulator.requests.incomplete)
+ + len(accumulator.requests.errored)
+ ),
+ )
+
+ # Compile input token counts
+ input_tokens_count = StatusBreakdown[int, int, int, int](
+ successful=sum(
+ req.input_metrics.total_tokens or 0
+ for req in accumulator.requests.successful
+ ),
+ incomplete=sum(
+ req.input_metrics.total_tokens or 0
+ for req in accumulator.requests.incomplete
+ ),
+ errored=sum(
+ req.input_metrics.total_tokens or 0
+ for req in accumulator.requests.errored
+ ),
+ total=0, # Will be computed
+ )
+ input_tokens_count.total = (
+ (input_tokens_count.successful or 0)
+ + (input_tokens_count.incomplete or 0)
+ + (input_tokens_count.errored or 0)
+ )
+
+ # Compile distribution metrics from request statistics
+ start_time = accumulator.timings.finalized_measure_start
+ end_time = accumulator.timings.finalized_measure_end
+
+ # Filter requests within measurement period
+        # If either window bound is the -1.0 sentinel, use all requests
+ if start_time == -1.0 or end_time == -1.0:
+ successful = accumulator.requests.successful
+ incomplete = accumulator.requests.incomplete
+ errored = accumulator.requests.errored
+ else:
+ successful = [
+ req for req in accumulator.requests.successful
+ if start_time <= req.request_end_time <= end_time
+ ]
+ incomplete = [
+ req for req in accumulator.requests.incomplete
+ if start_time <= req.request_end_time <= end_time
+ ]
+ errored = [
+ req for req in accumulator.requests.errored
+ if start_time <= req.request_end_time <= end_time
+ ]
+
+ # Compile distribution summaries
+        requests_per_second = (
+            StatusDistributionSummary.rate_distribution_from_timings_function(
+                function=lambda req: req.request_end_time,
+                successful=successful,
+                incomplete=incomplete,
+                errored=errored,
+                start_time=start_time,
+                end_time=end_time,
+            )
+        )
+
+        request_concurrency = (
+            StatusDistributionSummary.concurrency_distribution_from_timings_function(
+                function=lambda req: (
+                    (req.request_start_time, req.request_end_time)
+                    if req.request_start_time is not None
+                    and req.request_end_time is not None
+                    else None
+                ),
+                successful=successful,
+                incomplete=incomplete,
+                errored=errored,
+                start_time=start_time,
+                end_time=end_time,
+            )
+        )
+
+ request_latency = StatusDistributionSummary.from_values(
+ successful=[
+ req.request_latency
+ for req in successful
+ if req.request_latency is not None
+ ],
+ incomplete=[
+ req.request_latency
+ for req in incomplete
+ if req.request_latency is not None
+ ],
+ errored=[
+ req.request_latency
+ for req in errored
+ if req.request_latency is not None
+ ],
+ )
+
+        input_tokens_per_second = (
+            StatusDistributionSummary.rate_distribution_from_timings_function(
+                function=lambda req: req.input_tokens_timing,
+                successful=successful,
+                incomplete=incomplete,
+                errored=errored,
+            )
+        )
+
+ # Compile quality metrics if available
+ quality_metrics = None
+ if accumulator.quality_enabled and accumulator.quality is not None:
+ quality_metrics = EmbeddingsQualityMetrics(
+ baseline_cosine_similarity=accumulator.quality.baseline_cosine_similarity,
+ self_consistency_score=accumulator.quality.self_consistency_score,
+ mteb_main_score=accumulator.quality.mteb_main_score,
+ mteb_task_scores=accumulator.quality.mteb_task_scores,
+ )
+
+ return EmbeddingsMetrics(
+ request_totals=request_totals,
+ requests_per_second=requests_per_second,
+ request_concurrency=request_concurrency,
+ request_latency=request_latency,
+ input_tokens_count=input_tokens_count,
+ input_tokens_per_second=input_tokens_per_second,
+ prompt_token_count=input_tokens_count, # Alias for compatibility
+ tokens_per_second=input_tokens_per_second, # Alias for compatibility
+ quality=quality_metrics,
+ encoding_format_breakdown=accumulator.encoding_format_breakdown,
+ )
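+
+
+def _window_filter_sketch(
+    requests: list, start_time: float, end_time: float
+) -> list:
+    """
+    Illustrative sketch, not used by the module: the measurement-window filter
+    applied in ``EmbeddingsMetrics.compile`` above. A request contributes to
+    the distributions only if it ended inside the window; a -1.0 sentinel on
+    either bound disables filtering.
+    """
+    if start_time == -1.0 or end_time == -1.0:
+        return requests
+    return [
+        req for req in requests
+        if start_time <= req.request_end_time <= end_time
+    ]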
diff --git a/src/guidellm/benchmark/schemas/embeddings/report.py b/src/guidellm/benchmark/schemas/embeddings/report.py
new file mode 100644
index 000000000..14a4c47ac
--- /dev/null
+++ b/src/guidellm/benchmark/schemas/embeddings/report.py
@@ -0,0 +1,194 @@
+"""
+Report container for multiple embeddings benchmark results with persistence.
+
+Provides data structures for aggregating multiple embeddings benchmark executions
+into a single report with file I/O capabilities. Supports loading and saving benchmark
+collections in JSON and YAML formats, enabling result persistence, sharing, and analysis
+across different execution sessions.
+"""
+
+from __future__ import annotations
+
+import json
+import platform
+from importlib.metadata import version
+from pathlib import Path
+from typing import ClassVar, Literal
+
+import yaml
+from pydantic import Field
+
+from guidellm.benchmark.schemas.embeddings.benchmark import EmbeddingsBenchmark
+from guidellm.benchmark.schemas.embeddings.entrypoints import (
+ BenchmarkEmbeddingsArgs,
+)
+from guidellm.schemas import StandardBaseModel
+
+__all__ = ["EmbeddingsBenchmarkMetadata", "EmbeddingsBenchmarksReport"]
+
+
+class EmbeddingsBenchmarkMetadata(StandardBaseModel):
+ """
+ Versioning and environment metadata for embeddings benchmark reports.
+ """
+
+ version: Literal[1] = Field(
+ description=(
+ "Version of the benchmark report schema, increments "
+ "whenever there is a breaking change to the output format"
+ ),
+ default=1,
+ )
+ guidellm_version: str = Field(
+ description="Version of the guidellm package used for the benchmark",
+ default_factory=lambda: version("guidellm"),
+ )
+ python_version: str = Field(
+ description="Version of Python interpreter used during the benchmark",
+ default_factory=lambda: platform.python_version(),
+ )
+ platform: str = Field(
+ description="Operating system platform where the benchmark was executed",
+ default_factory=lambda: platform.platform(),
+ )
+
+
+class EmbeddingsBenchmarksReport(StandardBaseModel):
+ """
+ Container for multiple embeddings benchmark results with load/save functionality.
+
+ Aggregates multiple embeddings benchmark executions into a single report,
+ providing persistence through JSON and YAML file formats. Enables result
+ collection, storage, and retrieval across different execution sessions.
+
+ :cvar DEFAULT_FILE: Default filename used when saving to or loading from a directory
+ """
+
+ DEFAULT_FILE: ClassVar[str] = "embeddings_benchmarks.json"
+
+ type_: Literal["embeddings_benchmarks_report"] = Field(
+ description="Type identifier for embeddings benchmarks report",
+ default="embeddings_benchmarks_report",
+ )
+ metadata: EmbeddingsBenchmarkMetadata = Field(
+ description="Metadata about the benchmark report and execution environment",
+ default_factory=EmbeddingsBenchmarkMetadata,
+ )
+ args: BenchmarkEmbeddingsArgs = Field(
+ description="Benchmark arguments used for all benchmarks in the report"
+ )
+ benchmarks: list[EmbeddingsBenchmark] = Field(
+ description="List of completed embeddings benchmarks in the report",
+ default_factory=list,
+ )
+
+ def save_file(
+ self,
+ path: str | Path | None = None,
+ type_: Literal["json", "yaml"] | None = None,
+ ) -> Path:
+ """
+ Save report to file in JSON or YAML format.
+
+ :param path: File path or directory for saving, defaults to current
+ directory
+ :param type_: File format override ('json' or 'yaml'), auto-detected
+ from extension
+ :return: Resolved path to the saved file
+ :raises ValueError: If file type is unsupported or cannot be determined
+ """
+ file_path = EmbeddingsBenchmarksReport._resolve_path(
+ path if path is not None else Path.cwd()
+ )
+
+ if type_ is None:
+ type_ = EmbeddingsBenchmarksReport._detect_type(file_path)
+
+ if type_ == "json":
+ file_path.write_text(
+ json.dumps(
+ self.model_dump(mode="json"),
+ indent=2,
+ ensure_ascii=False,
+ )
+ )
+ elif type_ == "yaml":
+ file_path.write_text(
+ yaml.dump(
+ self.model_dump(mode="json"),
+ default_flow_style=False,
+ sort_keys=False,
+ )
+ )
+ else:
+ raise ValueError(f"Unsupported file type: {type_}")
+
+ return file_path
+
+ @classmethod
+ def load_file(
+ cls, path: str | Path, type_: Literal["json", "yaml"] | None = None
+ ) -> EmbeddingsBenchmarksReport:
+ """
+ Load report from file in JSON or YAML format.
+
+ :param path: File path to load from
+ :param type_: File format override, auto-detected from extension if None
+ :return: Loaded embeddings benchmarks report instance
+ :raises ValueError: If file type is unsupported or cannot be determined
+ :raises FileNotFoundError: If specified file does not exist
+ """
+ file_path = EmbeddingsBenchmarksReport._resolve_path(path)
+
+ if not file_path.exists():
+ raise FileNotFoundError(f"File not found: {file_path}")
+
+ if type_ is None:
+ type_ = EmbeddingsBenchmarksReport._detect_type(file_path)
+
+ content = file_path.read_text()
+
+ if type_ == "json":
+ data = json.loads(content)
+ elif type_ == "yaml":
+ data = yaml.safe_load(content)
+ else:
+ raise ValueError(f"Unsupported file type: {type_}")
+
+ return cls.model_validate(data)
+
+ @staticmethod
+ def _resolve_path(path: str | Path) -> Path:
+ """
+ Resolve file path, using DEFAULT_FILE if path is a directory.
+
+ :param path: Input path as string or Path object
+ :return: Resolved absolute Path to file
+ """
+ file_path = Path(path) if isinstance(path, str) else path
+
+ if file_path.is_dir():
+ file_path = file_path / EmbeddingsBenchmarksReport.DEFAULT_FILE
+
+ return file_path.resolve()
+
+ @staticmethod
+ def _detect_type(path: Path) -> Literal["json", "yaml"]:
+ """
+ Detect file type from path extension.
+
+ :param path: File path to analyze
+ :return: Detected file type ('json' or 'yaml')
+ :raises ValueError: If extension is not recognized
+ """
+ suffix = path.suffix.lower()
+
+ if suffix in {".json"}:
+ return "json"
+ elif suffix in {".yaml", ".yml"}:
+ return "yaml"
+ else:
+ raise ValueError(
+ f"Cannot detect file type from extension: {suffix}. "
+ "Use type_ parameter to specify 'json' or 'yaml'"
+ )
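+
+
+# Illustrative round-trip sketch (hypothetical paths, for review context only):
+# a directory path resolves to DEFAULT_FILE and the format is detected from
+# the file extension, per the helpers above.
+#
+#     report = EmbeddingsBenchmarksReport(args=args, benchmarks=[benchmark])
+#     saved = report.save_file("results/")  # results/embeddings_benchmarks.json
+#     loaded = EmbeddingsBenchmarksReport.load_file(saved)
+#     assert loaded.metadata.version == 1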
diff --git a/src/guidellm/benchmark/schemas/generative/accumulator.py b/src/guidellm/benchmark/schemas/generative/accumulator.py
index 5a64b7a19..a7d7ee199 100644
--- a/src/guidellm/benchmark/schemas/generative/accumulator.py
+++ b/src/guidellm/benchmark/schemas/generative/accumulator.py
@@ -788,6 +788,21 @@ class GenerativeBenchmarkAccumulator(
description="Running metrics for incomplete requests",
)
+ def model_post_init(self, __context):
+ """
+ Initialize child accumulators with config values after model construction.
+
+ Propagates sample_requests from config to child request accumulators to ensure
+ consistent sampling behavior across completed, errored, and incomplete request
+ collections. This ensures the --sample-requests option functions correctly.
+ """
+ super().model_post_init(__context)
+
+ # Propagate sample_requests from config to child accumulators
+ self.completed.sample_requests = self.config.sample_requests
+ self.errored.sample_requests = self.config.sample_requests
+ self.incomplete.sample_requests = self.config.sample_requests
+
def update_estimate(
self,
response: GenerationResponse | None,
diff --git a/src/guidellm/benchmark/schemas/generative/entrypoints.py b/src/guidellm/benchmark/schemas/generative/entrypoints.py
index 45d9a4b27..e85a5ba58 100644
--- a/src/guidellm/benchmark/schemas/generative/entrypoints.py
+++ b/src/guidellm/benchmark/schemas/generative/entrypoints.py
@@ -252,7 +252,7 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
)
# Benchmarker configuration
sample_requests: int | None = Field(
- default=10,
+ default=None,
description="Number of requests to sample for detailed metrics (None for all)",
)
warmup: int | float | dict | TransientPhaseConfig | None = Field(
diff --git a/src/guidellm/data/__init__.py b/src/guidellm/data/__init__.py
index 8ff8609b9..22d54c97e 100644
--- a/src/guidellm/data/__init__.py
+++ b/src/guidellm/data/__init__.py
@@ -1,5 +1,5 @@
from .builders import ShortPromptStrategy
-from .collators import GenerativeRequestCollator
+from .collators import EmbeddingsRequestCollator, GenerativeRequestCollator
from .deserializers import (
DataNotSupportedError,
DatasetDeserializer,
@@ -25,6 +25,7 @@
"DatasetFinalizer",
"DatasetPreprocessor",
"DatasetsIterator",
+ "EmbeddingsRequestCollator",
"FinalizerRegistry",
"GenerativeDatasetColumnType",
"GenerativeRequestCollator",
diff --git a/src/guidellm/data/collators.py b/src/guidellm/data/collators.py
index f9e1ade4f..55e994a2a 100644
--- a/src/guidellm/data/collators.py
+++ b/src/guidellm/data/collators.py
@@ -2,10 +2,16 @@
from guidellm.schemas import GenerationRequest
-__all__ = ["GenerativeRequestCollator"]
+__all__ = ["EmbeddingsRequestCollator", "GenerativeRequestCollator"]
class GenerativeRequestCollator:
+ """
+ Collator for generative (chat/completion) requests.
+
+    Currently enforces a batch size of 1; batching is not yet supported.
+ """
+
def __call__(self, batch: list) -> GenerationRequest:
if len(batch) != 1:
raise NotImplementedError(
@@ -14,3 +20,28 @@ def __call__(self, batch: list) -> GenerationRequest:
)
return batch[0]
+
+
+class EmbeddingsRequestCollator:
+ """
+ Collator for embeddings requests.
+
+    A simple pass-through that enforces a batch size of 1. Embeddings requests
+    are already formatted by the EmbeddingsRequestFinalizer.
+ """
+
+ def __call__(self, batch: list) -> GenerationRequest:
+ """
+ Collate batch of embeddings requests.
+
+ :param batch: List of GenerationRequest objects (should be length 1)
+ :return: Single GenerationRequest
+ :raises NotImplementedError: If batch size > 1
+ """
+ if len(batch) != 1:
+ raise NotImplementedError(
+ f"Batch size greater than 1 is not currently supported. "
+ f"Got batch size: {len(batch)}"
+ )
+
+ return batch[0]
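+
+
+# Illustrative wiring sketch (hypothetical, for review context only): both
+# collators plug into a torch DataLoader as ``collate_fn`` and unwrap the
+# single-element batch produced with ``batch_size=1``.
+#
+#     from torch.utils.data import DataLoader
+#
+#     loader = DataLoader(
+#         dataset, batch_size=1, collate_fn=EmbeddingsRequestCollator()
+#     )
+#     request = next(iter(loader))  # a single GenerationRequest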
diff --git a/src/guidellm/data/config.py b/src/guidellm/data/config.py
index 2b0b2133a..ea14967e0 100644
--- a/src/guidellm/data/config.py
+++ b/src/guidellm/data/config.py
@@ -93,7 +93,7 @@ def _load_config_str(data: str, config_class: type[ConfigT]) -> ConfigT | None:
except Exception as err: # noqa: BLE001
error = err
- if data_str.count("=") > 1:
+ if data_str.count("=") >= 1:
# key=value pairs separated by commas
try:
config_dict = {}
diff --git a/src/guidellm/data/finalizers.py b/src/guidellm/data/finalizers.py
index f804ec821..128b1f992 100644
--- a/src/guidellm/data/finalizers.py
+++ b/src/guidellm/data/finalizers.py
@@ -113,3 +113,50 @@ def __call__( # noqa: C901 PLR0912
input_metrics=input_metrics,
output_metrics=output_metrics,
)
+
+
+@FinalizerRegistry.register("embeddings")
+class EmbeddingsRequestFinalizer(DatasetFinalizer[GenerationRequest]):
+ """
+ Finalizer that converts dataset rows into embeddings GenerationRequest objects.
+
+ Much simpler than GenerativeRequestFinalizer since embeddings only need
+ a text input field. Collects text from 'text_column' and creates a request
+ with basic token/word counting.
+
+ Example:
+ ::
+ finalizer = EmbeddingsRequestFinalizer()
+ row = {"text_column": ["This is a test sentence"]}
+ request = finalizer(row)
+ # request.body["input"] == "This is a test sentence"
+ """
+
+ def __call__(self, columns: dict[str, Any]) -> GenerationRequest:
+ """
+ Convert dataset row to embeddings request.
+
+ :param columns: Dict with 'text_column' containing text strings
+ :return: GenerationRequest configured for embeddings
+ """
+ input_metrics = UsageMetrics()
+ texts = []
+
+ # Collect all text inputs
+ for text in columns.get("text_column", []):
+ if not text:
+ continue
+
+ texts.append(text)
+ input_metrics.add_text_metrics(text)
+
+        # At least one non-empty text input is required; the mapped columns
+        # (including text_column) are carried on the request itself
+ if not texts:
+ raise ValueError("No text found in dataset row for embeddings")
+
+ # Create GenerationRequest with columns and metrics
+ return GenerationRequest(
+ columns=columns,
+ input_metrics=input_metrics,
+ output_metrics=UsageMetrics(), # Embeddings have no output
+ )
diff --git a/src/guidellm/data/preprocessors/__init__.py b/src/guidellm/data/preprocessors/__init__.py
index abe493aea..0df1b1efd 100644
--- a/src/guidellm/data/preprocessors/__init__.py
+++ b/src/guidellm/data/preprocessors/__init__.py
@@ -1,3 +1,4 @@
+from .embeddings_mapper import EmbeddingsColumnMapper
from .encoders import MediaEncoder
from .mappers import GenerativeColumnMapper
from .preprocessor import (
@@ -9,6 +10,7 @@
__all__ = [
"DataDependentPreprocessor",
"DatasetPreprocessor",
+ "EmbeddingsColumnMapper",
"GenerativeColumnMapper",
"MediaEncoder",
"PreprocessorRegistry",
diff --git a/src/guidellm/data/preprocessors/embeddings_mapper.py b/src/guidellm/data/preprocessors/embeddings_mapper.py
new file mode 100644
index 000000000..b3517da61
--- /dev/null
+++ b/src/guidellm/data/preprocessors/embeddings_mapper.py
@@ -0,0 +1,191 @@
+"""
+Column mapper for embeddings datasets.
+
+Maps common text column names to the standard 'text_column' field expected by
+the embeddings finalizer. Much simpler than the generative mapper since embeddings
+only need a single text input field.
+"""
+
+from __future__ import annotations
+
+from collections import defaultdict
+from typing import Any, ClassVar, cast
+
+from datasets import Dataset, IterableDataset
+
+from guidellm.data.preprocessors.preprocessor import (
+ DataDependentPreprocessor,
+ PreprocessorRegistry,
+)
+
+__all__ = ["EmbeddingsColumnMapper"]
+
+
+@PreprocessorRegistry.register("embeddings_column_mapper")
+class EmbeddingsColumnMapper(DataDependentPreprocessor):
+ """
+ Maps dataset columns to embeddings text field.
+
+ Searches for common text column names and maps them to 'text_column'
+ for the embeddings finalizer to consume.
+
+ Example:
+ ::
+ # Dataset with "text" column
+ mapper = EmbeddingsColumnMapper()
+            dataset = Dataset.from_dict({"text": ["Hello", "World"]})
+            mapper.setup_data([dataset], [{}])
+            result = mapper({"items": {0: {"text": "Hello"}}})
+            # result["text_column"] == ["Hello"]
+ """
+
+ defaults: ClassVar[dict[str, list[str]]] = {
+ "text_column": [
+ "text",
+ "input",
+ "content",
+ "prompt",
+ "sentence",
+ "document",
+ "passage",
+ "query",
+ "body",
+ "message",
+ ],
+ }
+
+ def __init__(
+ self,
+ column_mappings: dict[str, str | list[str]] | None = None,
+ **_: Any, # Ignore global kwargs
+ ):
+ self.input_mappings = column_mappings
+ self.datasets_column_mappings: dict[str, list[tuple[int, str]]] | None = None
+
+ @classmethod
+ def datasets_default_mappings(
+ cls, datasets: list[Dataset | IterableDataset]
+ ) -> dict[str, list[tuple[int, str]]]:
+ """
+ Auto-detect text columns from datasets.
+
+ :param datasets: List of datasets to analyze
+ :return: Mapping of column types to (dataset_index, column_name) tuples
+ """
+ mappings: dict[str, list[tuple[int, str]]] = defaultdict(list)
+
+ for index, dataset in enumerate(datasets):
+ dataset_columns = dataset.column_names or list(next(iter(dataset)).keys())
+
+ # Try to find text column
+ if "text_column" not in mappings or not mappings["text_column"]:
+ for name_base in cls.defaults.get("text_column", []):
+ # Try various case variations
+ for variant in [
+ name_base,
+ name_base.lower(),
+ name_base.upper(),
+ name_base.capitalize(),
+ ]:
+ if variant in dataset_columns:
+ mappings["text_column"].append((index, variant))
+ break
+ if mappings["text_column"]:
+ break
+
+ return mappings
+
+ @classmethod
+ def datasets_mappings(
+ cls,
+ datasets: list[Dataset | IterableDataset],
+ input_mappings: dict[str, str | list[str]],
+ ) -> dict[str, list[tuple[int, str]]]:
+ """
+ Create mappings from user-specified column names.
+
+ :param datasets: List of datasets to map
+ :param input_mappings: User-specified mappings
+ :return: Validated mappings of column types to (dataset_index,
+ column_name) tuples
+ """
+ mappings: dict[str, list[tuple[int, str]]] = defaultdict(list)
+
+ datasets_named_indices = {
+ (
+ dataset.info.dataset_name
+ if dataset.info and dataset.info.dataset_name
+ else index
+ ): index
+ for index, dataset in enumerate(datasets)
+ }
+ datasets_columns = {
+ index: dataset.column_names or list(next(iter(dataset)).keys())
+ for index, dataset in enumerate(datasets)
+ }
+
+ # Parse user mappings
+ for column_type, names in input_mappings.items():
+ mappings[column_type] = []
+ for name in names if isinstance(names, list) else [names]:
+ if "." in name:
+ dataset, column_name = name.split(".", 1)
+ dataset_index = (
+ int(dataset)
+ if dataset.isdigit()
+ else datasets_named_indices.get(dataset)
+ )
+ else:
+ dataset_index = 0
+ column_name = name
+
+ if dataset_index is None or dataset_index >= len(datasets):
+ raise ValueError(
+ f"Dataset '{name}' not found in datasets: "
+ f"{datasets_named_indices}."
+ )
+ if column_name not in datasets_columns[dataset_index]:
+ raise ValueError(
+ f"Column '{column_name}' not found in dataset {dataset_index}. "
+ f"Available columns: {datasets_columns[dataset_index]}"
+ )
+
+ mappings[column_type].append((dataset_index, column_name))
+
+ return mappings
+
+ def __call__(self, row: dict[str, Any]) -> dict[str, list[Any]]:
+ """
+ Transform a row by extracting text columns based on established mappings.
+
+ :param row: Dictionary containing 'items' key with dataset rows
+ :return: Mapped dictionary with 'text_column' key
+ """
+ if self.datasets_column_mappings is None:
+ raise ValueError("EmbeddingsColumnMapper not setup with data.")
+
+ items = cast("dict[int, dict[str, Any]]", row.pop("items"))
+ mapped: dict[str, Any] = defaultdict(list)
+
+ for column_type, column_mappings in self.datasets_column_mappings.items():
+ for dataset_index, dataset_column in column_mappings:
+ mapped[column_type].append(items[dataset_index][dataset_column])
+
+ return dict(mapped)
+
+ def setup_data(
+ self,
+ datasets: list[Dataset | IterableDataset],
+ data_args: list[dict[str, Any]],
+ ):
+ """
+ Initialize column mappings from datasets.
+
+ :param datasets: List of datasets to process
+ :param data_args: Arguments for each dataset (unused for this mapper)
+ """
+ _ = data_args # Unused for this mapper
+ self.datasets_column_mappings = (
+ self.datasets_default_mappings(datasets)
+ if self.input_mappings is None
+ else self.datasets_mappings(datasets, self.input_mappings)
+ )
diff --git a/src/guidellm/data/schemas.py b/src/guidellm/data/schemas.py
index 16af56dff..5ac978530 100644
--- a/src/guidellm/data/schemas.py
+++ b/src/guidellm/data/schemas.py
@@ -125,6 +125,7 @@ class SyntheticTextDatasetConfig(DataConfig):
output_tokens: int = Field(
description="The average number of text tokens generated for outputs.",
gt=0,
+ default=1,
)
output_tokens_stdev: int | None = Field(
description="The standard deviation of the tokens generated for outputs.",
diff --git a/src/guidellm/mock_server/handlers/__init__.py b/src/guidellm/mock_server/handlers/__init__.py
index 7dbc209ff..f4a34f75e 100644
--- a/src/guidellm/mock_server/handlers/__init__.py
+++ b/src/guidellm/mock_server/handlers/__init__.py
@@ -12,6 +12,12 @@
from .chat_completions import ChatCompletionsHandler
from .completions import CompletionsHandler
+from .embeddings import EmbeddingsHandler
from .tokenizer import TokenizerHandler
-__all__ = ["ChatCompletionsHandler", "CompletionsHandler", "TokenizerHandler"]
+__all__ = [
+ "ChatCompletionsHandler",
+ "CompletionsHandler",
+ "EmbeddingsHandler",
+ "TokenizerHandler",
+]
diff --git a/src/guidellm/mock_server/handlers/embeddings.py b/src/guidellm/mock_server/handlers/embeddings.py
new file mode 100644
index 000000000..c24eaa539
--- /dev/null
+++ b/src/guidellm/mock_server/handlers/embeddings.py
@@ -0,0 +1,253 @@
+"""
+Mock server handler for OpenAI-compatible /v1/embeddings endpoint.
+
+Generates synthetic normalized embedding vectors with configurable dimensions and
+encoding formats. Simulates realistic embedding API behavior including timing delays,
+token counting, and batch processing while providing deterministic outputs for testing.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import json
+import math
+import random
+import struct
+from typing import TYPE_CHECKING
+
+from pydantic import ValidationError
+from sanic import response
+from sanic.request import Request
+from sanic.response import HTTPResponse
+
+from guidellm.mock_server.models import (
+ EmbeddingObject,
+ EmbeddingsRequest,
+ EmbeddingsResponse,
+ ErrorDetail,
+ ErrorResponse,
+ Usage,
+)
+from guidellm.mock_server.utils import MockTokenizer
+
+if TYPE_CHECKING:
+ from guidellm.mock_server.config import MockServerConfig
+
+__all__ = ["EmbeddingsHandler"]
+
+
+class EmbeddingsHandler:
+ """
+ Handler for /v1/embeddings endpoint in mock server.
+
+ Processes embeddings requests and generates synthetic normalized embedding
+ vectors with realistic timing simulation. Supports both float and base64
+ encoding formats, batch processing, and optional dimension reduction.
+
+ Example:
+ ::
+ handler = EmbeddingsHandler(config)
+ response = await handler.handle(request)
+ """
+
+ def __init__(self, config: MockServerConfig):
+ """
+ Initialize embeddings handler with server configuration.
+
+ :param config: Mock server configuration with timing and model parameters
+ """
+ self.config = config
+ self.tokenizer = MockTokenizer()
+
+ async def handle(self, request: Request) -> HTTPResponse:
+ """
+ Process embeddings request and return response.
+
+ :param request: HTTP request containing embeddings parameters
+ :return: HTTP response with embeddings data or error
+ """
+ try:
+ # Parse request body
+ req = EmbeddingsRequest(**request.json)
+ except ValidationError as exc:
+ return response.json(
+ ErrorResponse(
+ error=ErrorDetail(
+ message=f"Invalid request: {str(exc)}",
+ type="invalid_request_error",
+ code="invalid_request",
+ )
+ ).model_dump(),
+ status=400,
+ )
+ except (json.JSONDecodeError, TypeError):
+ return response.json(
+ ErrorResponse(
+ error=ErrorDetail(
+ message="Invalid JSON in request body",
+ type="invalid_request_error",
+ code="invalid_json",
+ )
+ ).model_dump(),
+ status=400,
+ )
+
+ # Handle input as list
+ inputs = [req.input] if isinstance(req.input, str) else req.input
+
+ # Determine embedding dimensions
+ dimensions = (
+ req.dimensions if req.dimensions is not None else 384
+ ) # Default dim
+
+ # Validate encoding format
+ encoding_format = req.encoding_format or "float"
+ if encoding_format not in {"float", "base64"}:
+ return response.json(
+ ErrorResponse(
+ error=ErrorDetail(
+ message=(
+ f"Invalid encoding_format: {encoding_format}. "
+ "Must be 'float' or 'base64'"
+ ),
+ type="invalid_request_error",
+ code="invalid_encoding_format",
+ )
+ ).model_dump(),
+ status=400,
+ )
+
+ # Count total tokens (reported in usage stats)
+ total_tokens = 0
+ for text in inputs:
+ tokens = len(self.tokenizer.tokenize(text))
+
+ # Apply truncation if requested
+ if req.truncate_prompt_tokens is not None:
+ tokens = min(tokens, req.truncate_prompt_tokens)
+
+ total_tokens += tokens
+
+ # Simulate request processing delay using the configured TTFT timings
+ # (a Gaussian around ttft_ms; independent of input length)
+ if self.config.ttft_ms > 0:
+ delay_ms = max(
+ 0,
+ random.gauss(
+ self.config.ttft_ms,
+ self.config.ttft_ms_std if self.config.ttft_ms_std > 0 else 0,
+ ),
+ )
+ await asyncio.sleep(delay_ms / 1000.0)
+
+ # Generate embeddings for each input
+ embeddings_data = []
+ for index, _text in enumerate(inputs):
+ # Generate synthetic normalized embedding
+ embedding_vector = self._generate_embedding(dimensions)
+
+ # Encode based on requested format
+ embedding_encoded: list[float] | str
+ if encoding_format == "base64":
+ embedding_encoded = self._encode_to_base64(embedding_vector)
+ else:
+ embedding_encoded = embedding_vector
+
+ embeddings_data.append(
+ EmbeddingObject(
+ embedding=embedding_encoded,
+ index=index,
+ )
+ )
+
+ # Build usage stats (embeddings have no completion_tokens)
+ usage = Usage(
+ prompt_tokens=total_tokens,
+ completion_tokens=0, # Embeddings don't generate tokens
+ )
+
+ # Build response
+ embeddings_response = EmbeddingsResponse(
+ data=embeddings_data,
+ model=req.model,
+ usage=usage,
+ )
+
+ return HTTPResponse(
+ body=embeddings_response.model_dump_json(),
+ status=200,
+ headers={"Content-Type": "application/json"},
+ )
+
+ def _generate_embedding(self, dimensions: int) -> list[float]:
+ """
+ Generate synthetic normalized embedding vector.
+
+ Creates a random vector and normalizes it to unit length (L2 norm = 1),
+ which is standard for embedding models.
+
+ :param dimensions: Number of dimensions for the embedding
+ :return: Normalized embedding vector as list of floats
+
+ Example:
+ ::
+ emb = handler._generate_embedding(384)
+ norm = math.sqrt(sum(x*x for x in emb)) # Should be ≈1.0
+ """
+ # Generate random vector from Gaussian distribution
+ embedding = [random.gauss(0, 1) for _ in range(dimensions)]
+
+ # Normalize to unit length
+ norm = math.sqrt(sum(x * x for x in embedding))
+ if norm > 0:
+ embedding = [x / norm for x in embedding]
+
+ return embedding
+
+ def _encode_to_base64(self, embedding: list[float]) -> str:
+ """
+ Encode embedding vector as base64-encoded binary string.
+
+ Converts float list to packed binary format (little-endian floats)
+ and encodes as base64 string for efficient transmission.
+
+ :param embedding: Embedding vector as list of floats
+ :return: Base64-encoded binary representation
+
+ Example:
+ ::
+ embedding = [0.1, 0.2, 0.3]
+ encoded = handler._encode_to_base64(embedding)
+ # Returns "zczMPc3MTD6amZk+" (base64 of three little-endian float32s)
+ """
+ # Pack floats as little-endian binary
+ # Format: 'f' = single-precision float (4 bytes each)
+ bytes_data = struct.pack(f"{len(embedding)}f", *embedding)
+
+ # Encode as base64
+ return base64.b64encode(bytes_data).decode("utf-8")
+
+ @staticmethod
+ def decode_from_base64(encoded: str, dimensions: int) -> list[float]:
+ """
+ Decode base64-encoded embedding back to float list.
+
+ Utility method for testing and validation. Reverses the encoding
+ performed by _encode_to_base64.
+
+ :param encoded: Base64-encoded binary string
+ :param dimensions: Number of dimensions to decode
+ :return: Decoded embedding vector as list of floats
+
+ Example:
+ ::
+ encoded = "MzMzPz8/Pz8/Pz8="
+ decoded = EmbeddingsHandler.decode_from_base64(encoded, 3)
+ # Returns approximately [0.1, 0.2, 0.3]
+ """
+ # Decode base64 to bytes
+ bytes_data = base64.b64decode(encoded)
+
+ # Unpack floats
+ return list(struct.unpack(f"{dimensions}f", bytes_data))
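+
+
+# Round-trip sketch (illustrative; values and tolerance are made up):
+# handler = EmbeddingsHandler(config)
+# vec = handler._generate_embedding(8)
+# encoded = handler._encode_to_base64(vec)
+# decoded = EmbeddingsHandler.decode_from_base64(encoded, 8)
+# # decoded matches vec up to float32 rounding
+# assert all(abs(a - b) < 1e-6 for a, b in zip(vec, decoded))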
diff --git a/src/guidellm/mock_server/models.py b/src/guidellm/mock_server/models.py
index cd342f7a9..f9fcedfa5 100644
--- a/src/guidellm/mock_server/models.py
+++ b/src/guidellm/mock_server/models.py
@@ -26,6 +26,9 @@
"CompletionsResponse",
"DetokenizeRequest",
"DetokenizeResponse",
+ "EmbeddingObject",
+ "EmbeddingsRequest",
+ "EmbeddingsResponse",
"ErrorDetail",
"ErrorResponse",
"StreamOptions",
@@ -486,6 +489,82 @@ class DetokenizeResponse(BaseModel):
text: str = Field(description="Reconstructed text from tokens")
+class EmbeddingsRequest(BaseModel):
+ """Request parameters for embeddings API endpoints.
+
+ OpenAI-compatible embeddings request supporting both single and batch
+ input processing with multiple encoding formats and optional parameters.
+ """
+
+ input: str | list[str] = Field(
+ description="Text(s) to generate embeddings for (single string or list)"
+ )
+ model: str = Field(description="Model identifier to use for embeddings")
+ encoding_format: Literal["float", "base64"] | None = Field(
+ default="float",
+ description=(
+ "Format for embedding output (float array or "
+ "base64-encoded binary)"
+ ),
+ )
+ dimensions: int | None = Field(
+ default=None,
+ description=(
+ "Number of dimensions for output embeddings. "
+ "Supports matryoshka embeddings for models that support it."
+ ),
+ )
+ truncate_prompt_tokens: int | None = Field(
+ default=None,
+ description=(
+ "Maximum number of tokens to use from input "
+ "(truncates if exceeded)"
+ ),
+ )
+ user: str | None = Field(
+ default=None,
+ description="User identifier for tracking and abuse monitoring",
+ )
+
+
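+# Illustrative request body accepted by EmbeddingsRequest (values made up):
+# {"input": ["hello world"], "model": "test-model",
+#  "encoding_format": "base64", "dimensions": 256}
+
+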
+class EmbeddingObject(BaseModel):
+ """A single embedding vector in the response.
+
+ Represents one embedded text with its vector representation and
+ metadata for batch processing.
+ """
+
+ object: Literal["embedding"] = Field(
+ default="embedding", description="Object type identifier"
+ )
+ embedding: list[float] | str = Field(
+ description=(
+ "Embedding vector as float list or base64-encoded binary string. "
+ "Format depends on encoding_format parameter in request."
+ )
+ )
+ index: int = Field(
+ description="Position of this embedding in the input batch (0-indexed)"
+ )
+
+
+class EmbeddingsResponse(BaseModel):
+ """Response containing generated embeddings for input text(s).
+
+ Returns embedding vectors for each input text along with token
+ usage statistics and model metadata.
+ """
+
+ object: Literal["list"] = Field(
+ default="list", description="Object type identifier"
+ )
+ data: list[EmbeddingObject] = Field(
+ description="List of embedding objects, one per input text"
+ )
+ model: str = Field(description="Model identifier used for generation")
+ usage: Usage = Field(description="Token usage statistics for the request")
+
+
class ErrorDetail(BaseModel):
"""Detailed error information for API failures.
diff --git a/src/guidellm/mock_server/server.py b/src/guidellm/mock_server/server.py
index e1d3b6860..743a1b6e2 100644
--- a/src/guidellm/mock_server/server.py
+++ b/src/guidellm/mock_server/server.py
@@ -23,6 +23,7 @@
from guidellm.mock_server.handlers import (
ChatCompletionsHandler,
CompletionsHandler,
+ EmbeddingsHandler,
TokenizerHandler,
)
@@ -56,6 +57,7 @@ def __init__(self, config: MockServerConfig) -> None:
self.app = Sanic("guidellm-mock-server")
self.chat_handler = ChatCompletionsHandler(config)
self.completions_handler = CompletionsHandler(config)
+ self.embeddings_handler = EmbeddingsHandler(config)
self.tokenizer_handler = TokenizerHandler(config)
self._setup_middleware()
@@ -114,6 +116,12 @@ async def completions(request: Request):
return response.text("", status=204)
return await self.completions_handler.handle(request)
+ @self.app.route("/v1/embeddings", methods=["POST", "OPTIONS"])
+ async def embeddings(request: Request):
+ if request.method == "OPTIONS":
+ return response.text("", status=204)
+ return await self.embeddings_handler.handle(request)
+
@self.app.route("/tokenize", methods=["POST", "OPTIONS"])
async def tokenize(request: Request):
if request.method == "OPTIONS":
diff --git a/src/guidellm/schemas/__init__.py b/src/guidellm/schemas/__init__.py
index 4c78446fe..1ba2b2256 100644
--- a/src/guidellm/schemas/__init__.py
+++ b/src/guidellm/schemas/__init__.py
@@ -22,6 +22,7 @@
SuccessfulT,
TotalT,
)
+from .embeddings_request_stats import EmbeddingsRequestStats
from .info import RequestInfo, RequestTimings
from .request import (
GenerationRequest,
@@ -40,6 +41,7 @@
__all__ = [
"BaseModelT",
"DistributionSummary",
+ "EmbeddingsRequestStats",
"ErroredT",
"FunctionObjT",
"GenerationRequest",
diff --git a/src/guidellm/schemas/base.py b/src/guidellm/schemas/base.py
index cd733b67c..c8f6b6706 100644
--- a/src/guidellm/schemas/base.py
+++ b/src/guidellm/schemas/base.py
@@ -223,21 +223,21 @@ class StatusBreakdown(BaseModel, Generic[SuccessfulT, ErroredT, IncompleteT, Tot
)
"""
- successful: SuccessfulT = Field(
+ successful: SuccessfulT | None = Field(
description="Results or metrics for requests with successful completion status",
- default=None, # type: ignore[assignment]
+ default=None,
)
- errored: ErroredT = Field(
+ errored: ErroredT | None = Field(
description="Results or metrics for requests with error completion status",
- default=None, # type: ignore[assignment]
+ default=None,
)
- incomplete: IncompleteT = Field(
+ incomplete: IncompleteT | None = Field(
description="Results or metrics for requests with incomplete processing status",
- default=None, # type: ignore[assignment]
+ default=None,
)
- total: TotalT = Field(
+ total: TotalT | None = Field(
description="Aggregated results or metrics combining all status categories",
- default=None, # type: ignore[assignment]
+ default=None,
)
diff --git a/src/guidellm/schemas/embeddings_request_stats.py b/src/guidellm/schemas/embeddings_request_stats.py
new file mode 100644
index 000000000..770bea83e
--- /dev/null
+++ b/src/guidellm/schemas/embeddings_request_stats.py
@@ -0,0 +1,136 @@
+"""
+Request statistics for embeddings benchmark analysis.
+
+Provides data structures for capturing and analyzing performance metrics from
+embeddings workloads. It contains request-level statistics including input
+token counts, latency measurements, and optional quality-validation metrics
+such as cosine similarity against a baseline model.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import Field, computed_field
+
+from guidellm.schemas.base import StandardBaseDict
+from guidellm.schemas.info import RequestInfo
+from guidellm.schemas.request import UsageMetrics
+
+__all__ = ["EmbeddingsRequestStats"]
+
+
+class EmbeddingsRequestStats(StandardBaseDict):
+ """
+ Request statistics for embeddings workloads.
+
+ Captures comprehensive performance metrics for individual embeddings requests,
+ including input token counts, timing measurements, and optional quality validation
+ metrics. Unlike generative requests, embeddings do not produce output tokens
+ or have streaming behavior.
+
+ Example:
+ ::
+ stats = EmbeddingsRequestStats(
+ request_id="req_123",
+ info=request_info,
+ input_metrics=input_usage
+ )
+ latency = stats.request_latency
+ """
+
+ type_: Literal["embeddings_request_stats"] = "embeddings_request_stats"
+ request_id: str = Field(description="Unique identifier for the request")
+ response_id: str | None = Field(
+ default=None, description="Unique identifier matching API Response ID"
+ )
+ request_args: str | None = Field(
+ default=None, description="Backend arguments used for this request"
+ )
+ info: RequestInfo = Field(description="Request metadata and timing information")
+ input_metrics: UsageMetrics = Field(
+ description="Token usage statistics for the input text"
+ )
+
+ # Quality validation metrics (optional)
+ cosine_similarity: float | None = Field(
+ default=None,
+ description="Cosine similarity score against baseline model (0.0-1.0)",
+ )
+ encoding_format: str | None = Field(
+ default="float",
+ description="Encoding format used for embeddings (float or base64)",
+ )
+
+ # Request timing stats
+ @computed_field # type: ignore[misc]
+ @property
+ def request_start_time(self) -> float | None:
+ """
+ :return: Timestamp when the request started, or None if unavailable
+ """
+ return (
+ self.info.timings.request_start
+ if self.info.timings.request_start is not None
+ else self.info.timings.resolve_start
+ )
+
+ @computed_field # type: ignore[misc]
+ @property
+ def request_end_time(self) -> float:
+ """
+ :return: Timestamp when the request ended
+ """
+ if self.info.timings.resolve_end is None:
+ raise ValueError("resolve_end timing should be set but is None.")
+
+ return (
+ self.info.timings.request_end
+ if self.info.timings.request_end is not None
+ else self.info.timings.resolve_end
+ )
+
+ @computed_field # type: ignore[misc]
+ @property
+ def request_latency(self) -> float | None:
+ """
+ End-to-end request processing latency in seconds.
+
+ :return: Duration from request start to completion, or None if unavailable
+ """
+ start = self.info.timings.request_start
+ end = self.info.timings.request_end
+ if start is None or end is None:
+ return None
+
+ return end - start
+
+ # Input token stats (no output tokens for embeddings)
+ @computed_field # type: ignore[misc]
+ @property
+ def prompt_tokens(self) -> int | None:
+ """
+ :return: Number of tokens in the input text, or None if unavailable
+ """
+ return self.input_metrics.total_tokens
+
+ @computed_field # type: ignore[misc]
+ @property
+ def total_tokens(self) -> int | None:
+ """
+ :return: Same as prompt_tokens (embeddings have no output tokens)
+ """
+ return self.prompt_tokens
+
+ @computed_field # type: ignore[misc]
+ @property
+ def input_tokens_timing(self) -> tuple[float, float]:
+ """
+ Timing tuple for input token processing.
+
+ :return: Tuple of (timestamp, token_count) for input processing
+ """
+ return (
+ self.request_end_time,
+ self.prompt_tokens or 0.0,
+ )
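+
+
+# Illustrative: when both vectors are L2-normalized (as embedding models
+# typically return), the cosine_similarity stored above reduces to a plain
+# dot product:
+# similarity = sum(a * b for a, b in zip(candidate, baseline))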
diff --git a/src/guidellm/schemas/statistics.py b/src/guidellm/schemas/statistics.py
index 17f2f2ddf..74dfd5a50 100644
--- a/src/guidellm/schemas/statistics.py
+++ b/src/guidellm/schemas/statistics.py
@@ -655,14 +655,14 @@ def count(self) -> int:
"""
:return: Total count of samples across all status categories
"""
- return self.total.count
+ return self.total.count if self.total is not None else 0
@property
def total_sum(self) -> float:
"""
:return: Total sum of values across all status categories
"""
- return self.total.total_sum
+ return self.total.total_sum if self.total is not None else 0.0
@classmethod
def from_values(
diff --git a/src/guidellm/settings.py b/src/guidellm/settings.py
index 0e6e6c455..df14a6554 100644
--- a/src/guidellm/settings.py
+++ b/src/guidellm/settings.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import json
+import sys
from collections.abc import Sequence
from enum import Enum
from typing import Literal
@@ -38,6 +39,24 @@ class Environment(str, Enum):
}
+def _get_default_mp_context_type() -> Literal["spawn", "fork", "forkserver"]:
+ """
+ Get the default multiprocessing context type based on the platform.
+
+ On macOS (darwin), 'fork' is unsafe and causes issues with asyncio and
+ multiprocessing queues. Use 'spawn' instead. On Linux, 'fork' is the
+ default and generally works well.
+
+ :return: The recommended multiprocessing context type for the platform
+ """
+ if sys.platform == "darwin":
+ # macOS: fork is unsafe, use spawn
+ return "spawn"
+ else:
+ # Linux and others: fork is generally safe and faster
+ return "fork"
+
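+# Usage sketch (illustrative): multiprocessing.get_context(
+# _get_default_mp_context_type()) yields a spawn-based context on macOS
+# and a fork-based one elsewhere; Settings below picks up this default.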
+
class LoggingSettings(BaseModel):
"""
Logging settings for the application
@@ -108,13 +127,10 @@ class Settings(BaseSettings):
logging: LoggingSettings = LoggingSettings()
default_sweep_number: int = 10
- # HTTP settings
- request_follow_redirects: bool = True
- request_timeout: int = 60 * 5 # 5 minutes
- request_http2: bool = True
-
# Scheduler settings
- mp_context_type: Literal["spawn", "fork", "forkserver"] | None = "fork"
+ mp_context_type: Literal["spawn", "fork", "forkserver"] | None = Field(
+ default_factory=_get_default_mp_context_type
+ )
mp_serialization: Literal["dict", "sequence"] | None = "dict"
mp_encoding: (
Literal["msgpack", "msgspec"]
@@ -135,14 +151,6 @@ class Settings(BaseSettings):
# Data settings
dataset: DatasetSettings = DatasetSettings()
- # Request/stats settings
- preferred_prompt_tokens_source: Literal["request", "response"] = "response"
- preferred_output_tokens_source: Literal["request", "response"] = "response"
- preferred_backend: Literal["openai"] = "openai"
- preferred_route: Literal["text_completions", "chat_completions"] = (
- "chat_completions"
- )
-
# Report settings
report_generation: ReportGenerationSettings = ReportGenerationSettings()
diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py
index 37f2e8d36..e13c34da6 100644
--- a/src/guidellm/utils/text.py
+++ b/src/guidellm/utils/text.py
@@ -20,7 +20,6 @@
import httpx
from loguru import logger
-from guidellm.settings import settings
from guidellm.utils.console import Colors
__all__ = [
@@ -232,7 +231,7 @@ def load_text(data: str | Path, encoding: str | None = None) -> str:
# check URLs
if isinstance(data, str) and data.strip().startswith(("http", "ftp")):
- with httpx.Client(timeout=settings.request_timeout) as client:
+ with httpx.Client() as client:
response = client.get(data.strip())
response.raise_for_status()
return response.text
diff --git a/tests/e2e/test_embeddings_benchmark.py b/tests/e2e/test_embeddings_benchmark.py
new file mode 100644
index 000000000..8eed769e3
--- /dev/null
+++ b/tests/e2e/test_embeddings_benchmark.py
@@ -0,0 +1,590 @@
+# E2E tests for embeddings benchmark scenarios
+
+import json
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+import pytest
+import requests
+from loguru import logger
+
+
+class EmbeddingsMockServer:
+ """Mock server for embeddings E2E tests using guidellm mock-server."""
+
+ def __init__(self, port: int, model: str = "BAAI/bge-base-en-v1.5"):
+ self.port = port
+ self.model = model
+ self.server_url = f"http://127.0.0.1:{self.port}"
+ self.health_url = f"{self.server_url}/health"
+ self.process: subprocess.Popen | None = None
+
+ def get_guidellm_executable(self) -> str:
+ """Get the path to the guidellm executable in the current environment."""
+ python_bin_dir = Path(sys.executable).parent
+ guidellm_path = python_bin_dir / "guidellm"
+ if guidellm_path.exists():
+ return str(guidellm_path)
+ return "guidellm"
+
+ def start(self):
+ """Start the mock embeddings server."""
+ guidellm_exe = self.get_guidellm_executable()
+
+ logger.info(f"Starting embeddings mock server on {self.server_url}...")
+ command = [
+ guidellm_exe,
+ "mock-server",
+ "--port",
+ str(self.port),
+ "--model",
+ self.model,
+ ]
+ logger.info(f"Server command: {' '.join(command)}")
+
+ self.process = subprocess.Popen( # noqa: S603
+ command,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ text=True,
+ )
+
+ # Wait for server to become healthy
+ max_retries = 30
+ retry_delay_sec = 0.5
+ for i in range(max_retries):
+ try:
+ response = requests.get(self.health_url, timeout=1)
+ if response.status_code == 200:
+ logger.info(f"Embeddings mock server started at {self.server_url}")
+ return
+ except requests.RequestException:
+ pass
+
+ if i < max_retries - 1:
+ time.sleep(retry_delay_sec)
+
+ # Server didn't start, terminate and raise
+ self.stop()
+ raise RuntimeError(
+ f"Embeddings mock server failed to start after {max_retries} retries"
+ )
+
+ def stop(self):
+ """Stop the mock server."""
+ if self.process and self.process.poll() is None:
+ logger.info("Stopping embeddings mock server...")
+ self.process.terminate()
+ try:
+ self.process.wait(timeout=5)
+ except subprocess.TimeoutExpired:
+ logger.warning("Server did not terminate gracefully, killing it...")
+ self.process.kill()
+ self.process.wait()
+ logger.info("Embeddings mock server stopped.")
+
+ def get_url(self) -> str:
+ """Get the server URL."""
+ return self.server_url
+
+
+class EmbeddingsClient:
+ """Wrapper for running guidellm embeddings benchmark commands."""
+
+ def __init__(
+ self, target: str, output_dir: Path, outputs: str = "embeddings_benchmarks.json"
+ ):
+ self.target = target
+ self.output_dir = output_dir
+ self.outputs = outputs
+ self.process: subprocess.Popen | None = None
+ self.stdout: str | None = None
+ self.stderr: str | None = None
+
+ def get_guidellm_executable(self) -> str:
+ """Get the path to the guidellm executable."""
+ python_bin_dir = Path(sys.executable).parent
+ guidellm_path = python_bin_dir / "guidellm"
+ if guidellm_path.exists():
+ return str(guidellm_path)
+ return "guidellm"
+
+ def start_benchmark(
+ self,
+ data: str = "Benchmark this text for embeddings quality",
+ profile: str = "constant",
+ rate: int = 10,
+ max_requests: int | None = None,
+ max_duration: int | None = None,
+ encoding_format: str = "float",
+ enable_quality_validation: bool = False,
+ baseline_model: str | None = None,
+ quality_tolerance: float | None = None,
+ processor: str | None = None,
+ additional_args: str = "",
+ ):
+ """Start embeddings benchmark command."""
+ guidellm_exe = self.get_guidellm_executable()
+
+ # Build command components
+ # Emit one --data flag per item so str and list inputs both work
+ data_items = [data] if isinstance(data, str) else list(data)
+ cmd_parts = [
+ f"HF_HOME={self.output_dir / 'huggingface_cache'}",
+ f"{guidellm_exe} benchmark embeddings",
+ f"--target {self.target}",
+ *(f"--data '{item}'" for item in data_items),
+ f"--profile {profile}",
+ f"--rate {rate}",
+ f"--encoding-format {encoding_format}",
+ f"--output-dir {self.output_dir}",
+ f"--outputs {self.outputs}",
+ ]
+
+ if max_requests is not None:
+ cmd_parts.append(f"--max-requests {max_requests}")
+
+ if max_duration is not None:
+ cmd_parts.append(f"--max-duration {max_duration}")
+
+ if enable_quality_validation:
+ cmd_parts.append("--enable-quality-validation")
+
+ if baseline_model is not None:
+ cmd_parts.append(f"--baseline-model {baseline_model}")
+
+ if quality_tolerance is not None:
+ cmd_parts.append(f"--quality-tolerance {quality_tolerance}")
+
+ if processor is not None:
+ cmd_parts.append(f"--processor {processor}")
+
+ if additional_args:
+ cmd_parts.append(additional_args)
+
+ command = " \\\n ".join(cmd_parts)
+ logger.info(f"Embeddings benchmark command: {command}")
+
+ self.process = subprocess.Popen( # noqa: S603
+ ["/bin/sh", "-c", command],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ text=True,
+ )
+
+ def wait_for_completion(self, timeout: int = 30):
+ """Wait for the benchmark to complete."""
+ if self.process is None:
+ raise RuntimeError("No process started. Call start_benchmark() first.")
+
+ try:
+ logger.info("Waiting for embeddings benchmark to complete...")
+ self.stdout, self.stderr = self.process.communicate(timeout=timeout)
+ logger.debug(f"Benchmark stdout:\n{self.stdout}")
+ logger.debug(f"Benchmark stderr:\n{self.stderr}")
+ except subprocess.TimeoutExpired:
+ logger.warning("Benchmark did not complete within timeout, terminating...")
+ self.process.terminate()
+ try:
+ self.stdout, self.stderr = self.process.communicate(timeout=5)
+ except subprocess.TimeoutExpired:
+ logger.warning("Benchmark did not terminate gracefully, killing it...")
+ self.process.kill()
+ self.stdout, self.stderr = self.process.communicate()
+
+
+@pytest.fixture(scope="module")
+def embeddings_server():
+ """Pytest fixture to start and stop embeddings mock server."""
+ server = EmbeddingsMockServer(port=8001, model="test-embedding-model")
+ try:
+ server.start()
+ yield server
+ finally:
+ server.stop()
+
+
+def assert_no_python_exceptions(stderr: str | None) -> None:
+ """Assert that stderr does not contain Python exception indicators."""
+ if stderr is None:
+ return
+
+ python_exception_indicators = [
+ "Traceback (most recent call last):",
+ "AttributeError:",
+ "ValueError:",
+ "TypeError:",
+ "KeyError:",
+ "IndexError:",
+ "NameError:",
+ "ImportError:",
+ "RuntimeError:",
+ ]
+
+ for indicator in python_exception_indicators:
+ assert indicator not in stderr, f"Python exception detected: {indicator}"
+
+
+def load_embeddings_report(report_path: Path) -> dict:
+ """Load and validate embeddings benchmark report."""
+ assert report_path.exists(), f"Report file does not exist: {report_path}"
+
+ with report_path.open("r") as f:
+ report = json.load(f)
+
+ assert "type_" in report, "Report missing 'type_' field"
+ assert report["type_"] == "embeddings_benchmarks_report", (
+ f"Expected embeddings_benchmarks_report, got {report['type_']}"
+ )
+ assert "benchmarks" in report, "Report missing 'benchmarks' field"
+ assert len(report["benchmarks"]) > 0, "Report contains no benchmarks"
+
+ return report
+
+
+def assert_embeddings_request_fields(requests: list) -> None:
+ """Assert that embeddings requests contain expected fields."""
+ assert len(requests) >= 1, "No requests found"
+
+ for request in requests:
+ # Basic fields
+ assert "request_id" in request, "Missing 'request_id' field"
+ assert "request_latency" in request, "Missing 'request_latency' field"
+ assert request["request_latency"] > 0, "request_latency should be > 0"
+
+ # Input token metrics (no output tokens for embeddings)
+ assert "prompt_tokens" in request, "Missing 'prompt_tokens' field"
+ assert request["prompt_tokens"] > 0, "prompt_tokens should be > 0"
+
+ assert "total_tokens" in request, "Missing 'total_tokens' field"
+ assert request["total_tokens"] > 0, "total_tokens should be > 0"
+
+ # Should NOT have output token fields
+ assert "output_tokens" not in request or request["output_tokens"] is None, (
+ "Embeddings should not have output_tokens"
+ )
+
+ # Should NOT have streaming fields
+ assert "time_to_first_token_ms" not in request, (
+ "Embeddings should not have time_to_first_token_ms"
+ )
+ assert "inter_token_latency_ms" not in request, (
+ "Embeddings should not have inter_token_latency_ms"
+ )
+
+ # Encoding format
+ assert "encoding_format" in request, "Missing 'encoding_format' field"
+ assert request["encoding_format"] in ["float", "base64"], (
+ f"Invalid encoding_format: {request['encoding_format']}"
+ )
+
+
+@pytest.mark.timeout(30)
+@pytest.mark.sanity
+def test_basic_embeddings_benchmark(
+ embeddings_server: EmbeddingsMockServer, tmp_path: Path
+):
+ """Test basic embeddings benchmark execution."""
+ report_name = "basic_embeddings.json"
+ report_path = tmp_path / report_name
+
+ client = EmbeddingsClient(
+ target=embeddings_server.get_url(),
+ output_dir=tmp_path,
+ outputs=report_name,
+ )
+
+ client.start_benchmark(
+ data=["Test embeddings benchmark"],
+ max_requests=10,
+ processor="gpt2",
+ )
+
+ client.wait_for_completion(timeout=30)
+
+ # Assert no Python exceptions
+ assert_no_python_exceptions(client.stderr)
+
+ # Load and validate report
+ report = load_embeddings_report(report_path)
+ benchmark = report["benchmarks"][0]
+
+ # Validate requests
+ successful_requests = benchmark["requests"]["successful"]
+ assert len(successful_requests) == 10, (
+ f"Expected 10 successful requests, got {len(successful_requests)}"
+ )
+ assert_embeddings_request_fields(successful_requests)
+
+ # Validate metrics structure
+ metrics = benchmark["metrics"]
+ assert "request_totals" in metrics
+ assert "input_tokens_count" in metrics
+ assert "encoding_format_breakdown" in metrics
+
+ # Should NOT have output token metrics
+ assert "output_tokens_count" not in metrics, (
+ "Embeddings metrics should not have output_tokens_count"
+ )
+
+
+@pytest.mark.timeout(30)
+@pytest.mark.sanity
+def test_embeddings_float_encoding(
+ embeddings_server: EmbeddingsMockServer, tmp_path: Path
+):
+ """Test embeddings benchmark with float encoding format."""
+ report_name = "float_encoding_embeddings.json"
+ report_path = tmp_path / report_name
+
+ client = EmbeddingsClient(
+ target=embeddings_server.get_url(),
+ output_dir=tmp_path,
+ outputs=report_name,
+ )
+
+ client.start_benchmark(
+ data=["Test float encoding"],
+ max_requests=5,
+ encoding_format="float",
+ processor="gpt2",
+ )
+
+ client.wait_for_completion(timeout=30)
+ assert_no_python_exceptions(client.stderr)
+
+ report = load_embeddings_report(report_path)
+ benchmark = report["benchmarks"][0]
+
+ # Check encoding format
+ successful_requests = benchmark["requests"]["successful"]
+ for request in successful_requests:
+ assert request["encoding_format"] == "float"
+
+ # Check encoding_format_breakdown in metrics
+ metrics = benchmark["metrics"]
+ assert "float" in metrics["encoding_format_breakdown"]
+ assert metrics["encoding_format_breakdown"]["float"] == 5
+
+
+@pytest.mark.timeout(30)
+@pytest.mark.sanity
+def test_embeddings_base64_encoding(
+ embeddings_server: EmbeddingsMockServer, tmp_path: Path
+):
+ """Test embeddings benchmark with base64 encoding format."""
+ report_name = "base64_encoding_embeddings.json"
+ report_path = tmp_path / report_name
+
+ client = EmbeddingsClient(
+ target=embeddings_server.get_url(),
+ output_dir=tmp_path,
+ outputs=report_name,
+ )
+
+ client.start_benchmark(
+ data=["Test base64 encoding"],
+ max_requests=5,
+ encoding_format="base64",
+ processor="gpt2",
+ )
+
+ client.wait_for_completion(timeout=30)
+ assert_no_python_exceptions(client.stderr)
+
+ report = load_embeddings_report(report_path)
+ benchmark = report["benchmarks"][0]
+
+ # Check encoding format
+ successful_requests = benchmark["requests"]["successful"]
+ for request in successful_requests:
+ assert request["encoding_format"] == "base64"
+
+ # Check encoding_format_breakdown in metrics
+ metrics = benchmark["metrics"]
+ assert "base64" in metrics["encoding_format_breakdown"]
+ assert metrics["encoding_format_breakdown"]["base64"] == 5
+
+
+@pytest.mark.timeout(60)
+@pytest.mark.sanity
+def test_embeddings_csv_output(
+ embeddings_server: EmbeddingsMockServer, tmp_path: Path
+):
+ """Test embeddings benchmark CSV output generation."""
+ client = EmbeddingsClient(
+ target=embeddings_server.get_url(),
+ output_dir=tmp_path,
+ outputs="json,csv",
+ )
+
+ client.start_benchmark(
+ data=["Test CSV output"],
+ max_requests=5,
+ processor="gpt2",
+ )
+
+ client.wait_for_completion(timeout=60)
+ assert_no_python_exceptions(client.stderr)
+
+ # Check both JSON and CSV files exist
+ json_path = tmp_path / "embeddings_benchmarks.json"
+ csv_path = tmp_path / "embeddings_benchmarks.csv"
+
+ assert json_path.exists(), "JSON output file not created"
+ assert csv_path.exists(), "CSV output file not created"
+
+ # Validate CSV has content
+ csv_content = csv_path.read_text()
+ assert len(csv_content) > 0, "CSV file is empty"
+ assert "request_latency" in csv_content, "CSV missing request_latency column"
+ assert "prompt_tokens" in csv_content, "CSV missing prompt_tokens column"
+
+
+@pytest.mark.timeout(60)
+@pytest.mark.sanity
+def test_embeddings_html_output(
+ embeddings_server: EmbeddingsMockServer, tmp_path: Path
+):
+ """Test embeddings benchmark HTML output generation."""
+ client = EmbeddingsClient(
+ target=embeddings_server.get_url(),
+ output_dir=tmp_path,
+ outputs="json,html",
+ )
+
+ client.start_benchmark(
+ data=["Test HTML output"],
+ max_requests=5,
+ processor="gpt2",
+ )
+
+ client.wait_for_completion(timeout=60)
+ assert_no_python_exceptions(client.stderr)
+
+ # Check both JSON and HTML files exist
+ json_path = tmp_path / "embeddings_benchmarks.json"
+ html_path = tmp_path / "embeddings_benchmarks.html"
+
+ assert json_path.exists(), "JSON output file not created"
+ assert html_path.exists(), "HTML output file not created"
+
+ # Validate HTML has content
+ html_content = html_path.read_text()
+ assert len(html_content) > 0, "HTML file is empty"
+ assert " Percentiles:
+ """Helper to create Percentiles with all required fields."""
+ return Percentiles(
+ p001=p50 * 0.5,
+ p01=p50 * 0.6,
+ p05=p50 * 0.7,
+ p10=p50 * 0.8,
+ p25=p50 * 0.9,
+ p50=p50,
+ p75=p50 * 1.05,
+ p90=p50 * 1.1,
+ p95=p50 * 1.15,
+ p99=p50 * 1.2,
+ p999=p50 * 1.25,
+ )
+
+
+def create_distribution_summary(
+ mean=0.5,
+ median=0.5,
+ mode=0.5,
+ variance=0.01,
+ std_dev=0.1,
+ min_val=0.1,
+ max_val=1.0,
+ count=100,
+ total_sum=50.0,
+) -> DistributionSummary:
+ """Helper to create DistributionSummary with all required fields."""
+ return DistributionSummary(
+ mean=mean,
+ median=median,
+ mode=mode,
+ variance=variance,
+ std_dev=std_dev,
+ min=min_val,
+ max=max_val,
+ count=count,
+ total_sum=total_sum,
+ percentiles=create_percentiles(median),
+ )
+
+
+@pytest.fixture
+def sample_benchmark() -> EmbeddingsBenchmark:
+ """Create a sample embeddings benchmark for testing."""
+ # Create basic scheduler state
+ scheduler_state = SchedulerState(
+ request_count=10,
+ successful_count=10,
+ incomplete_count=0,
+ errored_count=0,
+ )
+
+ scheduler_metrics = SchedulerMetrics(
+ start_time=0.0,
+ request_start_time=0.1,
+ measure_start_time=1.0,
+ measure_end_time=9.0,
+ request_end_time=9.9,
+ end_time=10.0,
+ requests_made=StatusBreakdown(successful=10, incomplete=0, errored=0, total=10),
+ queued_time_avg=0.01,
+ resolve_start_delay_avg=0.005,
+ resolve_targeted_start_delay_avg=0.002,
+ request_start_delay_avg=0.003,
+ resolve_time_avg=0.15,
+ )
+
+ # Create quality metrics
+ quality_metrics = EmbeddingsQualityMetrics(
+ baseline_cosine_similarity=StatusDistributionSummary(
+ successful=create_distribution_summary(
+ mean=0.98, median=0.985, count=10, total_sum=9.8
+ ),
+ errored=None,
+ incomplete=None,
+ total=None,
+ ),
+ mteb_main_score=75.5,
+ mteb_task_scores={"STS12": 72.3, "STS13": 78.1},
+ )
+
+ # Create metrics
+ latency_dist = create_distribution_summary(
+ mean=0.15, median=0.14, count=10, total_sum=1.5
+ )
+ metrics = EmbeddingsMetrics(
+ request_totals=StatusBreakdown(
+ successful=10, incomplete=0, errored=0, total=10
+ ),
+ requests_per_second=StatusDistributionSummary(
+ successful=create_distribution_summary(
+ mean=20.0, count=10, total_sum=200.0
+ ),
+ errored=None,
+ incomplete=None,
+ total=create_distribution_summary(
+ mean=20.0, count=10, total_sum=200.0
+ ),
+ ),
+ request_concurrency=StatusDistributionSummary(
+ successful=create_distribution_summary(mean=2.0, count=10, total_sum=20.0),
+ errored=None,
+ incomplete=None,
+ total=create_distribution_summary(mean=2.0, count=10, total_sum=20.0),
+ ),
+ request_latency=StatusDistributionSummary(
+ successful=latency_dist,
+ errored=None,
+ incomplete=None,
+ total=latency_dist,
+ ),
+ input_tokens_count=StatusBreakdown(
+ successful=500, incomplete=0, errored=0, total=500
+ ),
+ input_tokens_per_second=StatusDistributionSummary(
+ successful=create_distribution_summary(
+ mean=100.0, count=10, total_sum=1000.0
+ ),
+ errored=None,
+ incomplete=None,
+ total=create_distribution_summary(mean=100.0, count=10, total_sum=1000.0),
+ ),
+ quality=quality_metrics,
+ encoding_format_breakdown={"float": 7, "base64": 3},
+ )
+
+ # Create sample request stats
+ successful_requests = []
+ for i in range(10):
+ info = RequestInfo(request_id=f"req-{i}", status="completed")
+ info.timings.request_start = float(i)
+ info.timings.request_end = float(i) + 0.15
+ info.timings.resolve_end = float(i) + 0.15
+
+ stats = EmbeddingsRequestStats(
+ request_id=f"req-{i}",
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=50),
+ encoding_format="float" if i < 7 else "base64",
+ cosine_similarity=0.98 if i % 2 == 0 else None,
+ )
+ successful_requests.append(stats)
+
+ requests = StatusBreakdown(
+ successful=successful_requests,
+ incomplete=[],
+ errored=[],
+ total=None,
+ )
+
+ # Create a minimal config (we won't use most fields for output testing)
+ from guidellm.scheduler import SynchronousStrategy
+
+ config = BenchmarkConfig(
+ run_id="test-run-001",
+ run_index=0,
+ strategy=SynchronousStrategy(rate=10),
+ constraints={},
+ profile=SynchronousProfile(rate=10),
+ requests={
+ "type": "embeddings",
+ "model": "test-embedding-model",
+ },
+ backend={
+ "type": "openai_http",
+ "url": "http://localhost:8000",
+ },
+ environment={
+ "platform": "test",
+ "python_version": "3.11",
+ },
+ )
+
+ return EmbeddingsBenchmark(
+ config=config,
+ scheduler_state=scheduler_state,
+ scheduler_metrics=scheduler_metrics,
+ metrics=metrics,
+ requests=requests,
+ start_time=0.0,
+ end_time=10.0,
+ duration=10.0,
+ warmup_duration=1.0,
+ cooldown_duration=1.0,
+ )
+
+
+@pytest.fixture
+def sample_report(sample_benchmark: EmbeddingsBenchmark) -> EmbeddingsBenchmarksReport:
+ """Create a sample embeddings benchmark report for testing."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ model="test-embedding-model",
+ backend="openai_http",
+ enable_quality_validation=True,
+ baseline_model="sentence-transformers/all-MiniLM-L6-v2",
+ encoding_format="float",
+ )
+
+ return EmbeddingsBenchmarksReport(
+ benchmarks=[sample_benchmark],
+ args=args,
+ metadata=EmbeddingsBenchmarkMetadata(),
+ )
+
+
+class TestEmbeddingsBenchmarkerSerialized:
+ """Tests for EmbeddingsBenchmarkerSerialized (JSON/YAML output)."""
+
+ @pytest.mark.smoke
+ def test_class_registration(self):
+ """Test that serialized formatter is properly registered."""
+ from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
+
+ # Should be registered for both json and yaml
+ assert "json" in EmbeddingsBenchmarkerOutput.registry
+ assert "yaml" in EmbeddingsBenchmarkerOutput.registry
+ assert (
+ EmbeddingsBenchmarkerOutput.registry["json"]
+ == EmbeddingsBenchmarkerSerialized
+ )
+
+ @pytest.mark.smoke
+ def test_validated_kwargs(self):
+ """Test validated_kwargs normalizes paths correctly."""
+ # Test with string path
+ kwargs = EmbeddingsBenchmarkerSerialized.validated_kwargs(
+ output_path="/tmp/test.json" # noqa: S108
+ )
+ assert "output_path" in kwargs
+ assert isinstance(kwargs["output_path"], Path)
+ assert str(kwargs["output_path"]) == "/tmp/test.json" # noqa: S108
+
+ # Test with Path object
+ path_obj = Path("/tmp/test.json") # noqa: S108
+ kwargs = EmbeddingsBenchmarkerSerialized.validated_kwargs(
+ output_path=path_obj
+ )
+ assert kwargs["output_path"] == path_obj
+
+ # Test with None
+ kwargs = EmbeddingsBenchmarkerSerialized.validated_kwargs(output_path=None)
+ assert "output_path" not in kwargs
+
+ @pytest.mark.asyncio
+ @pytest.mark.sanity
+ async def test_finalize_json(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test finalizing report to JSON file."""
+ output_file = tmp_path / "test_embeddings.json"
+ formatter = EmbeddingsBenchmarkerSerialized(output_path=output_file)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ assert result_path == output_file
+ assert result_path.suffix == ".json"
+
+ # Validate JSON content
+ with result_path.open("r") as f:
+ data = json.load(f)
+
+ assert data["type_"] == "embeddings_benchmarks_report"
+ assert len(data["benchmarks"]) == 1
+ assert "metadata" in data
+ assert "args" in data
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_finalize_yaml(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test finalizing report to YAML file."""
+ output_file = tmp_path / "test_embeddings.yaml"
+ formatter = EmbeddingsBenchmarkerSerialized(output_path=output_file)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ assert result_path == output_file
+ assert result_path.suffix in [".yaml", ".yml"]
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_finalize_directory(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test finalizing with directory path (should use default filename)."""
+ formatter = EmbeddingsBenchmarkerSerialized(output_path=tmp_path)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ assert result_path.parent == tmp_path
+ # Default behavior should create a file with some name
+ assert result_path.suffix in [".json", ".yaml", ".yml"]
+
+
+class TestEmbeddingsBenchmarkerCSV:
+ """Tests for EmbeddingsBenchmarkerCSV output formatter."""
+
+ @pytest.mark.smoke
+ def test_class_registration(self):
+ """Test that CSV formatter is properly registered."""
+ from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
+
+ assert "csv" in EmbeddingsBenchmarkerOutput.registry
+ assert (
+ EmbeddingsBenchmarkerOutput.registry["csv"] == EmbeddingsBenchmarkerCSV
+ )
+
+ @pytest.mark.smoke
+ def test_default_filename(self):
+ """Test default CSV filename."""
+ assert EmbeddingsBenchmarkerCSV.DEFAULT_FILE == "embeddings_benchmarks.csv"
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_csv_creates_file(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test that finalize creates a valid CSV file."""
+ output_file = tmp_path / "test_embeddings.csv"
+ formatter = EmbeddingsBenchmarkerCSV(output_path=output_file)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ assert result_path == output_file
+ assert result_path.suffix == ".csv"
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_csv_structure(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test CSV has correct structure and headers."""
+ output_file = tmp_path / "test_embeddings.csv"
+ formatter = EmbeddingsBenchmarkerCSV(output_path=output_file)
+
+ await formatter.finalize(sample_report)
+
+ # Read CSV and check structure
+ with output_file.open("r") as f:
+ reader = csv.reader(f)
+ rows = list(reader)
+
+ # Should have at least header rows + data rows
+ assert len(rows) >= 4 # Multi-row header + at least 1 data row
+
+ # Check for embeddings-specific headers (no output tokens or streaming)
+ csv_text = output_file.read_text()
+ assert "Request Latency" in csv_text
+ assert "Input Tokens" in csv_text
+
+ # Should NOT have output token or streaming headers
+ assert "Output Tokens" not in csv_text
+ assert "Time to First Token" not in csv_text
+ assert "Inter Token Latency" not in csv_text
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_csv_quality_metrics(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test CSV includes quality validation metrics."""
+ output_file = tmp_path / "test_embeddings.csv"
+ formatter = EmbeddingsBenchmarkerCSV(output_path=output_file)
+
+ await formatter.finalize(sample_report)
+
+ csv_text = output_file.read_text()
+
+ # Check for quality metrics
+ assert "Cosine Similarity" in csv_text or "Quality" in csv_text
+ assert "MTEB" in csv_text
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_csv_encoding_formats(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test CSV includes encoding format breakdown."""
+ output_file = tmp_path / "test_embeddings.csv"
+ formatter = EmbeddingsBenchmarkerCSV(output_path=output_file)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ csv_text = result_path.read_text()
+
+ # Check that CSV contains benchmark data (encoding format breakdown
+ # is stored in metrics but not separately exported to CSV)
+ assert "test-embedding-model" in csv_text
+ assert len(csv_text) > 0
+
+ @pytest.mark.regression
+ @pytest.mark.asyncio
+ async def test_csv_directory_path(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test CSV creation with directory path."""
+ formatter = EmbeddingsBenchmarkerCSV(output_path=tmp_path)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ assert result_path.parent == tmp_path
+ assert result_path.name == EmbeddingsBenchmarkerCSV.DEFAULT_FILE
+
+
+class TestEmbeddingsBenchmarkerHTML:
+ """Tests for EmbeddingsBenchmarkerHTML output formatter."""
+
+ @pytest.mark.smoke
+ def test_class_registration(self):
+ """Test that HTML formatter is properly registered."""
+ from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
+
+ assert "html" in EmbeddingsBenchmarkerOutput.registry
+ assert (
+ EmbeddingsBenchmarkerOutput.registry["html"] == EmbeddingsBenchmarkerHTML
+ )
+
+ @pytest.mark.smoke
+ def test_default_filename(self):
+ """Test default HTML filename."""
+ assert EmbeddingsBenchmarkerHTML.DEFAULT_FILE == "embeddings_benchmarks.html"
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_html_creates_file(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test that finalize creates a valid HTML file."""
+ output_file = tmp_path / "test_embeddings.html"
+ formatter = EmbeddingsBenchmarkerHTML(output_path=output_file)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ assert result_path == output_file
+ assert result_path.suffix == ".html"
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_html_structure(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test HTML file has valid structure."""
+ output_file = tmp_path / "test_embeddings.html"
+ formatter = EmbeddingsBenchmarkerHTML(output_path=output_file)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ html_content = result_path.read_text()
+
+ # Check basic HTML structure
+ assert "" in html_content
+ assert "" in html_content
+ assert "" in html_content
+ assert "" in html_content
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_html_embeddings_data(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test HTML contains embeddings-specific data."""
+ output_file = tmp_path / "test_embeddings.html"
+ formatter = EmbeddingsBenchmarkerHTML(output_path=output_file)
+
+ result_path = await formatter.finalize(sample_report)
+
+ html_content = result_path.read_text()
+
+ # Check for embedded data and embeddings-specific content
+ assert "uiApiData" in html_content
+ assert (
+ "embeddings" in html_content.lower()
+ or "embedding" in html_content.lower()
+ )
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_html_no_streaming_metrics(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test HTML does not include streaming metrics."""
+ output_file = tmp_path / "test_embeddings.html"
+ formatter = EmbeddingsBenchmarkerHTML(output_path=output_file)
+
+ await formatter.finalize(sample_report)
+
+ html_content = output_file.read_text()
+
+ # Should NOT have streaming-related content
+ assert "Time to First Token" not in html_content
+ assert "TTFT" not in html_content
+ assert "Inter Token Latency" not in html_content
+ assert "ITL" not in html_content
+
+ @pytest.mark.regression
+ @pytest.mark.asyncio
+ async def test_html_directory_path(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test HTML creation with directory path."""
+ formatter = EmbeddingsBenchmarkerHTML(output_path=tmp_path)
+
+ result_path = await formatter.finalize(sample_report)
+
+ assert result_path.exists()
+ assert result_path.parent == tmp_path
+ assert result_path.name == EmbeddingsBenchmarkerHTML.DEFAULT_FILE
+
+
+class TestEmbeddingsBenchmarkerConsole:
+ """Tests for EmbeddingsBenchmarkerConsole output formatter."""
+
+ @pytest.mark.smoke
+ def test_class_registration(self):
+ """Test that console formatter is properly registered."""
+ from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput
+
+ assert "console" in EmbeddingsBenchmarkerOutput.registry
+ assert (
+ EmbeddingsBenchmarkerOutput.registry["console"]
+ == EmbeddingsBenchmarkerConsole
+ )
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_console_finalize(
+ self, sample_report: EmbeddingsBenchmarksReport
+ ):
+ """Test that console formatter finalize returns None (no file output)."""
+ formatter = EmbeddingsBenchmarkerConsole()
+
+ result = await formatter.finalize(sample_report)
+
+ # Console formatter doesn't write to file, should return None or empty Path
+ assert result is None or (isinstance(result, Path) and not result.exists())
+
+ @pytest.mark.regression
+ def test_console_instantiation(self):
+ """Test console formatter can be instantiated."""
+ formatter = EmbeddingsBenchmarkerConsole()
+ assert formatter is not None
+ assert isinstance(formatter, EmbeddingsBenchmarkerConsole)
+
+
+class TestOutputFormattersIntegration:
+ """Integration tests for output formatters working together."""
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ async def test_integration_multiple_formats(
+ self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path
+ ):
+ """Test that all formatters can process the same report."""
+ # JSON
+ json_formatter = EmbeddingsBenchmarkerSerialized(
+ output_path=tmp_path / "test.json"
+ )
+ json_path = await json_formatter.finalize(sample_report)
+ assert json_path.exists()
+
+ # CSV
+ csv_formatter = EmbeddingsBenchmarkerCSV(output_path=tmp_path / "test.csv")
+ csv_path = await csv_formatter.finalize(sample_report)
+ assert csv_path.exists()
+
+ # HTML
+ html_formatter = EmbeddingsBenchmarkerHTML(output_path=tmp_path / "test.html")
+ html_path = await html_formatter.finalize(sample_report)
+ assert html_path.exists()
+
+ # Console
+ console_formatter = EmbeddingsBenchmarkerConsole()
+ console_result = await console_formatter.finalize(sample_report)
+ # Console doesn't write files, returns None
+ assert console_result is None
+
+ @pytest.mark.regression
+ @pytest.mark.asyncio
+ async def test_empty_report_handling(self, tmp_path: Path):
+ """Test formatters handle reports with no benchmarks gracefully."""
+ # Create report with no benchmarks
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ model="test-model",
+ )
+ empty_report = EmbeddingsBenchmarksReport(
+ benchmarks=[],
+ args=args,
+ metadata=EmbeddingsBenchmarkMetadata(),
+ )
+
+ # JSON should still work
+ json_formatter = EmbeddingsBenchmarkerSerialized(
+ output_path=tmp_path / "empty.json"
+ )
+ json_path = await json_formatter.finalize(empty_report)
+ assert json_path.exists()
+
+ # Verify JSON content is valid
+ with json_path.open("r") as f:
+ data = json.load(f)
+ assert data["type_"] == "embeddings_benchmarks_report"
+ assert len(data["benchmarks"]) == 0
diff --git a/tests/unit/benchmark/quality/__init__.py b/tests/unit/benchmark/quality/__init__.py
new file mode 100644
index 000000000..f1791286e
--- /dev/null
+++ b/tests/unit/benchmark/quality/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for embeddings quality validation."""
diff --git a/tests/unit/benchmark/quality/test_mteb_integration.py b/tests/unit/benchmark/quality/test_mteb_integration.py
new file mode 100644
index 000000000..6546e586d
--- /dev/null
+++ b/tests/unit/benchmark/quality/test_mteb_integration.py
@@ -0,0 +1,223 @@
+from __future__ import annotations
+
+import pytest
+
+# Skip all tests if sentence-transformers/mteb aren't available
+pytest.importorskip("sentence_transformers", reason="sentence-transformers required")
+pytest.importorskip("mteb", reason="mteb required")
+
+from guidellm.benchmark.quality.mteb_integration import (
+ DEFAULT_MTEB_TASKS,
+ MTEBValidator,
+)
+
+
+class TestMTEBValidator:
+ """Tests for MTEB benchmark integration."""
+
+ @pytest.fixture
+ def validator(self):
+ """Create a validator with a test model and minimal tasks."""
+ # Use a small, fast model and single task for faster tests
+ return MTEBValidator(
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
+ task_names=["STS12"], # Single lightweight task
+ )
+
+ @pytest.mark.smoke
+ def test_initialization(self, validator):
+ """Test validator initialization."""
+ assert validator is not None
+ assert validator.model is not None
+ assert validator.task_names == ["STS12"]
+
+ @pytest.mark.smoke
+ def test_initialization_default_tasks(self):
+ """Test initialization with default MTEB tasks."""
+ validator = MTEBValidator(
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ assert validator.task_names == DEFAULT_MTEB_TASKS
+
+ @pytest.mark.sanity
+ def test_initialization_multiple_tasks(self):
+ """Test initialization with multiple tasks."""
+ tasks = ["STS12", "STS13", "STSBenchmark"]
+ validator = MTEBValidator(
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
+ task_names=tasks,
+ )
+
+ assert validator.task_names == tasks
+ assert len(validator.task_names) == 3
+
+ @pytest.mark.sanity
+ @pytest.mark.slow
+ def test_run_evaluation_single_task(self, validator):
+ """Test running MTEB evaluation with single task."""
+ results = validator.run_evaluation()
+
+ assert isinstance(results, dict)
+ assert "mteb_main_score" in results
+ assert "mteb_task_scores" in results
+
+ # Main score should be a float
+ assert isinstance(results["mteb_main_score"], float)
+
+ # Task scores should be a dict
+ assert isinstance(results["mteb_task_scores"], dict)
+ assert "STS12" in results["mteb_task_scores"]
+
+ @pytest.mark.sanity
+ @pytest.mark.slow
+ def test_run_evaluation_score_range(self, validator):
+ """Test that MTEB scores are in valid range."""
+ results = validator.run_evaluation()
+
+ # MTEB scores should be between 0 and 100
+ assert 0.0 <= results["mteb_main_score"] <= 100.0
+
+ for _task_name, score in results["mteb_task_scores"].items():
+ assert 0.0 <= score <= 100.0
+
+ @pytest.mark.regression
+ @pytest.mark.slow
+ def test_run_evaluation_multiple_tasks(self):
+ """Test running MTEB evaluation with multiple tasks."""
+ tasks = ["STS12", "STS13"]
+ validator = MTEBValidator(
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
+ task_names=tasks,
+ )
+
+ results = validator.run_evaluation()
+
+ assert "mteb_main_score" in results
+ assert "mteb_task_scores" in results
+
+ # Should have scores for both tasks
+ assert len(results["mteb_task_scores"]) == len(tasks)
+ for task in tasks:
+ assert task in results["mteb_task_scores"]
+
+ @pytest.mark.regression
+ @pytest.mark.slow
+ def test_main_score_is_average(self):
+ """Test that main score is average of task scores."""
+ tasks = ["STS12", "STS13"]
+ validator = MTEBValidator(
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
+ task_names=tasks,
+ )
+
+ results = validator.run_evaluation()
+
+ # Calculate expected average
+ task_scores = list(results["mteb_task_scores"].values())
+ expected_avg = sum(task_scores) / len(task_scores)
+
+ # Main score should be close to average
+ assert results["mteb_main_score"] == pytest.approx(expected_avg, abs=0.1)
+
+ @pytest.mark.sanity
+ def test_default_mteb_tasks_constant(self):
+ """Test that DEFAULT_MTEB_TASKS contains expected tasks."""
+ assert isinstance(DEFAULT_MTEB_TASKS, list)
+ assert len(DEFAULT_MTEB_TASKS) > 0
+
+ # Should contain STS tasks (standard for embeddings)
+ assert any("STS" in task for task in DEFAULT_MTEB_TASKS)
+
+ @pytest.mark.smoke
+ def test_model_loaded(self, validator):
+ """Test that SentenceTransformer model is loaded."""
+ assert validator.model is not None
+
+ # Should be able to encode text
+ embedding = validator.model.encode("Test sentence.")
+ assert embedding is not None
+ assert len(embedding) > 0
+
+ @pytest.mark.regression
+ def test_task_names_stored(self, validator):
+ """Test that task names are stored correctly."""
+ assert hasattr(validator, "task_names")
+ assert validator.task_names == ["STS12"]
+
+ @pytest.mark.sanity
+ @pytest.mark.slow
+ def test_evaluation_reproducible(self, validator):
+ """Test that evaluation produces consistent results."""
+ # Run evaluation twice
+ results1 = validator.run_evaluation()
+ results2 = validator.run_evaluation()
+
+ # Results should be identical (or very close)
+ assert results1["mteb_main_score"] == pytest.approx(
+ results2["mteb_main_score"], abs=0.01
+ )
+
+ for task in results1["mteb_task_scores"]:
+ assert results1["mteb_task_scores"][task] == pytest.approx(
+ results2["mteb_task_scores"][task], abs=0.01
+ )
+
+ @pytest.mark.regression
+ @pytest.mark.slow
+ def test_different_models_different_scores(self):
+ """Test that different models produce different scores."""
+ # This test verifies the evaluation is model-specific
+ validator1 = MTEBValidator(
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
+ task_names=["STS12"],
+ )
+
+        # The second model may not be installed; guard only its construction
+        # so genuine evaluation failures are not masked as skips.
+        try:
+            validator2 = MTEBValidator(
+                model_name="sentence-transformers/paraphrase-MiniLM-L3-v2",
+                task_names=["STS12"],
+            )
+        except Exception:  # noqa: BLE001
+            pytest.skip("Second model not available for comparison")
+
+        results1 = validator1.run_evaluation()
+        results2 = validator2.run_evaluation()
+
+        # Each evaluation should report a main score; the values are
+        # model-specific, though they may be numerically close.
+        assert "mteb_main_score" in results1
+        assert "mteb_main_score" in results2
+
+ @pytest.mark.sanity
+ def test_initialization_with_none_tasks(self):
+ """Test initialization when tasks is None (should use default)."""
+ validator = MTEBValidator(
+ model_name="sentence-transformers/all-MiniLM-L6-v2",
+ task_names=None,
+ )
+
+ # Should use DEFAULT_MTEB_TASKS
+ assert validator.task_names == DEFAULT_MTEB_TASKS
+
+ @pytest.mark.regression
+ @pytest.mark.slow
+ def test_evaluation_returns_dict_structure(self, validator):
+ """Test that evaluation returns expected dictionary structure."""
+ results = validator.run_evaluation()
+
+ # Check structure
+ assert isinstance(results, dict)
+ assert set(results.keys()) == {"mteb_main_score", "mteb_task_scores"}
+
+ # Check types
+ assert isinstance(results["mteb_main_score"], float)
+ assert isinstance(results["mteb_task_scores"], dict)
+
+ # Check task scores structure
+ for task_name, score in results["mteb_task_scores"].items():
+ assert isinstance(task_name, str)
+            assert isinstance(score, (int, float))
diff --git a/tests/unit/benchmark/quality/test_validators.py b/tests/unit/benchmark/quality/test_validators.py
new file mode 100644
index 000000000..08b286f4b
--- /dev/null
+++ b/tests/unit/benchmark/quality/test_validators.py
@@ -0,0 +1,306 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from guidellm.benchmark.quality.validators import compute_cosine_similarity
+
+# Check for sentence-transformers availability for quality validator tests
+try:
+ import sentence_transformers # noqa: F401
+
+ EMBEDDINGS_VALIDATOR_AVAILABLE = True
+except ImportError:
+ EMBEDDINGS_VALIDATOR_AVAILABLE = False
+
+if EMBEDDINGS_VALIDATOR_AVAILABLE:
+ from guidellm.benchmark.quality.validators import EmbeddingsQualityValidator
+
+
+class TestComputeCosineSimilarity:
+ """Tests for cosine similarity computation function."""
+
+ @pytest.mark.smoke
+ def test_identical_vectors(self):
+ """Test cosine similarity of identical vectors is 1.0."""
+ vec = np.array([1.0, 2.0, 3.0, 4.0])
+ similarity = compute_cosine_similarity(vec, vec)
+ assert similarity == pytest.approx(1.0, abs=1e-6)
+
+ @pytest.mark.smoke
+ def test_orthogonal_vectors(self):
+ """Test cosine similarity of orthogonal vectors is 0.0."""
+ vec1 = np.array([1.0, 0.0, 0.0])
+ vec2 = np.array([0.0, 1.0, 0.0])
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert similarity == pytest.approx(0.0, abs=1e-6)
+
+ @pytest.mark.smoke
+ def test_opposite_vectors(self):
+ """Test cosine similarity of opposite vectors is -1.0."""
+ vec1 = np.array([1.0, 2.0, 3.0])
+ vec2 = np.array([-1.0, -2.0, -3.0])
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert similarity == pytest.approx(-1.0, abs=1e-6)
+
+ @pytest.mark.sanity
+ def test_similar_vectors(self):
+ """Test cosine similarity of similar vectors is close to 1.0."""
+ vec1 = np.array([1.0, 2.0, 3.0, 4.0])
+ vec2 = np.array([1.1, 2.1, 2.9, 4.0])
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert similarity > 0.99
+ assert similarity <= 1.0
+
+ @pytest.mark.sanity
+ def test_dissimilar_vectors(self):
+ """Test cosine similarity of dissimilar vectors is low."""
+ vec1 = np.array([1.0, 0.0, 0.0])
+ vec2 = np.array([0.1, 1.0, 0.0])
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert similarity < 0.2
+ assert similarity >= 0.0
+
+ @pytest.mark.sanity
+ def test_normalized_vectors(self):
+ """Test with pre-normalized vectors (unit length)."""
+ # Pre-normalized to unit length
+ vec1 = np.array([1.0, 0.0, 0.0])
+ vec2 = np.array([0.707107, 0.707107, 0.0]) # 45 degrees
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert similarity == pytest.approx(0.707107, abs=1e-5)
+
+ @pytest.mark.regression
+ def test_high_dimensional_vectors(self):
+ """Test with high-dimensional vectors (typical embedding size)."""
+ rng = np.random.default_rng(42)
+ vec1 = rng.random(384) # Common embedding dimension
+ vec2 = rng.random(384)
+
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert -1.0 <= similarity <= 1.0
+
+ @pytest.mark.regression
+ def test_zero_vector_handling(self):
+ """Test behavior with zero vectors (edge case)."""
+ vec1 = np.array([1.0, 2.0, 3.0])
+ vec2 = np.array([0.0, 0.0, 0.0])
+
+        # The implementation is expected to return 0.0 for a zero vector
+        # rather than dividing by a zero norm
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert similarity == 0.0
+
+ @pytest.mark.regression
+ def test_single_dimension_vectors(self):
+ """Test with single-dimension vectors."""
+ vec1 = np.array([5.0])
+ vec2 = np.array([3.0])
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert similarity == pytest.approx(1.0, abs=1e-6)
+
+ vec3 = np.array([-5.0])
+ similarity_neg = compute_cosine_similarity(vec1, vec3)
+ assert similarity_neg == pytest.approx(-1.0, abs=1e-6)
+
+ @pytest.mark.sanity
+ def test_return_type(self):
+ """Test that return type is Python float."""
+ vec1 = np.array([1.0, 2.0, 3.0])
+ vec2 = np.array([4.0, 5.0, 6.0])
+ similarity = compute_cosine_similarity(vec1, vec2)
+ assert isinstance(similarity, float)
+
+
+@pytest.mark.skipif(
+ not EMBEDDINGS_VALIDATOR_AVAILABLE,
+ reason="EmbeddingsQualityValidator requires sentence-transformers",
+)
+class TestEmbeddingsQualityValidator:
+ """Tests for EmbeddingsQualityValidator class."""
+
+ @pytest.fixture
+ def validator(self):
+ """Create a validator with a test model."""
+ # Use a small, fast model for testing
+ return EmbeddingsQualityValidator(
+ baseline_model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ @pytest.mark.smoke
+ def test_initialization(self, validator):
+ """Test validator initialization."""
+ assert validator is not None
+ assert validator.baseline_model is not None
+
+ @pytest.mark.sanity
+ def test_validate_against_baseline_same_model(self, validator):
+ """Test validation against baseline with same model."""
+ text = "This is a test sentence for embeddings."
+
+ # Get baseline embedding
+ baseline_embedding = validator.baseline_model.encode(text)
+
+ # Validate against itself (should be very high similarity)
+ similarity = validator.validate_against_baseline(text, baseline_embedding)
+
+ assert similarity == pytest.approx(1.0, abs=1e-6)
+ assert isinstance(similarity, float)
+
+ @pytest.mark.sanity
+ def test_validate_against_baseline_different_embedding(self, validator):
+ """Test validation with a different (random) embedding."""
+ text = "This is a test sentence."
+
+ # Create a random embedding (different from baseline)
+ rng = np.random.default_rng(42)
+ random_embedding = rng.random(384) # MiniLM dimension
+ # Normalize to unit length
+ random_embedding = random_embedding / np.linalg.norm(random_embedding)
+
+ similarity = validator.validate_against_baseline(text, random_embedding)
+
+ # Random embedding should have low similarity
+ assert similarity < 0.5
+ assert similarity >= -1.0
+
+ @pytest.mark.regression
+ def test_validate_multiple_texts(self, validator):
+ """Test validation with multiple different texts."""
+ texts = [
+ "Machine learning is a subset of artificial intelligence.",
+ "The weather today is sunny and warm.",
+ "Python is a popular programming language.",
+ ]
+
+ for text in texts:
+ baseline_embedding = validator.baseline_model.encode(text)
+ similarity = validator.validate_against_baseline(text, baseline_embedding)
+ # Same model should have perfect similarity
+ assert similarity == pytest.approx(1.0, abs=1e-6)
+
+ @pytest.mark.sanity
+ def test_check_self_consistency_identical_embeddings(self, validator):
+ """Test self-consistency with identical embeddings."""
+ text = "Test sentence for consistency check."
+
+ # Generate same embedding twice
+ emb1 = validator.baseline_model.encode(text)
+ emb2 = validator.baseline_model.encode(text)
+
+ consistency = validator.check_self_consistency(text, [emb1, emb2])
+
+ # Should be perfectly consistent
+ assert consistency == pytest.approx(1.0, abs=1e-6)
+
+ @pytest.mark.sanity
+ def test_check_self_consistency_single_embedding(self, validator):
+ """Test self-consistency with only one embedding."""
+ text = "Single embedding test."
+ emb = validator.baseline_model.encode(text)
+
+ consistency = validator.check_self_consistency(text, [emb])
+
+ # Single embedding should return 1.0 (perfectly consistent)
+ assert consistency == 1.0
+
+ @pytest.mark.sanity
+ def test_check_self_consistency_empty_list(self, validator):
+ """Test self-consistency with empty embedding list."""
+ text = "Empty list test."
+
+ consistency = validator.check_self_consistency(text, [])
+
+ # Empty list should return 1.0 (no inconsistency)
+ assert consistency == 1.0
+
+ @pytest.mark.regression
+ def test_check_self_consistency_multiple_embeddings(self, validator):
+ """Test self-consistency with multiple embeddings."""
+ text = "Test sentence for multiple embeddings."
+
+ # Generate same embedding multiple times
+ embeddings = [validator.baseline_model.encode(text) for _ in range(5)]
+
+ consistency = validator.check_self_consistency(text, embeddings)
+
+ # Should be highly consistent (model is deterministic)
+ assert consistency == pytest.approx(1.0, abs=1e-6)
+
+ @pytest.mark.regression
+ def test_check_self_consistency_different_embeddings(self, validator):
+ """Test self-consistency with intentionally different embeddings."""
+ text = "Consistency test."
+ rng = np.random.default_rng(42)
+
+ # First embedding from model
+ emb1 = validator.baseline_model.encode(text)
+
+ # Second embedding is random
+ emb2 = rng.random(384)
+ emb2 = emb2 / np.linalg.norm(emb2)
+
+ consistency = validator.check_self_consistency(text, [emb1, emb2])
+
+ # Should have low consistency
+ assert consistency < 0.5
+
+ @pytest.mark.sanity
+ def test_embedding_dimensions(self, validator):
+ """Test that baseline model produces expected dimensions."""
+ text = "Dimension test."
+ embedding = validator.baseline_model.encode(text)
+
+ # MiniLM-L6-v2 produces 384-dimensional embeddings
+ assert embedding.shape == (384,)
+
+ @pytest.mark.regression
+ def test_baseline_model_deterministic(self, validator):
+ """Test that baseline model produces deterministic results."""
+ text = "Deterministic test."
+
+ # Encode same text multiple times
+ emb1 = validator.baseline_model.encode(text)
+ emb2 = validator.baseline_model.encode(text)
+ emb3 = validator.baseline_model.encode(text)
+
+ # All embeddings should be identical
+ assert np.allclose(emb1, emb2, atol=1e-6)
+ assert np.allclose(emb2, emb3, atol=1e-6)
+
+ @pytest.mark.sanity
+ def test_similarity_range(self, validator):
+ """Test that similarity values are within valid range."""
+ texts = [
+ "First test sentence.",
+ "Second test sentence.",
+ "Completely different topic about weather.",
+ ]
+
+ for text in texts:
+ baseline_emb = validator.baseline_model.encode(text)
+ similarity = validator.validate_against_baseline(text, baseline_emb)
+
+ # Similarity should always be in [-1, 1]
+ assert -1.0 <= similarity <= 1.0
+
+ @pytest.mark.regression
+ def test_vllm_tolerance_standard(self, validator):
+ """Test that similarity meets vLLM standard tolerance (1e-2)."""
+ text = "vLLM tolerance test."
+
+ baseline_emb = validator.baseline_model.encode(text)
+ similarity = validator.validate_against_baseline(text, baseline_emb)
+
+ # Same model should easily meet 1e-2 tolerance
+ assert abs(1.0 - similarity) < 1e-2
+
+ @pytest.mark.regression
+ def test_vllm_tolerance_mteb(self, validator):
+ """Test that similarity meets vLLM MTEB tolerance (5e-4)."""
+ text = "vLLM MTEB tolerance test."
+
+ baseline_emb = validator.baseline_model.encode(text)
+ similarity = validator.validate_against_baseline(text, baseline_emb)
+
+ # Same model should easily meet 5e-4 tolerance
+ assert abs(1.0 - similarity) < 5e-4
diff --git a/tests/unit/benchmark/schemas/embeddings/__init__.py b/tests/unit/benchmark/schemas/embeddings/__init__.py
new file mode 100644
index 000000000..ea7cc06e7
--- /dev/null
+++ b/tests/unit/benchmark/schemas/embeddings/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for embeddings benchmark schemas."""
diff --git a/tests/unit/benchmark/schemas/embeddings/test_accumulator.py b/tests/unit/benchmark/schemas/embeddings/test_accumulator.py
new file mode 100644
index 000000000..15c7c7677
--- /dev/null
+++ b/tests/unit/benchmark/schemas/embeddings/test_accumulator.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import pytest
+
+from guidellm.benchmark.schemas.embeddings.accumulator import (
+ EmbeddingsBenchmarkAccumulator,
+ EmbeddingsQualityMetricsAccumulator,
+)
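+
+# EmbeddingsBenchmarkAccumulator needs a BenchmarkConfig for full
+# instantiation, so the tests below mostly introspect pydantic's
+# model_fields; only EmbeddingsQualityMetricsAccumulator, which has no
+# required fields, is constructed directly.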
+
+
+class TestEmbeddingsQualityMetricsAccumulator:
+ """Tests for EmbeddingsQualityMetricsAccumulator."""
+
+ @pytest.mark.smoke
+ def test_initialization(self):
+ """Test accumulator initialization."""
+ accumulator = EmbeddingsQualityMetricsAccumulator()
+ assert accumulator.cosine_similarities == []
+
+ @pytest.mark.sanity
+ def test_add_cosine_similarity(self):
+ """Test adding cosine similarity values."""
+ accumulator = EmbeddingsQualityMetricsAccumulator()
+
+ # Add some cosine similarity values
+ accumulator.cosine_similarities.append(0.98)
+ accumulator.cosine_similarities.append(0.97)
+ accumulator.cosine_similarities.append(0.99)
+
+ assert len(accumulator.cosine_similarities) == 3
+ assert accumulator.cosine_similarities[0] == 0.98
+ assert accumulator.cosine_similarities[1] == 0.97
+ assert accumulator.cosine_similarities[2] == 0.99
+
+ @pytest.mark.sanity
+ def test_multiple_instances_independent(self):
+ """Test that multiple accumulator instances are independent."""
+ acc1 = EmbeddingsQualityMetricsAccumulator()
+ acc2 = EmbeddingsQualityMetricsAccumulator()
+
+ acc1.cosine_similarities.append(0.95)
+ acc2.cosine_similarities.append(0.99)
+
+ assert len(acc1.cosine_similarities) == 1
+ assert len(acc2.cosine_similarities) == 1
+ assert acc1.cosine_similarities[0] != acc2.cosine_similarities[0]
+
+
+class TestEmbeddingsBenchmarkAccumulator:
+ """Tests for EmbeddingsBenchmarkAccumulator."""
+
+ @pytest.mark.smoke
+ def test_class_signatures(self):
+ """Validate public surface and key properties."""
+        # Check that the class declares the expected pydantic fields (their
+        # values are populated at init time from a BenchmarkConfig)
+ assert hasattr(EmbeddingsBenchmarkAccumulator, "model_fields")
+ assert "quality" in EmbeddingsBenchmarkAccumulator.model_fields
+ assert (
+ "encoding_format_breakdown"
+ in EmbeddingsBenchmarkAccumulator.model_fields
+ )
+
+ @pytest.mark.smoke
+ def test_initialization(self):
+ """Test accumulator has proper default fields."""
+        # Full instantiation requires a BenchmarkConfig, so only verify that
+        # the class declares the expected fields
+ fields = EmbeddingsBenchmarkAccumulator.model_fields
+
+ assert "quality_enabled" in fields
+ assert "quality" in fields
+ assert "encoding_format_breakdown" in fields
+ assert "timings" in fields
+ assert "scheduler_metrics" in fields
+ assert "metrics" in fields
+ assert "requests" in fields
+
+ @pytest.mark.sanity
+ def test_encoding_format_breakdown_field(self):
+ """Test that encoding_format_breakdown field exists and is a dict."""
+ # Test that the field schema is correct
+ fields = EmbeddingsBenchmarkAccumulator.model_fields
+ assert "encoding_format_breakdown" in fields
+
+ # Field should be a dict type
+ field_info = fields["encoding_format_breakdown"]
+ assert field_info.annotation == dict[str, int]
+
+ @pytest.mark.sanity
+ def test_quality_metrics_accumulator_field(self):
+ """Test that quality field exists and has correct type."""
+ fields = EmbeddingsBenchmarkAccumulator.model_fields
+ assert "quality" in fields
+ assert "quality_enabled" in fields
+
+ # Field should be optional EmbeddingsQualityMetricsAccumulator
+ field_info = fields["quality"]
+ # Check field is optional (can be None)
+ assert field_info.is_required() is False
+
+ @pytest.mark.regression
+ def test_accumulator_field_defaults(self):
+ """Test that accumulator fields have proper default factories."""
+ fields = EmbeddingsBenchmarkAccumulator.model_fields
+
+ # Check fields with default factories
+ assert "timings" in fields
+ assert "scheduler_metrics" in fields
+ assert "metrics" in fields
+ assert "requests" in fields
+
+ # Check that encoding_format_breakdown has dict factory
+ assert fields["encoding_format_breakdown"].default_factory is not None
+
+ @pytest.mark.regression
+ def test_type_literal(self):
+ """Test that type_ field is correctly set."""
+ fields = EmbeddingsBenchmarkAccumulator.model_fields
+ assert "type_" in fields
+
+ # Check the default value
+ assert fields["type_"].default == "embeddings_benchmark_accumulator"
diff --git a/tests/unit/benchmark/schemas/embeddings/test_entrypoints.py b/tests/unit/benchmark/schemas/embeddings/test_entrypoints.py
new file mode 100644
index 000000000..bc97ad51c
--- /dev/null
+++ b/tests/unit/benchmark/schemas/embeddings/test_entrypoints.py
@@ -0,0 +1,275 @@
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from guidellm.benchmark.schemas.embeddings.entrypoints import BenchmarkEmbeddingsArgs
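+
+# Embeddings-specific defaults assumed by these tests (asserted below):
+#   enable_quality_validation=False, baseline_model=None,
+#   quality_tolerance=1e-2, enable_mteb=False, mteb_tasks=None,
+#   encoding_format="float"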
+
+
+class TestBenchmarkEmbeddingsArgs:
+ """Tests for BenchmarkEmbeddingsArgs schema."""
+
+ @pytest.mark.smoke
+ def test_class_signatures(self):
+ """Validate public surface and key properties."""
+ fields = BenchmarkEmbeddingsArgs.model_fields
+
+ # Standard benchmark args
+ for field_name in (
+ "target",
+ "model",
+ "backend",
+ "profile",
+ "data",
+ "outputs",
+ ):
+ assert field_name in fields
+
+ # Embeddings-specific args
+ for field_name in (
+ "enable_quality_validation",
+ "baseline_model",
+ "quality_tolerance",
+ "enable_mteb",
+ "mteb_tasks",
+ "encoding_format",
+ ):
+ assert field_name in fields
+
+ @pytest.mark.smoke
+ def test_initialization_minimal(self):
+ """Test initialization with minimal required fields."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ )
+
+ assert args.target == "http://localhost:8000"
+ assert args.enable_quality_validation is False
+ assert args.baseline_model is None
+ assert args.quality_tolerance == 1e-2
+ assert args.enable_mteb is False
+ assert args.mteb_tasks is None
+ assert args.encoding_format == "float" # Default is "float"
+
+ @pytest.mark.sanity
+ def test_initialization_with_quality_validation(self):
+ """Test initialization with quality validation enabled."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ model="test-model",
+ enable_quality_validation=True,
+ baseline_model="sentence-transformers/all-MiniLM-L6-v2",
+ quality_tolerance=5e-4,
+ )
+
+ assert args.enable_quality_validation is True
+ assert args.baseline_model == "sentence-transformers/all-MiniLM-L6-v2"
+ assert args.quality_tolerance == 5e-4
+
+ @pytest.mark.sanity
+ def test_initialization_with_mteb(self):
+ """Test initialization with MTEB enabled."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ enable_mteb=True,
+ mteb_tasks=["STS12", "STS13", "STSBenchmark"],
+ )
+
+ assert args.enable_mteb is True
+ assert args.mteb_tasks == ["STS12", "STS13", "STSBenchmark"]
+
+ @pytest.mark.sanity
+ def test_initialization_with_encoding_format(self):
+ """Test initialization with encoding format."""
+ # Float encoding
+ args_float = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ encoding_format="float",
+ )
+ assert args_float.encoding_format == "float"
+
+ # Base64 encoding
+ args_base64 = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ encoding_format="base64",
+ )
+ assert args_base64.encoding_format == "base64"
+
+ @pytest.mark.sanity
+ def test_initialization_all_fields(self):
+ """Test initialization with all embeddings-specific fields."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ model="test-embedding-model",
+ backend="openai_http",
+ profile="sweep",
+ data=["embeddings_data.json"],
+ outputs=["json", "csv", "html"],
+ enable_quality_validation=True,
+ baseline_model="sentence-transformers/all-MiniLM-L6-v2",
+ quality_tolerance=1e-3,
+ enable_mteb=True,
+ mteb_tasks=["STS12", "STS13"],
+ encoding_format="float",
+ )
+
+ # Standard fields
+ assert args.target == "http://localhost:8000"
+ assert args.model == "test-embedding-model"
+ assert args.backend == "openai_http"
+ assert args.profile == "sweep"
+ assert args.data == ["embeddings_data.json"]
+ assert args.outputs == ["json", "csv", "html"]
+
+ # Embeddings-specific fields
+ assert args.enable_quality_validation is True
+ assert args.baseline_model == "sentence-transformers/all-MiniLM-L6-v2"
+ assert args.quality_tolerance == 1e-3
+ assert args.enable_mteb is True
+ assert args.mteb_tasks == ["STS12", "STS13"]
+ assert args.encoding_format == "float"
+
+ @pytest.mark.sanity
+ def test_invalid_initialization_missing_target(self):
+ """Missing target should fail validation."""
+ with pytest.raises(ValidationError):
+ BenchmarkEmbeddingsArgs() # type: ignore[call-arg]
+
+ @pytest.mark.sanity
+ @pytest.mark.parametrize(
+ ("field_name", "bad_value"),
+ [
+ ("target", None),
+ ("target", 123),
+ ("model", 123),
+ ("enable_quality_validation", "not_a_bool"),
+ ("quality_tolerance", "not_a_float"),
+ ("enable_mteb", "not_a_bool"),
+ ("mteb_tasks", "not_a_list"),
+ ("encoding_format", 123),
+ ],
+ )
+ def test_invalid_initialization_values(self, field_name: str, bad_value):
+ """Type mismatches should raise."""
+ base = {"target": "http://localhost:8000"}
+ base[field_name] = bad_value
+ with pytest.raises(ValidationError):
+ BenchmarkEmbeddingsArgs(**base) # type: ignore[arg-type]
+
+ @pytest.mark.smoke
+ def test_marshalling(self):
+ """Test model_dump / model_validate round-trip."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ model="test-model",
+ data=["test_data.json"], # Need at least one data item
+ enable_quality_validation=True,
+ baseline_model="sentence-transformers/all-MiniLM-L6-v2",
+ quality_tolerance=1e-3,
+ )
+
+ dumped = args.model_dump()
+ rebuilt = BenchmarkEmbeddingsArgs.model_validate(dumped)
+
+ assert rebuilt.target == args.target
+ assert rebuilt.model == args.model
+ assert rebuilt.enable_quality_validation == args.enable_quality_validation
+ assert rebuilt.baseline_model == args.baseline_model
+ assert rebuilt.quality_tolerance == args.quality_tolerance
+
+ @pytest.mark.regression
+ def test_quality_tolerance_default_value(self):
+ """Test default quality tolerance matches vLLM pattern (1e-2)."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ )
+ assert args.quality_tolerance == 1e-2
+
+ @pytest.mark.regression
+ def test_mteb_tasks_default_none(self):
+ """Test MTEB tasks default to None (will use DEFAULT_MTEB_TASKS in
+ validator)."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ enable_mteb=True,
+ )
+ # mteb_tasks should be None by default
+ # The validator will set DEFAULT_MTEB_TASKS if None
+ assert args.mteb_tasks is None or isinstance(args.mteb_tasks, list)
+
+ @pytest.mark.sanity
+ def test_optional_fields(self):
+ """Test that embeddings-specific fields are optional."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ )
+
+ # All embeddings-specific fields should have defaults
+ assert args.enable_quality_validation is False
+ assert args.baseline_model is None
+ assert args.quality_tolerance == 1e-2
+ assert args.enable_mteb is False
+ assert args.mteb_tasks is None
+ assert args.encoding_format == "float" # Default is "float", not None
+
+ @pytest.mark.regression
+ def test_quality_validation_without_baseline_model(self):
+ """Test quality validation can be enabled without explicit baseline model."""
+ # Should be valid - baseline model can be determined later or use default
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ enable_quality_validation=True,
+ )
+
+ assert args.enable_quality_validation is True
+ assert args.baseline_model is None
+
+ @pytest.mark.regression
+ def test_mteb_tasks_as_list(self):
+ """Test MTEB tasks can be specified as a list."""
+ tasks = ["STS12", "STS13", "STS14", "STS15", "STSBenchmark"]
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ enable_mteb=True,
+ mteb_tasks=tasks,
+ )
+
+ assert args.mteb_tasks == tasks
+ assert len(args.mteb_tasks) == 5
+
+ @pytest.mark.sanity
+ def test_encoding_format_optional(self):
+ """Test encoding format has default value."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ )
+ # Default is "float"
+ assert args.encoding_format == "float"
+
+ @pytest.mark.regression
+ def test_standard_benchmark_args_inherited(self):
+ """Test that standard BenchmarkArgs fields are inherited."""
+ args = BenchmarkEmbeddingsArgs(
+ target="http://localhost:8000",
+ model="test-model",
+ backend="openai_http",
+ profile="sweep",
+ data=["data.json"],
+ outputs=["json", "csv"],
+ )
+
+ # These are inherited from BenchmarkArgs
+ assert hasattr(args, "target")
+ assert hasattr(args, "model")
+ assert hasattr(args, "backend")
+ assert hasattr(args, "profile")
+ assert hasattr(args, "data")
+ assert hasattr(args, "outputs")
+
+ # Verify values
+ assert args.target == "http://localhost:8000"
+ assert args.model == "test-model"
+ assert args.backend == "openai_http"
+ assert args.profile == "sweep"
+ assert args.data == ["data.json"]
+ assert args.outputs == ["json", "csv"]
diff --git a/tests/unit/benchmark/schemas/embeddings/test_metrics.py b/tests/unit/benchmark/schemas/embeddings/test_metrics.py
new file mode 100644
index 000000000..a4c2624a2
--- /dev/null
+++ b/tests/unit/benchmark/schemas/embeddings/test_metrics.py
@@ -0,0 +1,364 @@
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from guidellm.benchmark.schemas.embeddings.metrics import (
+ EmbeddingsMetrics,
+ EmbeddingsQualityMetrics,
+)
+from guidellm.schemas import (
+ DistributionSummary,
+ Percentiles,
+ StatusBreakdown,
+ StatusDistributionSummary,
+)
+
+
+def create_percentiles(p50=0.5) -> Percentiles:
+ """Helper to create Percentiles with all required fields."""
+ return Percentiles(
+ p001=p50 * 0.5,
+ p01=p50 * 0.6,
+ p05=p50 * 0.7,
+ p10=p50 * 0.8,
+ p25=p50 * 0.9,
+ p50=p50,
+ p75=p50 * 1.05,
+ p90=p50 * 1.1,
+ p95=p50 * 1.15,
+ p99=p50 * 1.2,
+ p999=p50 * 1.25,
+ )
+
+
+def create_distribution_summary(
+ mean=0.5, median=0.5, mode=0.5, variance=0.01, std_dev=0.1,
+ min_val=0.1, max_val=1.0, count=100, total_sum=50.0
+) -> DistributionSummary:
+ """Helper to create DistributionSummary with all required fields."""
+ return DistributionSummary(
+ mean=mean,
+ median=median,
+ mode=mode,
+ variance=variance,
+ std_dev=std_dev,
+ min=min_val,
+ max=max_val,
+ count=count,
+ total_sum=total_sum,
+ percentiles=create_percentiles(median),
+ )
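+
+# Note: the helpers above synthesize internally consistent values (percentiles
+# as fixed multiples of the median; call sites keep total_sum = mean * count),
+# so the schema tests below exercise field plumbing, not statistics.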
+
+
+class TestEmbeddingsQualityMetrics:
+ """Tests for EmbeddingsQualityMetrics schema."""
+
+ @pytest.mark.smoke
+ def test_class_signatures(self):
+ """Validate public surface and key properties."""
+ fields = EmbeddingsQualityMetrics.model_fields
+ for field_name in (
+ "baseline_cosine_similarity",
+ "self_consistency_score",
+ "mteb_main_score",
+ "mteb_task_scores",
+ ):
+ assert field_name in fields
+
+ @pytest.mark.smoke
+ def test_initialization_minimal(self):
+ """Test initialization with minimal required fields."""
+ metrics = EmbeddingsQualityMetrics()
+ assert metrics.baseline_cosine_similarity is None
+ assert metrics.self_consistency_score is None
+ assert metrics.mteb_main_score is None
+ assert metrics.mteb_task_scores is None
+
+ @pytest.mark.sanity
+ def test_initialization_with_cosine_similarity(self):
+ """Test initialization with baseline cosine similarity."""
+ dist = create_distribution_summary(
+ mean=0.98,
+ median=0.985,
+ mode=0.985,
+ variance=0.0001,
+ std_dev=0.01,
+ min_val=0.95,
+ max_val=0.99,
+ count=100,
+ total_sum=98.0,
+ )
+ status_dist = StatusDistributionSummary(
+ successful=dist,
+ errored=None,
+ incomplete=None,
+ total=None,
+ )
+
+ metrics = EmbeddingsQualityMetrics(
+ baseline_cosine_similarity=status_dist
+ )
+ assert metrics.baseline_cosine_similarity is not None
+ assert metrics.baseline_cosine_similarity.successful.mean == 0.98
+
+ @pytest.mark.sanity
+ def test_initialization_with_mteb_scores(self):
+ """Test initialization with MTEB scores."""
+ metrics = EmbeddingsQualityMetrics(
+ mteb_main_score=75.5,
+ mteb_task_scores={
+ "STS12": 72.3,
+ "STS13": 78.1,
+ "STSBenchmark": 80.9,
+ },
+ )
+ assert metrics.mteb_main_score == 75.5
+ assert metrics.mteb_task_scores is not None
+ assert len(metrics.mteb_task_scores) == 3
+ assert metrics.mteb_task_scores["STS12"] == 72.3
+
+ @pytest.mark.sanity
+ def test_initialization_all_fields(self):
+ """Test initialization with all fields populated."""
+ cos_dist = create_distribution_summary(
+ mean=0.98,
+ median=0.985,
+ mode=0.985,
+ variance=0.0001,
+ std_dev=0.01,
+ min_val=0.95,
+ max_val=0.99,
+ count=100,
+ total_sum=98.0,
+ )
+ cons_dist = create_distribution_summary(
+ mean=0.995,
+ median=0.997,
+ mode=0.997,
+ variance=0.000025,
+ std_dev=0.005,
+ min_val=0.98,
+ max_val=0.999,
+ count=100,
+ total_sum=99.5,
+ )
+
+ metrics = EmbeddingsQualityMetrics(
+ baseline_cosine_similarity=StatusDistributionSummary(
+ successful=cos_dist, errored=None, incomplete=None, total=None
+ ),
+ self_consistency_score=StatusDistributionSummary(
+ successful=cons_dist, errored=None, incomplete=None, total=None
+ ),
+ mteb_main_score=75.5,
+ mteb_task_scores={"STS12": 72.3, "STS13": 78.1},
+ )
+
+ assert metrics.baseline_cosine_similarity.successful.mean == 0.98
+ assert metrics.self_consistency_score.successful.mean == 0.995
+ assert metrics.mteb_main_score == 75.5
+ assert len(metrics.mteb_task_scores) == 2
+
+ @pytest.mark.smoke
+ def test_marshalling(self):
+ """Test model_dump / model_validate round-trip."""
+ metrics = EmbeddingsQualityMetrics(
+ mteb_main_score=75.5,
+ mteb_task_scores={"STS12": 72.3},
+ )
+ dumped = metrics.model_dump()
+ rebuilt = EmbeddingsQualityMetrics.model_validate(dumped)
+ assert rebuilt.mteb_main_score == metrics.mteb_main_score
+ assert rebuilt.mteb_task_scores == metrics.mteb_task_scores
+
+
+class TestEmbeddingsMetrics:
+ """Tests for EmbeddingsMetrics schema."""
+
+ @pytest.mark.smoke
+ def test_class_signatures(self):
+ """Validate public surface and key properties."""
+ fields = EmbeddingsMetrics.model_fields
+ for field_name in (
+ "request_totals",
+ "requests_per_second",
+ "request_concurrency",
+ "request_latency",
+ "input_tokens_count",
+ "input_tokens_per_second",
+ "quality",
+ "encoding_format_breakdown",
+ ):
+ assert field_name in fields
+
+ @pytest.mark.smoke
+ def test_initialization_minimal(self):
+ """Test initialization with required fields."""
+ metrics = EmbeddingsMetrics(
+ request_totals=StatusBreakdown(
+ successful=10, incomplete=0, errored=0, total=10
+ ),
+ requests_per_second=StatusDistributionSummary(),
+ request_concurrency=StatusDistributionSummary(),
+ request_latency=StatusDistributionSummary(),
+ input_tokens_count=StatusBreakdown(
+ successful=500, incomplete=0, errored=0, total=500
+ ),
+ input_tokens_per_second=StatusDistributionSummary(),
+ )
+
+ assert metrics.request_totals.successful == 10
+ assert metrics.input_tokens_count.successful == 500
+ assert metrics.quality is None
+ assert metrics.encoding_format_breakdown == {}
+
+ @pytest.mark.sanity
+ def test_initialization_with_quality_metrics(self):
+ """Test initialization with quality validation metrics."""
+ quality = EmbeddingsQualityMetrics(
+ mteb_main_score=75.5,
+ mteb_task_scores={"STS12": 72.3},
+ )
+
+ metrics = EmbeddingsMetrics(
+ request_totals=StatusBreakdown(
+ successful=10, incomplete=0, errored=0, total=10
+ ),
+ requests_per_second=StatusDistributionSummary(),
+ request_concurrency=StatusDistributionSummary(),
+ request_latency=StatusDistributionSummary(),
+ input_tokens_count=StatusBreakdown(
+ successful=500, incomplete=0, errored=0, total=500
+ ),
+ input_tokens_per_second=StatusDistributionSummary(),
+ quality=quality,
+ )
+
+ assert metrics.quality is not None
+ assert metrics.quality.mteb_main_score == 75.5
+
+ @pytest.mark.sanity
+ def test_initialization_with_encoding_breakdown(self):
+ """Test initialization with encoding format breakdown."""
+ metrics = EmbeddingsMetrics(
+ request_totals=StatusBreakdown(
+ successful=15, incomplete=0, errored=0, total=15
+ ),
+ requests_per_second=StatusDistributionSummary(),
+ request_concurrency=StatusDistributionSummary(),
+ request_latency=StatusDistributionSummary(),
+ input_tokens_count=StatusBreakdown(
+ successful=750, incomplete=0, errored=0, total=750
+ ),
+ input_tokens_per_second=StatusDistributionSummary(),
+ encoding_format_breakdown={"float": 10, "base64": 5},
+ )
+
+ assert metrics.encoding_format_breakdown == {"float": 10, "base64": 5}
+ assert sum(metrics.encoding_format_breakdown.values()) == 15
+
+ @pytest.mark.sanity
+ def test_initialization_all_fields(self):
+ """Test initialization with all fields populated."""
+ quality = EmbeddingsQualityMetrics(
+ mteb_main_score=75.5,
+ mteb_task_scores={"STS12": 72.3, "STS13": 78.1},
+ )
+
+ dist = create_distribution_summary(
+ mean=0.15,
+ median=0.14,
+ mode=0.14,
+ variance=0.0004,
+ std_dev=0.02,
+ min_val=0.10,
+ max_val=0.20,
+ count=100,
+ total_sum=15.0,
+ )
+
+ metrics = EmbeddingsMetrics(
+ request_totals=StatusBreakdown(
+ successful=100, incomplete=5, errored=2, total=107
+ ),
+ requests_per_second=StatusDistributionSummary(
+ successful=dist, errored=None, incomplete=None, total=None
+ ),
+ request_concurrency=StatusDistributionSummary(
+ successful=dist, errored=None, incomplete=None, total=None
+ ),
+ request_latency=StatusDistributionSummary(
+ successful=dist, errored=None, incomplete=None, total=None
+ ),
+ input_tokens_count=StatusBreakdown(
+ successful=5000, incomplete=200, errored=100, total=5300
+ ),
+ input_tokens_per_second=StatusDistributionSummary(
+ successful=dist, errored=None, incomplete=None, total=None
+ ),
+ quality=quality,
+ encoding_format_breakdown={"float": 80, "base64": 20},
+ )
+
+ assert metrics.request_totals.successful == 100
+ assert metrics.request_totals.total == 107
+ assert metrics.input_tokens_count.successful == 5000
+ assert metrics.quality.mteb_main_score == 75.5
+ assert metrics.encoding_format_breakdown["float"] == 80
+
+ @pytest.mark.sanity
+ def test_invalid_initialization_missing(self):
+ """Missing required fields should fail validation."""
+ with pytest.raises(ValidationError):
+ EmbeddingsMetrics() # type: ignore[call-arg]
+
+ @pytest.mark.smoke
+ def test_marshalling(self):
+ """Test model_dump / model_validate round-trip."""
+ metrics = EmbeddingsMetrics(
+ request_totals=StatusBreakdown(
+ successful=10, incomplete=0, errored=0, total=10
+ ),
+ requests_per_second=StatusDistributionSummary(),
+ request_concurrency=StatusDistributionSummary(),
+ request_latency=StatusDistributionSummary(),
+ input_tokens_count=StatusBreakdown(
+ successful=500, incomplete=0, errored=0, total=500
+ ),
+ input_tokens_per_second=StatusDistributionSummary(),
+ encoding_format_breakdown={"float": 10},
+ )
+
+ dumped = metrics.model_dump()
+ rebuilt = EmbeddingsMetrics.model_validate(dumped)
+ assert (
+ rebuilt.request_totals.successful
+ == metrics.request_totals.successful
+ )
+ assert (
+ rebuilt.input_tokens_count.successful
+ == metrics.input_tokens_count.successful
+ )
+ assert (
+ rebuilt.encoding_format_breakdown
+ == metrics.encoding_format_breakdown
+ )
+
+ @pytest.mark.regression
+ def test_no_output_tokens(self):
+ """Verify embeddings have dummy output token fields for compatibility."""
+ fields = EmbeddingsMetrics.model_fields
+ # Embeddings have dummy output token fields for progress tracker compatibility
+ # They exist but are always zero
+ assert "output_token_count" in fields
+ assert "output_tokens_per_second" in fields
+
+ @pytest.mark.regression
+ def test_no_streaming_metrics(self):
+ """Verify embeddings metrics do not have streaming-related fields."""
+ fields = EmbeddingsMetrics.model_fields
+ # Embeddings should NOT have streaming metrics
+ assert "time_to_first_token" not in fields
+ assert "inter_token_latency" not in fields
+ assert "time_per_output_token" not in fields
diff --git a/tests/unit/extras/test_audio.py b/tests/unit/extras/test_audio.py
index b7f783693..70235aab4 100644
--- a/tests/unit/extras/test_audio.py
+++ b/tests/unit/extras/test_audio.py
@@ -7,7 +7,14 @@
import pytest
import torch
-from guidellm.extras.audio import encode_audio
+# Skip all tests if torchcodec/audio dependencies aren't available
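+# (pytest.importorskip only handles ImportError, so an explicit try/except is
+# used here to also catch the RuntimeError this import can raise)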
+try:
+ from guidellm.extras.audio import encode_audio
+except (ImportError, RuntimeError) as e:
+ pytest.skip(
+ f"Audio dependencies not available: {e}",
+ allow_module_level=True,
+ )
@pytest.fixture
diff --git a/tests/unit/mock_server/handlers/__init__.py b/tests/unit/mock_server/handlers/__init__.py
new file mode 100644
index 000000000..d069e344b
--- /dev/null
+++ b/tests/unit/mock_server/handlers/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for mock server handlers."""
diff --git a/tests/unit/mock_server/handlers/test_embeddings.py b/tests/unit/mock_server/handlers/test_embeddings.py
new file mode 100644
index 000000000..4d40259e1
--- /dev/null
+++ b/tests/unit/mock_server/handlers/test_embeddings.py
@@ -0,0 +1,368 @@
+from __future__ import annotations
+
+import base64
+import struct
+
+import pytest
+
+from guidellm.mock_server.config import MockServerConfig
+from guidellm.mock_server.handlers.embeddings import EmbeddingsHandler
+from guidellm.mock_server.models import (
+ EmbeddingsRequest,
+ EmbeddingsResponse,
+)
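+
+# Response shape assumed here mirrors the OpenAI embeddings API:
+#   {"object": "list", "model": "...", "usage": {...},
+#    "data": [{"object": "embedding", "index": 0, "embedding": ...}]}
+# where "embedding" is a list[float] for encoding_format="float" and a
+# base64 string of packed float32 bytes for encoding_format="base64".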
+
+
+class TestEmbeddingsHandler:
+ """Tests for embeddings mock server handler."""
+
+ @pytest.fixture
+ def handler(self):
+ """Create embeddings handler with default config."""
+ config = MockServerConfig()
+ return EmbeddingsHandler(config)
+
+ @pytest.fixture
+ def handler_with_ttft(self):
+ """Create embeddings handler with TTFT delay."""
+ config = MockServerConfig(ttft_ms=100.0)
+ return EmbeddingsHandler(config)
+
+ @pytest.mark.smoke
+ def test_initialization(self, handler):
+ """Test handler initialization."""
+ assert handler is not None
+ assert handler.config is not None
+
+ @pytest.mark.sanity
+ async def test_handle_basic_request(self, handler):
+ """Test handling a basic embeddings request."""
+ request = EmbeddingsRequest(
+ input="Test sentence for embedding.",
+ model="test-embedding-model",
+ )
+
+ response = await handler.handle(request)
+
+ assert isinstance(response, EmbeddingsResponse)
+ assert response.object == "list"
+ assert len(response.data) == 1
+ assert response.model == "test-embedding-model"
+
+ @pytest.mark.sanity
+ async def test_handle_single_string_input(self, handler):
+ """Test handling request with single string input."""
+ request = EmbeddingsRequest(
+ input="Single string input.",
+ model="test-model",
+ )
+
+ response = await handler.handle(request)
+
+ assert len(response.data) == 1
+ assert response.data[0].index == 0
+ assert response.data[0].object == "embedding"
+
+ @pytest.mark.sanity
+ async def test_handle_list_input(self, handler):
+ """Test handling request with list of strings."""
+ inputs = [
+ "First sentence.",
+ "Second sentence.",
+ "Third sentence.",
+ ]
+
+ request = EmbeddingsRequest(
+ input=inputs,
+ model="test-model",
+ )
+
+ response = await handler.handle(request)
+
+ assert len(response.data) == 3
+ for i, emb_obj in enumerate(response.data):
+ assert emb_obj.index == i
+ assert emb_obj.object == "embedding"
+
+ @pytest.mark.sanity
+ async def test_float_encoding(self, handler):
+ """Test float encoding format (default)."""
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ encoding_format="float",
+ )
+
+ response = await handler.handle(request)
+
+ # Embedding should be a list of floats
+ embedding = response.data[0].embedding
+ assert isinstance(embedding, list)
+ assert all(isinstance(x, float) for x in embedding)
+
+ @pytest.mark.sanity
+ async def test_base64_encoding(self, handler):
+ """Test base64 encoding format."""
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ encoding_format="base64",
+ )
+
+ response = await handler.handle(request)
+
+ # Embedding should be a base64-encoded string
+ embedding = response.data[0].embedding
+ assert isinstance(embedding, str)
+
+ # Verify it's valid base64
+ try:
+ decoded_bytes = base64.b64decode(embedding)
+ assert len(decoded_bytes) > 0
+ except Exception: # noqa: BLE001
+ pytest.fail("Invalid base64 encoding")
+
+ @pytest.mark.regression
+ async def test_base64_encoding_decodes_to_floats(self, handler):
+ """Test that base64 encoding can be decoded back to floats."""
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ encoding_format="base64",
+ )
+
+ response = await handler.handle(request)
+
+ # Decode base64 to float array
+ embedding_b64 = response.data[0].embedding
+ decoded_bytes = base64.b64decode(embedding_b64)
+
+        # Unpack as float32 values; "<" pins little-endian explicitly (the
+        # conventional layout for packed float32 embeddings) rather than
+        # relying on native byte order
+        num_floats = len(decoded_bytes) // 4  # 4 bytes per float32
+        floats = struct.unpack(f"<{num_floats}f", decoded_bytes)
+
+ # Should be a valid array of floats
+ assert len(floats) > 0
+ assert all(isinstance(x, float) for x in floats)
+
+ @pytest.mark.sanity
+ async def test_usage_metrics(self, handler):
+ """Test that usage metrics are populated."""
+ request = EmbeddingsRequest(
+ input="Test sentence with some tokens.",
+ model="test-model",
+ )
+
+ response = await handler.handle(request)
+
+ assert response.usage is not None
+ assert response.usage.prompt_tokens > 0
+ assert response.usage.total_tokens > 0
+ # Embeddings don't have completion tokens
+ assert response.usage.completion_tokens == 0
+
+ @pytest.mark.regression
+ async def test_usage_metrics_batch(self, handler):
+ """Test usage metrics with batch input."""
+ inputs = [
+ "First sentence.",
+ "Second sentence.",
+ "Third sentence.",
+ ]
+
+ request = EmbeddingsRequest(
+ input=inputs,
+ model="test-model",
+ )
+
+ response = await handler.handle(request)
+
+ # Total tokens should sum across all inputs
+ assert response.usage.prompt_tokens > 0
+ assert response.usage.total_tokens == response.usage.prompt_tokens
+
+ @pytest.mark.sanity
+ async def test_dimensions_parameter(self, handler):
+ """Test dimensions parameter (Matryoshka embeddings)."""
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ dimensions=128,
+ encoding_format="float",
+ )
+
+ response = await handler.handle(request)
+
+ # Embedding should have specified dimensions
+ embedding = response.data[0].embedding
+ assert len(embedding) == 128
+
+ @pytest.mark.regression
+ async def test_dimensions_default(self, handler):
+ """Test default dimensions when not specified."""
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ encoding_format="float",
+ )
+
+ response = await handler.handle(request)
+
+ # Default dimensions should be used (typically 384 or similar)
+ embedding = response.data[0].embedding
+ assert len(embedding) > 0
+ # Common default dimension sizes
+ assert len(embedding) in [384, 512, 768, 1024, 1536]
+
+ @pytest.mark.sanity
+ async def test_truncate_prompt_tokens(self, handler):
+ """Test truncate_prompt_tokens parameter."""
+ request = EmbeddingsRequest(
+ input="A very long sentence with many tokens that should be truncated.",
+ model="test-model",
+ truncate_prompt_tokens=10,
+ )
+
+ response = await handler.handle(request)
+
+ # Usage should reflect truncation
+ assert response.usage.prompt_tokens <= 10
+
+ @pytest.mark.regression
+ async def test_embedding_normalized(self, handler):
+ """Test that embeddings are normalized (unit length)."""
+ import math
+
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ encoding_format="float",
+ )
+
+ response = await handler.handle(request)
+
+ embedding = response.data[0].embedding
+
+ # Calculate norm (should be 1.0 for normalized vector)
+ norm = math.sqrt(sum(x * x for x in embedding))
+ assert norm == pytest.approx(1.0, abs=1e-6)
+
+ @pytest.mark.regression
+ async def test_multiple_embeddings_different(self, handler):
+ """Test that different inputs produce different embeddings."""
+ request = EmbeddingsRequest(
+ input=["First sentence.", "Second sentence."],
+ model="test-model",
+ encoding_format="float",
+ )
+
+ response = await handler.handle(request)
+
+ emb1 = response.data[0].embedding
+ emb2 = response.data[1].embedding
+
+ # Embeddings should be different (random generation)
+ assert emb1 != emb2
+
+ @pytest.mark.sanity
+ async def test_ttft_delay(self, handler_with_ttft):
+ """Test that TTFT delay is applied."""
+ import time
+
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ )
+
+        start = time.perf_counter()
+        await handler_with_ttft.handle(request)
+        elapsed = time.perf_counter() - start
+
+        # Should reflect the configured 100ms TTFT; assert a looser 50ms
+        # lower bound to keep the test reliable under timer jitter
+        assert elapsed >= 0.05
+
+ @pytest.mark.regression
+ async def test_empty_input(self, handler):
+ """Test handling empty input string."""
+ request = EmbeddingsRequest(
+ input="",
+ model="test-model",
+ )
+
+ response = await handler.handle(request)
+
+ # Should still produce an embedding (possibly all zeros or minimal)
+ assert len(response.data) == 1
+ assert response.usage.prompt_tokens >= 0
+
+ @pytest.mark.regression
+ async def test_response_model_matches_request(self, handler):
+ """Test that response model matches request model."""
+ model_name = "custom-embedding-model-v2"
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model=model_name,
+ )
+
+ response = await handler.handle(request)
+
+ assert response.model == model_name
+
+ @pytest.mark.sanity
+ async def test_embedding_object_fields(self, handler):
+ """Test that embedding objects have correct fields."""
+ request = EmbeddingsRequest(
+ input=["First.", "Second."],
+ model="test-model",
+ )
+
+ response = await handler.handle(request)
+
+ for emb_obj in response.data:
+ assert hasattr(emb_obj, "object")
+ assert hasattr(emb_obj, "embedding")
+ assert hasattr(emb_obj, "index")
+ assert emb_obj.object == "embedding"
+
+ @pytest.mark.regression
+ async def test_large_batch_input(self, handler):
+ """Test handling large batch of inputs."""
+ inputs = [f"Sentence number {i}." for i in range(100)]
+
+ request = EmbeddingsRequest(
+ input=inputs,
+ model="test-model",
+ )
+
+ response = await handler.handle(request)
+
+ assert len(response.data) == 100
+ for i, emb_obj in enumerate(response.data):
+ assert emb_obj.index == i
+
+ @pytest.mark.regression
+ async def test_user_parameter(self, handler):
+ """Test user parameter (should be accepted but not affect output)."""
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ user="test-user-123",
+ )
+
+ response = await handler.handle(request)
+
+ # Should complete successfully
+ assert isinstance(response, EmbeddingsResponse)
+ assert len(response.data) == 1
+
+ @pytest.mark.sanity
+ async def test_response_object_field(self, handler):
+ """Test that response object field is 'list'."""
+ request = EmbeddingsRequest(
+ input="Test sentence.",
+ model="test-model",
+ )
+
+ response = await handler.handle(request)
+
+ assert response.object == "list"
diff --git a/tests/unit/schemas/test_embeddings_request_stats.py b/tests/unit/schemas/test_embeddings_request_stats.py
new file mode 100644
index 000000000..77e82f843
--- /dev/null
+++ b/tests/unit/schemas/test_embeddings_request_stats.py
@@ -0,0 +1,355 @@
+from __future__ import annotations
+
+import asyncio
+from typing import Any
+
+import numpy as np
+import pytest
+from pydantic import ValidationError
+
+from guidellm.schemas import (
+ EmbeddingsRequestStats,
+ RequestInfo,
+ StandardBaseDict,
+ UsageMetrics,
+)
+from tests.unit.testing_utils import async_timeout
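+
+# Timing model assumed by these tests: request_latency is
+# timings.request_end - timings.request_start, while the start/end time
+# properties fall back to timings.resolve_end when a timestamp is unset.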
+
+
+class TestEmbeddingsRequestStats:
+ """High-coverage, concise tests for EmbeddingsRequestStats."""
+
+ @pytest.fixture(
+ params=[
+ "short_embedding",
+ "long_embedding",
+ "batch_embedding",
+ "float_encoding",
+ "base64_encoding",
+ "with_cosine_similarity",
+ ],
+ )
+ def valid_instances(
+ self, request: pytest.FixtureRequest
+ ) -> tuple[EmbeddingsRequestStats, dict[str, Any]]:
+ """
+ Generate realistic test instances for embeddings requests.
+
+ Returns tuple of (EmbeddingsRequestStats instance, expected values dict).
+ """
+ case_id = request.param
+ rng = np.random.default_rng(hash(case_id) % (2**32))
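+        # hash() is salted per interpreter run, so these draws differ across
+        # processes; that is fine here because expectations are computed from
+        # the same draw rather than hard-coded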
+
+ # Define realistic scenarios based on common embeddings patterns
+ if case_id == "short_embedding":
+ # Quick embedding with few tokens
+ prompt_tokens = 10
+ request_start = 0.0
+ # Embeddings are faster than generative (no output tokens)
+ request_end = request_start + rng.uniform(0.05, 0.15)
+ resolve_end = request_end
+ encoding_format = "float"
+ cosine_similarity = None
+
+ elif case_id == "long_embedding":
+ # Longer text embedding
+ prompt_tokens = 512
+ request_start = 5.0
+ # Proportional to input size
+ request_end = request_start + rng.uniform(0.3, 0.6)
+ resolve_end = request_end
+ encoding_format = "float"
+ cosine_similarity = None
+
+ elif case_id == "batch_embedding":
+ # Batch processing
+ prompt_tokens = 150
+ request_start = 10.0
+ request_end = request_start + rng.uniform(0.2, 0.4)
+ resolve_end = request_end
+ encoding_format = "float"
+ cosine_similarity = None
+
+ elif case_id == "float_encoding":
+ # Float encoding (default)
+ prompt_tokens = 50
+ request_start = 0.0
+ request_end = request_start + rng.uniform(0.1, 0.2)
+ resolve_end = request_end
+ encoding_format = "float"
+ cosine_similarity = None
+
+ elif case_id == "base64_encoding":
+ # Base64 encoding
+ prompt_tokens = 50
+ request_start = 0.0
+ request_end = request_start + rng.uniform(0.1, 0.2)
+ resolve_end = request_end
+ encoding_format = "base64"
+ cosine_similarity = None
+
+ else: # with_cosine_similarity
+ # With quality validation
+ prompt_tokens = 25
+ request_start = 0.0
+ request_end = request_start + rng.uniform(0.08, 0.18)
+ resolve_end = request_end
+ encoding_format = "float"
+ # Realistic cosine similarity (0.95-0.99 for good models)
+ cosine_similarity = rng.uniform(0.95, 0.99)
+
+ # Build timings object via RequestInfo
+ info = RequestInfo(request_id=case_id, status="completed")
+ info.timings.request_start = request_start
+ info.timings.request_end = request_end
+ info.timings.resolve_end = resolve_end
+
+ stats = EmbeddingsRequestStats(
+ request_id=case_id,
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=prompt_tokens),
+ cosine_similarity=cosine_similarity,
+ encoding_format=encoding_format,
+ )
+
+ # Compute expected properties
+ expected_latency = (
+ request_end - request_start
+ if request_start is not None
+ else None
+ )
+
+ expected: dict[str, Any] = {
+ "request_start_time": (
+ request_start if request_start is not None else resolve_end
+ ),
+ "request_end_time": (
+ request_end if request_end is not None else resolve_end
+ ),
+ "request_latency": expected_latency,
+ "prompt_tokens": prompt_tokens,
+ "cosine_similarity": cosine_similarity,
+ "encoding_format": encoding_format,
+ }
+ return stats, expected
+
+ @pytest.mark.smoke
+ def test_class_signatures(self):
+ """Validate public surface, inheritance, and key properties."""
+ assert issubclass(EmbeddingsRequestStats, StandardBaseDict)
+ assert hasattr(EmbeddingsRequestStats, "model_dump")
+ assert hasattr(EmbeddingsRequestStats, "model_validate")
+
+ # fields exposed
+ fields = EmbeddingsRequestStats.model_fields
+ for field_name in (
+ "type_",
+ "request_id",
+ "request_args",
+ "response_id",
+ "info",
+ "input_metrics",
+ "cosine_similarity",
+ "encoding_format",
+ ):
+ assert field_name in fields
+
+ # computed properties
+ for prop_name in (
+ "request_start_time",
+ "request_end_time",
+ "request_latency",
+ "prompt_tokens",
+ ):
+ assert hasattr(EmbeddingsRequestStats, prop_name)
+
+ @pytest.mark.smoke
+ def test_initialization(self, valid_instances):
+ """Initialization from realistic inputs."""
+ instance, expected = valid_instances
+ assert isinstance(instance, EmbeddingsRequestStats)
+ assert instance.type_ == "embeddings_request_stats"
+ assert instance.request_id
+
+ # Basic fields echo
+ assert instance.prompt_tokens == expected["prompt_tokens"]
+ assert instance.encoding_format == expected["encoding_format"]
+ if expected["cosine_similarity"] is not None:
+ assert instance.cosine_similarity == pytest.approx(
+ expected["cosine_similarity"], rel=1e-6, abs=1e-6
+ )
+
+ @pytest.mark.sanity
+ def test_invalid_initialization_missing(self):
+ """Missing required fields should fail validation."""
+ with pytest.raises(ValidationError):
+ EmbeddingsRequestStats() # type: ignore[call-arg]
+
+ @pytest.mark.sanity
+ @pytest.mark.parametrize(
+ ("field_name", "bad_value"),
+ [
+ ("request_id", None),
+ ("request_id", 123),
+ ("info", None),
+ ("info", "not_request_info"),
+ ("input_metrics", None),
+ ("input_metrics", "not_usage_metrics"),
+ ("cosine_similarity", "not_a_float"),
+ ("encoding_format", 123),
+ ],
+ )
+ def test_invalid_initialization_values(self, field_name: str, bad_value: Any):
+ """Type/None mismatches should raise."""
+ info = RequestInfo(request_id="bad-1", status="completed")
+ info.timings.resolve_end = 1.0
+ base = {
+ "request_id": "ok",
+ "info": info,
+ "input_metrics": UsageMetrics(text_tokens=1),
+ }
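+ # Override one otherwise-valid field with the bad value to isolate the failure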
+ base[field_name] = bad_value
+ with pytest.raises(ValidationError):
+ EmbeddingsRequestStats(**base) # type: ignore[arg-type]
+
+ @pytest.mark.regression
+ def test_computed_properties_match_expected(self, valid_instances):
+ """All computed properties should match precomputed expectations."""
+ instance, expected = valid_instances
+
+ # direct scalar comparisons
+ for key in (
+ "request_start_time",
+ "request_end_time",
+ "request_latency",
+ "prompt_tokens",
+ ):
+ got = getattr(instance, key)
+ exp = expected[key]
+ if isinstance(exp, float):
+ # tolerant float compare; exp is a float here, so no None case applies
+ assert got == pytest.approx(exp, rel=1e-6, abs=1e-6)
+ else:
+ assert got == exp
+
+ @pytest.mark.sanity
+ def test_none_paths_for_latency(self):
+ """Ensure None is returned when required timing parts are missing."""
+ info = RequestInfo(request_id="none-lat", status="completed")
+ info.timings.resolve_end = 1.0 # minimal to avoid property error
+ instance = EmbeddingsRequestStats(
+ request_id="none-lat",
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=10),
+ )
+ assert instance.request_latency is None
+
+ @pytest.mark.smoke
+ def test_marshalling(self, valid_instances):
+ """model_dump / model_validate round-trip."""
+ instance, _ = valid_instances
+ dumped = instance.model_dump()
+ assert dumped["type_"] == "embeddings_request_stats"
+ rebuilt = EmbeddingsRequestStats.model_validate(dumped)
+ assert rebuilt.request_id == instance.request_id
+ assert rebuilt.prompt_tokens == instance.prompt_tokens
+ assert rebuilt.encoding_format == instance.encoding_format
+
+ @pytest.mark.sanity
+ def test_optional_fields(self):
+ """Test optional fields request_args, cosine_similarity."""
+ info = RequestInfo(request_id="opt-test", status="completed")
+ info.timings.resolve_end = 10.0
+
+ # Without optional fields
+ instance = EmbeddingsRequestStats(
+ request_id="opt-test",
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=5),
+ )
+ assert instance.request_args is None
+ assert instance.cosine_similarity is None
+ assert instance.encoding_format == "float" # default
+
+ # With optional fields
+ instance_with_opts = EmbeddingsRequestStats(
+ request_id="opt-test-2",
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=5),
+ request_args="dimensions=384",
+ cosine_similarity=0.987,
+ encoding_format="base64",
+ )
+ assert instance_with_opts.request_args == "dimensions=384"
+ assert instance_with_opts.cosine_similarity == 0.987
+ assert instance_with_opts.encoding_format == "base64"
+
+ @pytest.mark.sanity
+ def test_encoding_format_values(self):
+ """Test valid encoding format values."""
+ info = RequestInfo(request_id="enc-test", status="completed")
+ info.timings.resolve_end = 10.0
+
+ # Float encoding
+ instance_float = EmbeddingsRequestStats(
+ request_id="enc-float",
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=5),
+ encoding_format="float",
+ )
+ assert instance_float.encoding_format == "float"
+
+ # Base64 encoding
+ instance_base64 = EmbeddingsRequestStats(
+ request_id="enc-base64",
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=5),
+ encoding_format="base64",
+ )
+ assert instance_base64.encoding_format == "base64"
+
+ @pytest.mark.sanity
+ def test_cosine_similarity_range(self):
+ """Test cosine similarity values within expected range."""
+ info = RequestInfo(request_id="cos-test", status="completed")
+ info.timings.resolve_end = 10.0
+
+ # Valid cosine similarity values (-1 to 1)
+ for cos_val in [-1.0, -0.5, 0.0, 0.5, 0.99, 1.0]:
+ instance = EmbeddingsRequestStats(
+ request_id=f"cos-{cos_val}",
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=5),
+ cosine_similarity=cos_val,
+ )
+ assert instance.cosine_similarity == pytest.approx(cos_val, abs=1e-6)
+
+ @pytest.mark.regression
+ def test_zero_division_edge_cases(self):
+ """Test edge cases that could cause zero division errors."""
+ info = RequestInfo(request_id="zero-div", status="completed")
+ info.timings.resolve_end = 10.0
+ info.timings.request_start = 10.0 # Same as end
+ info.timings.request_end = 10.0
+
+ stats = EmbeddingsRequestStats(
+ request_id="zero-div",
+ info=info,
+ input_metrics=UsageMetrics(text_tokens=5),
+ )
+
+ # Zero latency should be returned as 0.0 (not None, no division error)
+ assert stats.request_latency == 0.0
+
+ @pytest.mark.sanity
+ @pytest.mark.asyncio
+ @async_timeout(0.2)
+ async def test_async_context_usage(self, valid_instances):
+ """Light async smoke to satisfy async-timeout policy."""
+ instance, expected = valid_instances
+ await asyncio.sleep(0) # yield
+ assert instance.request_id
+ assert instance.prompt_tokens == expected["prompt_tokens"]
+ assert instance.encoding_format == expected["encoding_format"]
diff --git a/tox.ini b/tox.ini
index b6ae685e6..ce4a84196 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,6 +6,9 @@ env_list = py3{10,11,12,13}
[testenv:tests]
description = Run all tests
dependency_groups = dev
+deps =
+ pytest-httpx~=0.35.0
+ respx~=0.22.0
commands =
python -m pytest {posargs:tests/}
diff --git a/uv.lock b/uv.lock
index a068c69b2..8ca6a83bb 100644
--- a/uv.lock
+++ b/uv.lock
@@ -801,6 +801,7 @@ dependencies = [
{ name = "ftfy" },
{ name = "httpx", extra = ["http2"] },
{ name = "loguru" },
+ { name = "more-itertools" },
{ name = "msgpack" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
{ name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -824,8 +825,10 @@ all = [
{ name = "mistral-common" },
{ name = "msgpack" },
{ name = "msgspec" },
+ { name = "mteb" },
{ name = "orjson" },
{ name = "pillow" },
+ { name = "sentence-transformers" },
{ name = "tiktoken" },
{ name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
{ name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" },
@@ -851,6 +854,7 @@ dev = [
{ name = "mkdocs-linkcheck" },
{ name = "msgpack" },
{ name = "msgspec" },
+ { name = "mteb" },
{ name = "mypy" },
{ name = "orjson" },
{ name = "pandas-stubs" },
@@ -867,6 +871,7 @@ dev = [
{ name = "ruff" },
{ name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
{ name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "sentence-transformers" },
{ name = "setuptools" },
{ name = "setuptools-git-versioning" },
{ name = "sphinx" },
@@ -880,6 +885,10 @@ dev = [
{ name = "types-toml" },
{ name = "uvloop" },
]
+embeddings = [
+ { name = "mteb" },
+ { name = "sentence-transformers" },
+]
perf = [
{ name = "msgpack" },
{ name = "msgspec" },
@@ -923,7 +932,7 @@ requires-dist = [
{ name = "faker" },
{ name = "ftfy", specifier = ">=6.0.0" },
{ name = "guidellm", extras = ["all"], marker = "extra == 'dev'" },
- { name = "guidellm", extras = ["audio", "perf", "tokenizers", "vision"], marker = "extra == 'all'" },
+ { name = "guidellm", extras = ["audio", "embeddings", "perf", "tokenizers", "vision"], marker = "extra == 'all'" },
{ name = "guidellm", extras = ["perf", "tokenizers"], marker = "extra == 'recommended'" },
{ name = "httpx", extras = ["http2"], specifier = "<1.0.0" },
{ name = "loguru" },
@@ -934,9 +943,11 @@ requires-dist = [
{ name = "mdformat-gfm", marker = "extra == 'dev'", specifier = "~=1.0.0" },
{ name = "mistral-common", marker = "extra == 'tokenizers'" },
{ name = "mkdocs-linkcheck", marker = "extra == 'dev'", specifier = "~=1.0.6" },
+ { name = "more-itertools", specifier = ">=10.8.0" },
{ name = "msgpack" },
{ name = "msgpack", marker = "extra == 'perf'" },
{ name = "msgspec", marker = "extra == 'perf'" },
+ { name = "mteb", marker = "extra == 'embeddings'", specifier = ">=1.0.0" },
{ name = "mypy", marker = "extra == 'dev'", specifier = "~=1.15.0" },
{ name = "numpy", specifier = ">=2.0.0" },
{ name = "orjson", marker = "extra == 'perf'" },
@@ -959,6 +970,7 @@ requires-dist = [
{ name = "ruff", marker = "extra == 'dev'", specifier = "~=0.11.7" },
{ name = "sanic" },
{ name = "scipy", marker = "extra == 'dev'", specifier = "~=1.10" },
+ { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" },
{ name = "setuptools", marker = "extra == 'dev'", specifier = ">=61.0" },
{ name = "setuptools-git-versioning", marker = "extra == 'dev'", specifier = ">=2.0,<3" },
{ name = "sphinx", marker = "extra == 'dev'", specifier = "~=7.1.2" },
@@ -975,7 +987,7 @@ requires-dist = [
{ name = "uvloop", specifier = ">=0.18" },
{ name = "uvloop", marker = "extra == 'perf'" },
]
-provides-extras = ["all", "recommended", "perf", "tokenizers", "audio", "vision", "dev"]
+provides-extras = ["all", "recommended", "perf", "tokenizers", "audio", "vision", "embeddings", "dev"]
[package.metadata.requires-dev]
dev = [{ name = "guidellm", extras = ["dev"] }]
@@ -1213,6 +1225,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
]
+[[package]]
+name = "joblib"
+version = "1.5.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" },
+]
+
[[package]]
name = "jsonschema"
version = "4.26.0"
@@ -1591,6 +1612,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/74/87/240a21533662ba227ec683adcc187ec3a64e927ccf0c35f0d3b1b2fd331c/mkdocs_linkcheck-1.0.6-py3-none-any.whl", hash = "sha256:70dceae090101778002d949dc7b55f56eeb0c294bd9053fb6b197c26591665b1", size = 19759, upload-time = "2021-08-20T20:38:18.87Z" },
]
+[[package]]
+name = "more-itertools"
+version = "10.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload-time = "2025-09-02T15:23:11.018Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" },
+]
+
[[package]]
name = "mpmath"
version = "1.3.0"
@@ -1717,6 +1747,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c8/3e/c5187de84bb2c2ca334ab163fcacf19a23ebb1d876c837f81a1b324a15bf/msgspec-0.20.0-cp314-cp314t-win_arm64.whl", hash = "sha256:93f23528edc51d9f686808a361728e903d6f2be55c901d6f5c92e44c6d546bfc", size = 183011, upload-time = "2025-11-24T03:56:16.442Z" },
]
+[[package]]
+name = "mteb"
+version = "2.7.30"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "datasets" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "polars" },
+ { name = "pydantic" },
+ { name = "pytrec-eval-terrier" },
+ { name = "requests" },
+ { name = "rich" },
+ { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "sentence-transformers" },
+ { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+ { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" },
+ { name = "tqdm" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/64/fd/9498edc7037ebe1e0cd4f34b2e02b91cb27e97748985f12ced5770b62e18/mteb-2.7.30.tar.gz", hash = "sha256:a01a7ab0e2d4153c16c20d180b2380cd3e92b5bccae666a263460876755419f5", size = 3125915, upload-time = "2026-02-12T16:15:38.239Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/58/e0/32c942437499fb41a74ea55e41fee7e28ba9db31e1794dd435f6e13c8b4f/mteb-2.7.30-py3-none-any.whl", hash = "sha256:c2ee3da7ba4429e98d5d85d5280c1e44430b653d5983c6b5de83e19383bd678b", size = 4778663, upload-time = "2026-02-12T16:15:36.242Z" },
+]
+
[[package]]
name = "multidict"
version = "6.7.0"
@@ -2283,100 +2341,100 @@ wheels = [
[[package]]
name = "pillow"
-version = "12.0.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/5d/08/26e68b6b5da219c2a2cb7b563af008b53bb8e6b6fcb3fa40715fcdb2523a/pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b", size = 5289809, upload-time = "2025-10-15T18:21:27.791Z" },
- { url = "https://files.pythonhosted.org/packages/cb/e9/4e58fb097fb74c7b4758a680aacd558810a417d1edaa7000142976ef9d2f/pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1", size = 4650606, upload-time = "2025-10-15T18:21:29.823Z" },
- { url = "https://files.pythonhosted.org/packages/4b/e0/1fa492aa9f77b3bc6d471c468e62bfea1823056bf7e5e4f1914d7ab2565e/pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363", size = 6221023, upload-time = "2025-10-15T18:21:31.415Z" },
- { url = "https://files.pythonhosted.org/packages/c1/09/4de7cd03e33734ccd0c876f0251401f1314e819cbfd89a0fcb6e77927cc6/pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca", size = 8024937, upload-time = "2025-10-15T18:21:33.453Z" },
- { url = "https://files.pythonhosted.org/packages/2e/69/0688e7c1390666592876d9d474f5e135abb4acb39dcb583c4dc5490f1aff/pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e", size = 6334139, upload-time = "2025-10-15T18:21:35.395Z" },
- { url = "https://files.pythonhosted.org/packages/ed/1c/880921e98f525b9b44ce747ad1ea8f73fd7e992bafe3ca5e5644bf433dea/pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782", size = 7026074, upload-time = "2025-10-15T18:21:37.219Z" },
- { url = "https://files.pythonhosted.org/packages/28/03/96f718331b19b355610ef4ebdbbde3557c726513030665071fd025745671/pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10", size = 6448852, upload-time = "2025-10-15T18:21:39.168Z" },
- { url = "https://files.pythonhosted.org/packages/3a/a0/6a193b3f0cc9437b122978d2c5cbce59510ccf9a5b48825096ed7472da2f/pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa", size = 7117058, upload-time = "2025-10-15T18:21:40.997Z" },
- { url = "https://files.pythonhosted.org/packages/a7/c4/043192375eaa4463254e8e61f0e2ec9a846b983929a8d0a7122e0a6d6fff/pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275", size = 6295431, upload-time = "2025-10-15T18:21:42.518Z" },
- { url = "https://files.pythonhosted.org/packages/92/c6/c2f2fc7e56301c21827e689bb8b0b465f1b52878b57471a070678c0c33cd/pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d", size = 7000412, upload-time = "2025-10-15T18:21:44.404Z" },
- { url = "https://files.pythonhosted.org/packages/b2/d2/5f675067ba82da7a1c238a73b32e3fd78d67f9d9f80fbadd33a40b9c0481/pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7", size = 2435903, upload-time = "2025-10-15T18:21:46.29Z" },
- { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" },
- { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" },
- { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" },
- { url = "https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" },
- { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" },
- { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" },
- { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" },
- { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" },
- { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" },
- { url = "https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" },
- { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" },
- { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" },
- { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" },
- { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" },
- { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" },
- { url = "https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" },
- { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" },
- { url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" },
- { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" },
- { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" },
- { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" },
- { url = "https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" },
- { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" },
- { url = "https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" },
- { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" },
- { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" },
- { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" },
- { url = "https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" },
- { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" },
- { url = "https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" },
- { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" },
- { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" },
- { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" },
- { url = "https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" },
- { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" },
- { url = "https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" },
- { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" },
- { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" },
- { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" },
- { url = "https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" },
- { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" },
- { url = "https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" },
- { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" },
- { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" },
- { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" },
- { url = "https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" },
- { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" },
- { url = "https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" },
- { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" },
- { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" },
- { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" },
- { url = "https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" },
- { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" },
- { url = "https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" },
- { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" },
- { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" },
- { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" },
- { url = "https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" },
- { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" },
- { url = "https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" },
- { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" },
- { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" },
- { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" },
- { url = "https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" },
- { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" },
- { url = "https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" },
- { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" },
- { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" },
- { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" },
- { url = "https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" },
- { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" },
- { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" },
- { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" },
- { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" },
- { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" },
- { url = "https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" },
- { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" },
- { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" },
- { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" },
+version = "12.1.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1d/30/5bd3d794762481f8c8ae9c80e7b76ecea73b916959eb587521358ef0b2f9/pillow-12.1.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1f1625b72740fdda5d77b4def688eb8fd6490975d06b909fd19f13f391e077e0", size = 5304099, upload-time = "2026-02-11T04:20:06.13Z" },
+ { url = "https://files.pythonhosted.org/packages/bd/c1/aab9e8f3eeb4490180e357955e15c2ef74b31f64790ff356c06fb6cf6d84/pillow-12.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:178aa072084bd88ec759052feca8e56cbb14a60b39322b99a049e58090479713", size = 4657880, upload-time = "2026-02-11T04:20:09.291Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/0a/9879e30d56815ad529d3985aeff5af4964202425c27261a6ada10f7cbf53/pillow-12.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b66e95d05ba806247aaa1561f080abc7975daf715c30780ff92a20e4ec546e1b", size = 6222587, upload-time = "2026-02-11T04:20:10.82Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/5f/a1b72ff7139e4f89014e8d451442c74a774d5c43cd938fb0a9f878576b37/pillow-12.1.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89c7e895002bbe49cdc5426150377cbbc04767d7547ed145473f496dfa40408b", size = 8027678, upload-time = "2026-02-11T04:20:12.455Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/c2/c7cb187dac79a3d22c3ebeae727abee01e077c8c7d930791dc592f335153/pillow-12.1.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a5cbdcddad0af3da87cb16b60d23648bc3b51967eb07223e9fed77a82b457c4", size = 6335777, upload-time = "2026-02-11T04:20:14.441Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/7b/f9b09a7804ec7336effb96c26d37c29d27225783dc1501b7d62dcef6ae25/pillow-12.1.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9f51079765661884a486727f0729d29054242f74b46186026582b4e4769918e4", size = 7027140, upload-time = "2026-02-11T04:20:16.387Z" },
+ { url = "https://files.pythonhosted.org/packages/98/b2/2fa3c391550bd421b10849d1a2144c44abcd966daadd2f7c12e19ea988c4/pillow-12.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:99c1506ea77c11531d75e3a412832a13a71c7ebc8192ab9e4b2e355555920e3e", size = 6449855, upload-time = "2026-02-11T04:20:18.554Z" },
+ { url = "https://files.pythonhosted.org/packages/96/ff/9caf4b5b950c669263c39e96c78c0d74a342c71c4f43fd031bb5cb7ceac9/pillow-12.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:36341d06738a9f66c8287cf8b876d24b18db9bd8740fa0672c74e259ad408cff", size = 7151329, upload-time = "2026-02-11T04:20:20.646Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/f8/4b24841f582704da675ca535935bccb32b00a6da1226820845fac4a71136/pillow-12.1.1-cp310-cp310-win32.whl", hash = "sha256:6c52f062424c523d6c4db85518774cc3d50f5539dd6eed32b8f6229b26f24d40", size = 6325574, upload-time = "2026-02-11T04:20:22.43Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/f9/9f6b01c0881d7036063aa6612ef04c0e2cad96be21325a1e92d0203f8e91/pillow-12.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6008de247150668a705a6338156efb92334113421ceecf7438a12c9a12dab23", size = 7032347, upload-time = "2026-02-11T04:20:23.932Z" },
+ { url = "https://files.pythonhosted.org/packages/79/13/c7922edded3dcdaf10c59297540b72785620abc0538872c819915746757d/pillow-12.1.1-cp310-cp310-win_arm64.whl", hash = "sha256:1a9b0ee305220b392e1124a764ee4265bd063e54a751a6b62eff69992f457fa9", size = 2453457, upload-time = "2026-02-11T04:20:25.392Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/46/5da1ec4a5171ee7bf1a0efa064aba70ba3d6e0788ce3f5acd1375d23c8c0/pillow-12.1.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e879bb6cd5c73848ef3b2b48b8af9ff08c5b71ecda8048b7dd22d8a33f60be32", size = 5304084, upload-time = "2026-02-11T04:20:27.501Z" },
+ { url = "https://files.pythonhosted.org/packages/78/93/a29e9bc02d1cf557a834da780ceccd54e02421627200696fcf805ebdc3fb/pillow-12.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:365b10bb9417dd4498c0e3b128018c4a624dc11c7b97d8cc54effe3b096f4c38", size = 4657866, upload-time = "2026-02-11T04:20:29.827Z" },
+ { url = "https://files.pythonhosted.org/packages/13/84/583a4558d492a179d31e4aae32eadce94b9acf49c0337c4ce0b70e0a01f2/pillow-12.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4ce8e329c93845720cd2014659ca67eac35f6433fd3050393d85f3ecef0dad5", size = 6232148, upload-time = "2026-02-11T04:20:31.329Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/e2/53c43334bbbb2d3b938978532fbda8e62bb6e0b23a26ce8592f36bcc4987/pillow-12.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc354a04072b765eccf2204f588a7a532c9511e8b9c7f900e1b64e3e33487090", size = 8038007, upload-time = "2026-02-11T04:20:34.225Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/a6/3d0e79c8a9d58150dd98e199d7c1c56861027f3829a3a60b3c2784190180/pillow-12.1.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e7976bf1910a8116b523b9f9f58bf410f3e8aa330cd9a2bb2953f9266ab49af", size = 6345418, upload-time = "2026-02-11T04:20:35.858Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/c8/46dfeac5825e600579157eea177be43e2f7ff4a99da9d0d0a49533509ac5/pillow-12.1.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:597bd9c8419bc7c6af5604e55847789b69123bbe25d65cc6ad3012b4f3c98d8b", size = 7034590, upload-time = "2026-02-11T04:20:37.91Z" },
+ { url = "https://files.pythonhosted.org/packages/af/bf/e6f65d3db8a8bbfeaf9e13cc0417813f6319863a73de934f14b2229ada18/pillow-12.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2c1fc0f2ca5f96a3c8407e41cca26a16e46b21060fe6d5b099d2cb01412222f5", size = 6458655, upload-time = "2026-02-11T04:20:39.496Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/c2/66091f3f34a25894ca129362e510b956ef26f8fb67a0e6417bc5744e56f1/pillow-12.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:578510d88c6229d735855e1f278aa305270438d36a05031dfaae5067cc8eb04d", size = 7159286, upload-time = "2026-02-11T04:20:41.139Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/5a/24bc8eb526a22f957d0cec6243146744966d40857e3d8deb68f7902ca6c1/pillow-12.1.1-cp311-cp311-win32.whl", hash = "sha256:7311c0a0dcadb89b36b7025dfd8326ecfa36964e29913074d47382706e516a7c", size = 6328663, upload-time = "2026-02-11T04:20:43.184Z" },
+ { url = "https://files.pythonhosted.org/packages/31/03/bef822e4f2d8f9d7448c133d0a18185d3cce3e70472774fffefe8b0ed562/pillow-12.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:fbfa2a7c10cc2623f412753cddf391c7f971c52ca40a3f65dc5039b2939e8563", size = 7031448, upload-time = "2026-02-11T04:20:44.696Z" },
+ { url = "https://files.pythonhosted.org/packages/49/70/f76296f53610bd17b2e7d31728b8b7825e3ac3b5b3688b51f52eab7c0818/pillow-12.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:b81b5e3511211631b3f672a595e3221252c90af017e399056d0faabb9538aa80", size = 2453651, upload-time = "2026-02-11T04:20:46.243Z" },
+ { url = "https://files.pythonhosted.org/packages/07/d3/8df65da0d4df36b094351dce696f2989bec731d4f10e743b1c5f4da4d3bf/pillow-12.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab323b787d6e18b3d91a72fc99b1a2c28651e4358749842b8f8dfacd28ef2052", size = 5262803, upload-time = "2026-02-11T04:20:47.653Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/71/5026395b290ff404b836e636f51d7297e6c83beceaa87c592718747e670f/pillow-12.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adebb5bee0f0af4909c30db0d890c773d1a92ffe83da908e2e9e720f8edf3984", size = 4657601, upload-time = "2026-02-11T04:20:49.328Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/2e/1001613d941c67442f745aff0f7cc66dd8df9a9c084eb497e6a543ee6f7e/pillow-12.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb66b7cc26f50977108790e2456b7921e773f23db5630261102233eb355a3b79", size = 6234995, upload-time = "2026-02-11T04:20:51.032Z" },
+ { url = "https://files.pythonhosted.org/packages/07/26/246ab11455b2549b9233dbd44d358d033a2f780fa9007b61a913c5b2d24e/pillow-12.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aee2810642b2898bb187ced9b349e95d2a7272930796e022efaf12e99dccd293", size = 8045012, upload-time = "2026-02-11T04:20:52.882Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/8b/07587069c27be7535ac1fe33874e32de118fbd34e2a73b7f83436a88368c/pillow-12.1.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0b1cd6232e2b618adcc54d9882e4e662a089d5768cd188f7c245b4c8c44a397", size = 6349638, upload-time = "2026-02-11T04:20:54.444Z" },
+ { url = "https://files.pythonhosted.org/packages/ff/79/6df7b2ee763d619cda2fb4fea498e5f79d984dae304d45a8999b80d6cf5c/pillow-12.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7aac39bcf8d4770d089588a2e1dd111cbaa42df5a94be3114222057d68336bd0", size = 7041540, upload-time = "2026-02-11T04:20:55.97Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/5e/2ba19e7e7236d7529f4d873bdaf317a318896bac289abebd4bb00ef247f0/pillow-12.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ab174cd7d29a62dd139c44bf74b698039328f45cb03b4596c43473a46656b2f3", size = 6462613, upload-time = "2026-02-11T04:20:57.542Z" },
+ { url = "https://files.pythonhosted.org/packages/03/03/31216ec124bb5c3dacd74ce8efff4cc7f52643653bad4825f8f08c697743/pillow-12.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:339ffdcb7cbeaa08221cd401d517d4b1fe7a9ed5d400e4a8039719238620ca35", size = 7166745, upload-time = "2026-02-11T04:20:59.196Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/e7/7c4552d80052337eb28653b617eafdef39adfb137c49dd7e831b8dc13bc5/pillow-12.1.1-cp312-cp312-win32.whl", hash = "sha256:5d1f9575a12bed9e9eedd9a4972834b08c97a352bd17955ccdebfeca5913fa0a", size = 6328823, upload-time = "2026-02-11T04:21:01.385Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/17/688626d192d7261bbbf98846fc98995726bddc2c945344b65bec3a29d731/pillow-12.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:21329ec8c96c6e979cd0dfd29406c40c1d52521a90544463057d2aaa937d66a6", size = 7033367, upload-time = "2026-02-11T04:21:03.536Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/fe/a0ef1f73f939b0eca03ee2c108d0043a87468664770612602c63266a43c4/pillow-12.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:af9a332e572978f0218686636610555ae3defd1633597be015ed50289a03c523", size = 2453811, upload-time = "2026-02-11T04:21:05.116Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/11/6db24d4bd7685583caeae54b7009584e38da3c3d4488ed4cd25b439de486/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:d242e8ac078781f1de88bf823d70c1a9b3c7950a44cdf4b7c012e22ccbcd8e4e", size = 4062689, upload-time = "2026-02-11T04:21:06.804Z" },
+ { url = "https://files.pythonhosted.org/packages/33/c0/ce6d3b1fe190f0021203e0d9b5b99e57843e345f15f9ef22fcd43842fd21/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:02f84dfad02693676692746df05b89cf25597560db2857363a208e393429f5e9", size = 4138535, upload-time = "2026-02-11T04:21:08.452Z" },
+ { url = "https://files.pythonhosted.org/packages/a0/c6/d5eb6a4fb32a3f9c21a8c7613ec706534ea1cf9f4b3663e99f0d83f6fca8/pillow-12.1.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:e65498daf4b583091ccbb2556c7000abf0f3349fcd57ef7adc9a84a394ed29f6", size = 3601364, upload-time = "2026-02-11T04:21:10.194Z" },
+ { url = "https://files.pythonhosted.org/packages/14/a1/16c4b823838ba4c9c52c0e6bbda903a3fe5a1bdbf1b8eb4fff7156f3e318/pillow-12.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c6db3b84c87d48d0088943bf33440e0c42370b99b1c2a7989216f7b42eede60", size = 5262561, upload-time = "2026-02-11T04:21:11.742Z" },
+ { url = "https://files.pythonhosted.org/packages/bb/ad/ad9dc98ff24f485008aa5cdedaf1a219876f6f6c42a4626c08bc4e80b120/pillow-12.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8b7e5304e34942bf62e15184219a7b5ad4ff7f3bb5cca4d984f37df1a0e1aee2", size = 4657460, upload-time = "2026-02-11T04:21:13.786Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/1b/f1a4ea9a895b5732152789326202a82464d5254759fbacae4deea3069334/pillow-12.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5bddd742a44b7e6b1e773ab5db102bd7a94c32555ba656e76d319d19c3850", size = 6232698, upload-time = "2026-02-11T04:21:15.949Z" },
+ { url = "https://files.pythonhosted.org/packages/95/f4/86f51b8745070daf21fd2e5b1fe0eb35d4db9ca26e6d58366562fb56a743/pillow-12.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc44ef1f3de4f45b50ccf9136999d71abb99dca7706bc75d222ed350b9fd2289", size = 8041706, upload-time = "2026-02-11T04:21:17.723Z" },
+ { url = "https://files.pythonhosted.org/packages/29/9b/d6ecd956bb1266dd1045e995cce9b8d77759e740953a1c9aad9502a0461e/pillow-12.1.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a8eb7ed8d4198bccbd07058416eeec51686b498e784eda166395a23eb99138e", size = 6346621, upload-time = "2026-02-11T04:21:19.547Z" },
+ { url = "https://files.pythonhosted.org/packages/71/24/538bff45bde96535d7d998c6fed1a751c75ac7c53c37c90dc2601b243893/pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47b94983da0c642de92ced1702c5b6c292a84bd3a8e1d1702ff923f183594717", size = 7038069, upload-time = "2026-02-11T04:21:21.378Z" },
+ { url = "https://files.pythonhosted.org/packages/94/0e/58cb1a6bc48f746bc4cb3adb8cabff73e2742c92b3bf7a220b7cf69b9177/pillow-12.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:518a48c2aab7ce596d3bf79d0e275661b846e86e4d0e7dec34712c30fe07f02a", size = 6460040, upload-time = "2026-02-11T04:21:23.148Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/57/9045cb3ff11eeb6c1adce3b2d60d7d299d7b273a2e6c8381a524abfdc474/pillow-12.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a550ae29b95c6dc13cf69e2c9dc5747f814c54eeb2e32d683e5e93af56caa029", size = 7164523, upload-time = "2026-02-11T04:21:25.01Z" },
+ { url = "https://files.pythonhosted.org/packages/73/f2/9be9cb99f2175f0d4dbadd6616ce1bf068ee54a28277ea1bf1fbf729c250/pillow-12.1.1-cp313-cp313-win32.whl", hash = "sha256:a003d7422449f6d1e3a34e3dd4110c22148336918ddbfc6a32581cd54b2e0b2b", size = 6332552, upload-time = "2026-02-11T04:21:27.238Z" },
+ { url = "https://files.pythonhosted.org/packages/3f/eb/b0834ad8b583d7d9d42b80becff092082a1c3c156bb582590fcc973f1c7c/pillow-12.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:344cf1e3dab3be4b1fa08e449323d98a2a3f819ad20f4b22e77a0ede31f0faa1", size = 7040108, upload-time = "2026-02-11T04:21:29.462Z" },
+ { url = "https://files.pythonhosted.org/packages/d5/7d/fc09634e2aabdd0feabaff4a32f4a7d97789223e7c2042fd805ea4b4d2c2/pillow-12.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:5c0dd1636633e7e6a0afe7bf6a51a14992b7f8e60de5789018ebbdfae55b040a", size = 2453712, upload-time = "2026-02-11T04:21:31.072Z" },
+ { url = "https://files.pythonhosted.org/packages/19/2a/b9d62794fc8a0dd14c1943df68347badbd5511103e0d04c035ffe5cf2255/pillow-12.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0330d233c1a0ead844fc097a7d16c0abff4c12e856c0b325f231820fee1f39da", size = 5264880, upload-time = "2026-02-11T04:21:32.865Z" },
+ { url = "https://files.pythonhosted.org/packages/26/9d/e03d857d1347fa5ed9247e123fcd2a97b6220e15e9cb73ca0a8d91702c6e/pillow-12.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dae5f21afb91322f2ff791895ddd8889e5e947ff59f71b46041c8ce6db790bc", size = 4660616, upload-time = "2026-02-11T04:21:34.97Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/ec/8a6d22afd02570d30954e043f09c32772bfe143ba9285e2fdb11284952cd/pillow-12.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e0c664be47252947d870ac0d327fea7e63985a08794758aa8af5b6cb6ec0c9c", size = 6269008, upload-time = "2026-02-11T04:21:36.623Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/1d/6d875422c9f28a4a361f495a5f68d9de4a66941dc2c619103ca335fa6446/pillow-12.1.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:691ab2ac363b8217f7d31b3497108fb1f50faab2f75dfb03284ec2f217e87bf8", size = 8073226, upload-time = "2026-02-11T04:21:38.585Z" },
+ { url = "https://files.pythonhosted.org/packages/a1/cd/134b0b6ee5eda6dc09e25e24b40fdafe11a520bc725c1d0bbaa5e00bf95b/pillow-12.1.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9e8064fb1cc019296958595f6db671fba95209e3ceb0c4734c9baf97de04b20", size = 6380136, upload-time = "2026-02-11T04:21:40.562Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/a9/7628f013f18f001c1b98d8fffe3452f306a70dc6aba7d931019e0492f45e/pillow-12.1.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:472a8d7ded663e6162dafdf20015c486a7009483ca671cece7a9279b512fcb13", size = 7067129, upload-time = "2026-02-11T04:21:42.521Z" },
+ { url = "https://files.pythonhosted.org/packages/1e/f8/66ab30a2193b277785601e82ee2d49f68ea575d9637e5e234faaa98efa4c/pillow-12.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:89b54027a766529136a06cfebeecb3a04900397a3590fd252160b888479517bf", size = 6491807, upload-time = "2026-02-11T04:21:44.22Z" },
+ { url = "https://files.pythonhosted.org/packages/da/0b/a877a6627dc8318fdb84e357c5e1a758c0941ab1ddffdafd231983788579/pillow-12.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:86172b0831b82ce4f7877f280055892b31179e1576aa00d0df3bb1bbf8c3e524", size = 7190954, upload-time = "2026-02-11T04:21:46.114Z" },
+ { url = "https://files.pythonhosted.org/packages/83/43/6f732ff85743cf746b1361b91665d9f5155e1483817f693f8d57ea93147f/pillow-12.1.1-cp313-cp313t-win32.whl", hash = "sha256:44ce27545b6efcf0fdbdceb31c9a5bdea9333e664cda58a7e674bb74608b3986", size = 6336441, upload-time = "2026-02-11T04:21:48.22Z" },
+ { url = "https://files.pythonhosted.org/packages/3b/44/e865ef3986611bb75bfabdf94a590016ea327833f434558801122979cd0e/pillow-12.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a285e3eb7a5a45a2ff504e31f4a8d1b12ef62e84e5411c6804a42197c1cf586c", size = 7045383, upload-time = "2026-02-11T04:21:50.015Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/c6/f4fb24268d0c6908b9f04143697ea18b0379490cb74ba9e8d41b898bd005/pillow-12.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cc7d296b5ea4d29e6570dabeaed58d31c3fea35a633a69679fb03d7664f43fb3", size = 2456104, upload-time = "2026-02-11T04:21:51.633Z" },
+ { url = "https://files.pythonhosted.org/packages/03/d0/bebb3ffbf31c5a8e97241476c4cf8b9828954693ce6744b4a2326af3e16b/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:417423db963cb4be8bac3fc1204fe61610f6abeed1580a7a2cbb2fbda20f12af", size = 4062652, upload-time = "2026-02-11T04:21:53.19Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/c0/0e16fb0addda4851445c28f8350d8c512f09de27bbb0d6d0bbf8b6709605/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:b957b71c6b2387610f556a7eb0828afbe40b4a98036fc0d2acfa5a44a0c2036f", size = 4138823, upload-time = "2026-02-11T04:22:03.088Z" },
+ { url = "https://files.pythonhosted.org/packages/6b/fb/6170ec655d6f6bb6630a013dd7cf7bc218423d7b5fa9071bf63dc32175ae/pillow-12.1.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:097690ba1f2efdeb165a20469d59d8bb03c55fb6621eb2041a060ae8ea3e9642", size = 3601143, upload-time = "2026-02-11T04:22:04.909Z" },
+ { url = "https://files.pythonhosted.org/packages/59/04/dc5c3f297510ba9a6837cbb318b87dd2b8f73eb41a43cc63767f65cb599c/pillow-12.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2815a87ab27848db0321fb78c7f0b2c8649dee134b7f2b80c6a45c6831d75ccd", size = 5266254, upload-time = "2026-02-11T04:22:07.656Z" },
+ { url = "https://files.pythonhosted.org/packages/05/30/5db1236b0d6313f03ebf97f5e17cda9ca060f524b2fcc875149a8360b21c/pillow-12.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7ed2c6543bad5a7d5530eb9e78c53132f93dfa44a28492db88b41cdab885202", size = 4657499, upload-time = "2026-02-11T04:22:09.613Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/18/008d2ca0eb612e81968e8be0bbae5051efba24d52debf930126d7eaacbba/pillow-12.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:652a2c9ccfb556235b2b501a3a7cf3742148cd22e04b5625c5fe057ea3e3191f", size = 6232137, upload-time = "2026-02-11T04:22:11.434Z" },
+ { url = "https://files.pythonhosted.org/packages/70/f1/f14d5b8eeb4b2cd62b9f9f847eb6605f103df89ef619ac68f92f748614ea/pillow-12.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6e4571eedf43af33d0fc233a382a76e849badbccdf1ac438841308652a08e1f", size = 8042721, upload-time = "2026-02-11T04:22:13.321Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/d6/17824509146e4babbdabf04d8171491fa9d776f7061ff6e727522df9bd03/pillow-12.1.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b574c51cf7d5d62e9be37ba446224b59a2da26dc4c1bb2ecbe936a4fb1a7cb7f", size = 6347798, upload-time = "2026-02-11T04:22:15.449Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/ee/c85a38a9ab92037a75615aba572c85ea51e605265036e00c5b67dfafbfe2/pillow-12.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a37691702ed687799de29a518d63d4682d9016932db66d4e90c345831b02fb4e", size = 7039315, upload-time = "2026-02-11T04:22:17.24Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/f3/bc8ccc6e08a148290d7523bde4d9a0d6c981db34631390dc6e6ec34cacf6/pillow-12.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f95c00d5d6700b2b890479664a06e754974848afaae5e21beb4d83c106923fd0", size = 6462360, upload-time = "2026-02-11T04:22:19.111Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/ab/69a42656adb1d0665ab051eec58a41f169ad295cf81ad45406963105408f/pillow-12.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:559b38da23606e68681337ad74622c4dbba02254fc9cb4488a305dd5975c7eeb", size = 7165438, upload-time = "2026-02-11T04:22:21.041Z" },
+ { url = "https://files.pythonhosted.org/packages/02/46/81f7aa8941873f0f01d4b55cc543b0a3d03ec2ee30d617a0448bf6bd6dec/pillow-12.1.1-cp314-cp314-win32.whl", hash = "sha256:03edcc34d688572014ff223c125a3f77fb08091e4607e7745002fc214070b35f", size = 6431503, upload-time = "2026-02-11T04:22:22.833Z" },
+ { url = "https://files.pythonhosted.org/packages/40/72/4c245f7d1044b67affc7f134a09ea619d4895333d35322b775b928180044/pillow-12.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:50480dcd74fa63b8e78235957d302d98d98d82ccbfac4c7e12108ba9ecbdba15", size = 7176748, upload-time = "2026-02-11T04:22:24.64Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/ad/8a87bdbe038c5c698736e3348af5c2194ffb872ea52f11894c95f9305435/pillow-12.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:5cb1785d97b0c3d1d1a16bc1d710c4a0049daefc4935f3a8f31f827f4d3d2e7f", size = 2544314, upload-time = "2026-02-11T04:22:26.685Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/9d/efd18493f9de13b87ede7c47e69184b9e859e4427225ea962e32e56a49bc/pillow-12.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1f90cff8aa76835cba5769f0b3121a22bd4eb9e6884cfe338216e557a9a548b8", size = 5268612, upload-time = "2026-02-11T04:22:29.884Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/f1/4f42eb2b388eb2ffc660dcb7f7b556c1015c53ebd5f7f754965ef997585b/pillow-12.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1f1be78ce9466a7ee64bfda57bdba0f7cc499d9794d518b854816c41bf0aa4e9", size = 4660567, upload-time = "2026-02-11T04:22:31.799Z" },
+ { url = "https://files.pythonhosted.org/packages/01/54/df6ef130fa43e4b82e32624a7b821a2be1c5653a5fdad8469687a7db4e00/pillow-12.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42fc1f4677106188ad9a55562bbade416f8b55456f522430fadab3cef7cd4e60", size = 6269951, upload-time = "2026-02-11T04:22:33.921Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/48/618752d06cc44bb4aae8ce0cd4e6426871929ed7b46215638088270d9b34/pillow-12.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98edb152429ab62a1818039744d8fbb3ccab98a7c29fc3d5fcef158f3f1f68b7", size = 8074769, upload-time = "2026-02-11T04:22:35.877Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/bd/f1d71eb39a72fa088d938655afba3e00b38018d052752f435838961127d8/pillow-12.1.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d470ab1178551dd17fdba0fef463359c41aaa613cdcd7ff8373f54be629f9f8f", size = 6381358, upload-time = "2026-02-11T04:22:37.698Z" },
+ { url = "https://files.pythonhosted.org/packages/64/ef/c784e20b96674ed36a5af839305f55616f8b4f8aa8eeccf8531a6e312243/pillow-12.1.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6408a7b064595afcab0a49393a413732a35788f2a5092fdc6266952ed67de586", size = 7068558, upload-time = "2026-02-11T04:22:39.597Z" },
+ { url = "https://files.pythonhosted.org/packages/73/cb/8059688b74422ae61278202c4e1ad992e8a2e7375227be0a21c6b87ca8d5/pillow-12.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5d8c41325b382c07799a3682c1c258469ea2ff97103c53717b7893862d0c98ce", size = 6493028, upload-time = "2026-02-11T04:22:42.73Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/da/e3c008ed7d2dd1f905b15949325934510b9d1931e5df999bb15972756818/pillow-12.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c7697918b5be27424e9ce568193efd13d925c4481dd364e43f5dff72d33e10f8", size = 7191940, upload-time = "2026-02-11T04:22:44.543Z" },
+ { url = "https://files.pythonhosted.org/packages/01/4a/9202e8d11714c1fc5951f2e1ef362f2d7fbc595e1f6717971d5dd750e969/pillow-12.1.1-cp314-cp314t-win32.whl", hash = "sha256:d2912fd8114fc5545aa3a4b5576512f64c55a03f3ebcca4c10194d593d43ea36", size = 6438736, upload-time = "2026-02-11T04:22:46.347Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/ca/cbce2327eb9885476b3957b2e82eb12c866a8b16ad77392864ad601022ce/pillow-12.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4ceb838d4bd9dab43e06c363cab2eebf63846d6a4aeaea283bbdfd8f1a8ed58b", size = 7182894, upload-time = "2026-02-11T04:22:48.114Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/d2/de599c95ba0a973b94410477f8bf0b6f0b5e67360eb89bcb1ad365258beb/pillow-12.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7b03048319bfc6170e93bd60728a1af51d3dd7704935feb228c4d4faab35d334", size = 2546446, upload-time = "2026-02-11T04:22:50.342Z" },
+ { url = "https://files.pythonhosted.org/packages/56/11/5d43209aa4cb58e0cc80127956ff1796a68b928e6324bbf06ef4db34367b/pillow-12.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:600fd103672b925fe62ed08e0d874ea34d692474df6f4bf7ebe148b30f89f39f", size = 5228606, upload-time = "2026-02-11T04:22:52.106Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/d5/3b005b4e4fda6698b371fa6c21b097d4707585d7db99e98d9b0b87ac612a/pillow-12.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:665e1b916b043cef294bc54d47bf02d87e13f769bc4bc5fa225a24b3a6c5aca9", size = 4622321, upload-time = "2026-02-11T04:22:53.827Z" },
+ { url = "https://files.pythonhosted.org/packages/df/36/ed3ea2d594356fd8037e5a01f6156c74bc8d92dbb0fa60746cc96cabb6e8/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:495c302af3aad1ca67420ddd5c7bd480c8867ad173528767d906428057a11f0e", size = 5247579, upload-time = "2026-02-11T04:22:56.094Z" },
+ { url = "https://files.pythonhosted.org/packages/54/9a/9cc3e029683cf6d20ae5085da0dafc63148e3252c2f13328e553aaa13cfb/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8fd420ef0c52c88b5a035a0886f367748c72147b2b8f384c9d12656678dfdfa9", size = 6989094, upload-time = "2026-02-11T04:22:58.288Z" },
+ { url = "https://files.pythonhosted.org/packages/00/98/fc53ab36da80b88df0967896b6c4b4cd948a0dc5aa40a754266aa3ae48b3/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f975aa7ef9684ce7e2c18a3aa8f8e2106ce1e46b94ab713d156b2898811651d3", size = 5313850, upload-time = "2026-02-11T04:23:00.554Z" },
+ { url = "https://files.pythonhosted.org/packages/30/02/00fa585abfd9fe9d73e5f6e554dc36cc2b842898cbfc46d70353dae227f8/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8089c852a56c2966cf18835db62d9b34fef7ba74c726ad943928d494fa7f4735", size = 5963343, upload-time = "2026-02-11T04:23:02.934Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/26/c56ce33ca856e358d27fda9676c055395abddb82c35ac0f593877ed4562e/pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e", size = 7029880, upload-time = "2026-02-11T04:23:04.783Z" },
]

[[package]]
@@ -2397,6 +2455,34 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
+[[package]]
+name = "polars"
+version = "1.38.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "polars-runtime-32" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/5e/208a24471a433bcd0e9a6889ac49025fd4daad2815c8220c5bd2576e5f1b/polars-1.38.1.tar.gz", hash = "sha256:803a2be5344ef880ad625addfb8f641995cfd777413b08a10de0897345778239", size = 717667, upload-time = "2026-02-06T18:13:23.013Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0a/49/737c1a6273c585719858261753da0b688454d1b634438ccba8a9c4eb5aab/polars-1.38.1-py3-none-any.whl", hash = "sha256:a29479c48fed4984d88b656486d221f638cba45d3e961631a50ee5fdde38cb2c", size = 810368, upload-time = "2026-02-06T18:11:55.819Z" },
+]
+
+[[package]]
+name = "polars-runtime-32"
+version = "1.38.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/07/4b/04d6b3fb7cf336fbe12fbc4b43f36d1783e11bb0f2b1e3980ec44878df06/polars_runtime_32-1.38.1.tar.gz", hash = "sha256:04f20ed1f5c58771f34296a27029dc755a9e4b1390caeaef8f317e06fdfce2ec", size = 2812631, upload-time = "2026-02-06T18:13:25.206Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ae/a2/a00defbddadd8cf1042f52380dcba6b6592b03bac8e3b34c436b62d12d3b/polars_runtime_32-1.38.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:18154e96044724a0ac38ce155cf63aa03c02dd70500efbbf1a61b08cadd269ef", size = 44108001, upload-time = "2026-02-06T18:11:58.127Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/fb/599ff3709e6a303024efd7edfd08cf8de55c6ac39527d8f41cbc4399385f/polars_runtime_32-1.38.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:c49acac34cc4049ed188f1eb67d6ff3971a39b4af7f7b734b367119970f313ac", size = 40230140, upload-time = "2026-02-06T18:12:01.181Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/8c/3ac18d6f89dc05fe2c7c0ee1dc5b81f77a5c85ad59898232c2500fe2ebbf/polars_runtime_32-1.38.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fef2ef2626a954e010e006cc8e4de467ecf32d08008f130cea1c78911f545323", size = 41994039, upload-time = "2026-02-06T18:12:04.332Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/5a/61d60ec5cc0ab37cbd5a699edb2f9af2875b7fdfdfb2a4608ca3cc5f0448/polars_runtime_32-1.38.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8a5f7a8125e2d50e2e060296551c929aec09be23a9edcb2b12ca923f555a5ba", size = 45755804, upload-time = "2026-02-06T18:12:07.846Z" },
+ { url = "https://files.pythonhosted.org/packages/91/54/02cd4074c98c361ccd3fec3bcb0bd68dbc639c0550c42a4436b0ff0f3ccf/polars_runtime_32-1.38.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:10d19cd9863e129273b18b7fcaab625b5c8143c2d22b3e549067b78efa32e4fa", size = 42159605, upload-time = "2026-02-06T18:12:10.919Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/f3/b2a5e720cc56eaa38b4518e63aa577b4bbd60e8b05a00fe43ca051be5879/polars_runtime_32-1.38.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61e8d73c614b46a00d2f853625a7569a2e4a0999333e876354ac81d1bf1bb5e2", size = 45336615, upload-time = "2026-02-06T18:12:14.074Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/8d/ee2e4b7de948090cfb3df37d401c521233daf97bfc54ddec5d61d1d31618/polars_runtime_32-1.38.1-cp310-abi3-win_amd64.whl", hash = "sha256:08c2b3b93509c1141ac97891294ff5c5b0c548a373f583eaaea873a4bf506437", size = 45680732, upload-time = "2026-02-06T18:12:19.097Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/18/72c216f4ab0c82b907009668f79183ae029116ff0dd245d56ef58aac48e7/polars_runtime_32-1.38.1-cp310-abi3-win_arm64.whl", hash = "sha256:6d07d0cc832bfe4fb54b6e04218c2c27afcfa6b9498f9f6bbf262a00d58cc7c4", size = 41639413, upload-time = "2026-02-06T18:12:22.044Z" },
+]
+
[[package]]
name = "pre-commit"
version = "3.5.0"
@@ -2953,6 +3039,42 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
]

+[[package]]
+name = "pytrec-eval-terrier"
+version = "0.5.10"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/04/96/4925a95e4865a647bc74d3bb052243d12a3c8e8a34909d7d097b5a4d08c5/pytrec_eval_terrier-0.5.10.tar.gz", hash = "sha256:eaaf20580d17b5575a233e04dab8a4cbcc01a7e45be8cf547c07f0a2bb3e7eb9", size = 18634, upload-time = "2025-10-20T16:50:18.098Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/bf/a6/09a081ea7cf76c680b5fa8367836cba5a019d1de5be295081992a0addfc1/pytrec_eval_terrier-0.5.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5e574b2d4285d42e3bdc7ca0d9724d46c3bce06d3ee5d6c20e90fdea19761a2f", size = 136811, upload-time = "2025-10-20T16:50:38.729Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/b5/f18b1ad8936a38a7b1d51913189cd53d477d513cd48b79c7cb9bb7dc980f/pytrec_eval_terrier-0.5.10-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e69c78878379e3e5e280ecf91e9c3bd882f637763d2378655bb0f121e62efbd4", size = 303698, upload-time = "2025-10-20T16:54:13.674Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/1a/2d6a268d2327c38547b4e4a0f815fd51b4a93ab3ee5639260e82def444bb/pytrec_eval_terrier-0.5.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:04266dd7869276ae025399df69bf050bba26043b37426cd482fb9bcaa2b78ffa", size = 1327102, upload-time = "2025-10-20T16:54:14.551Z" },
+ { url = "https://files.pythonhosted.org/packages/44/d2/283bb904ee40d0a1bb6858e018fca63043632ac0426e4c5badd5548cc753/pytrec_eval_terrier-0.5.10-cp310-cp310-win_amd64.whl", hash = "sha256:bb0bb4495f10a0bff95f97a8c17df67c967d611c9fc1a5db13e143e7888b102e", size = 58611, upload-time = "2025-10-20T16:52:01.714Z" },
+ { url = "https://files.pythonhosted.org/packages/18/de/7659555355381e57a73e7ba31437dc31d3df146b5cc3fb66eb032683e84e/pytrec_eval_terrier-0.5.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1036735d4a12d1c92eea38a14a071168a292f8696099e90742c2c701479f010b", size = 136866, upload-time = "2025-10-20T16:50:40.054Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/d7/1cbc2d3936eec51b57e1146840eb3ccd8a9fb2debc519d7aa748f13dd724/pytrec_eval_terrier-0.5.10-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b36a2fbdccc7669c4b8aba1f6de2a661e6f2f77c10f05855eda55dda60fc88f5", size = 304025, upload-time = "2025-10-20T16:54:15.957Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/a2/84c93f0a260d0dabca007a02b206981d235c7f4b4c569ec746b5ef6d965b/pytrec_eval_terrier-0.5.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9e4ca19110f24922d7435cf9ef9951a61f0b575488b6a1db86081d82b88dd621", size = 1327402, upload-time = "2025-10-20T16:54:16.842Z" },
+ { url = "https://files.pythonhosted.org/packages/4e/ee/3a20da0523228f54d8b89b9a11d7ec402625086cc3167fb940e36a9e2d5b/pytrec_eval_terrier-0.5.10-cp311-cp311-win_amd64.whl", hash = "sha256:d36e9a8966560ed10bc5aeb30c5c29a53d3fe8e4ccb6ff6bb026bffb21be3fe3", size = 58558, upload-time = "2025-10-20T16:51:46.032Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/ca/f0edd9df08c08c96d2f088c298cfb824c3ee816302ac1f911ecb1bfdd681/pytrec_eval_terrier-0.5.10-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28c3c14728713cdbad165964e2d1aba96b0fc7445a5a13168b398e9bd3bbd08", size = 137179, upload-time = "2025-10-20T16:51:07.809Z" },
+ { url = "https://files.pythonhosted.org/packages/73/55/e02a14b0d3ac520849f66391f03c6783b3383fd23a19372d07a2280b815e/pytrec_eval_terrier-0.5.10-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:689ee541d72c27d14ae15cd1f11d2cb86cf9bdc880f5e8af9c5dbbdd47663d4d", size = 304845, upload-time = "2025-10-20T16:54:17.791Z" },
+ { url = "https://files.pythonhosted.org/packages/76/9c/9020b700199b09ebdfc6dbadae81641a49555c4ee21dedbe2aa98af601b5/pytrec_eval_terrier-0.5.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3f02118dadd3c09b71462bb26e405e49bd10fe0c60bcc169fcd31454a4256dc2", size = 1327965, upload-time = "2025-10-20T16:54:18.743Z" },
+ { url = "https://files.pythonhosted.org/packages/39/9e/6e7c2b89f52e1cebeef6c3bb47272f5bd69766ddbc6e9e5445da0c876899/pytrec_eval_terrier-0.5.10-cp312-cp312-win_amd64.whl", hash = "sha256:202e48fe24948453fe45dcd73261f9865f99cb2ff4c8a3255ac2ab4c993a64ba", size = 58641, upload-time = "2025-10-20T16:51:26.148Z" },
+ { url = "https://files.pythonhosted.org/packages/93/21/71a0dee7e2cd368237432af6bf6051ffde03370730dc1666cd39494c82a7/pytrec_eval_terrier-0.5.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:fcf96c33446c16de8db78e829c5279f7404ceaaf6b502bb5a6a3669b06051601", size = 137186, upload-time = "2025-10-20T16:50:22.941Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/8c/2494edf20d726bdd3ee0a20dc5ed84351c6cc6ccc17b11b474e315808762/pytrec_eval_terrier-0.5.10-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8455485f1faf6759f1be11b12c904d1c749ba5db7e2b6f414aa56e19533ce069", size = 304917, upload-time = "2025-10-20T16:54:20.486Z" },
+ { url = "https://files.pythonhosted.org/packages/cf/51/7611546afb55548e65db35354a63b90d5fd5ea593fc64e5993088bf61415/pytrec_eval_terrier-0.5.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e7cc9666305281b0ca1873761dc71cd3f0863e6d759f00a12fd363aa2d558d6f", size = 1327998, upload-time = "2025-10-20T16:54:21.375Z" },
+ { url = "https://files.pythonhosted.org/packages/74/b3/20941b4dbe3b267271ed1ef80aa93b348da674aecb5d6aca8f311c4738b0/pytrec_eval_terrier-0.5.10-cp313-cp313-win_amd64.whl", hash = "sha256:9440bd4a78ee0bc5db6821d7483e962a6c494303fd26598f84f00d54cc64cdd7", size = 58631, upload-time = "2025-10-20T16:51:05.08Z" },
+ { url = "https://files.pythonhosted.org/packages/f0/34/e3d0f75286151d97537309b3f311e1269b0194e3823038fc39054e84c3b4/pytrec_eval_terrier-0.5.10-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:70bc61b8d02e61a37ed97c088282bb0a124b58e7141cc52756512750efabacbb", size = 137320, upload-time = "2025-10-20T16:50:50.92Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/72/2c1f9fd44ed7a5657654a712e5255019d5d23ba2b3d53848da1838bfb8df/pytrec_eval_terrier-0.5.10-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d52d94803c32cadbff7fe5195b0d0d68d27393092f64207fe8250a4485d1f8d7", size = 304917, upload-time = "2025-10-20T16:54:22.59Z" },
+ { url = "https://files.pythonhosted.org/packages/66/9d/7e440de7b37dd31cd78eefe2ec1bf3e5f49db42b17b34dc8d6006ee03fc5/pytrec_eval_terrier-0.5.10-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:77950d0ce9bd960af40efede6850e7b6519400e7fda3f9313e0d0d02c247e4e2", size = 1327991, upload-time = "2025-10-20T16:54:23.76Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/94/5639d7c346935a75540c1f1798be277c161b561001f2a91ef303e3d85f10/pytrec_eval_terrier-0.5.10-cp314-cp314-win_amd64.whl", hash = "sha256:c69681fec350fa94af45dd7ef8f53f605e89f752583c814f713d7d2329435cfc", size = 60178, upload-time = "2025-10-20T16:51:50.946Z" },
+ { url = "https://files.pythonhosted.org/packages/f4/a7/9080fe3f971397ea4447e3bda0c350225c944047ede7927c9a1f788af000/pytrec_eval_terrier-0.5.10-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:876740f3d58625058d34aaa1939be31bf253ecacd85d0d8b1089db5dd57ab127", size = 308002, upload-time = "2025-10-20T16:54:24.746Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/c9/5bf9d58cb275559211ba4af905c5a4d95f78c4b973f4186f8b22d8c0b073/pytrec_eval_terrier-0.5.10-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2ca4e624e5f2589ae75c1034ff1f38e9fc81de86314193508ac423e7ca56769c", size = 1330474, upload-time = "2025-10-20T16:54:25.569Z" },
+]
+
[[package]]
name = "pytz"
version = "2025.2"
@@ -3461,6 +3583,110 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cf/e3/3425c9a8773807ac2c01d6a56c8521733f09b627e5827e733c5cd36b9ac5/sanic_routing-23.12.0-py3-none-any.whl", hash = "sha256:1558a72afcb9046ed3134a5edae02fc1552cff08f0fff2e8d5de0877ea43ed73", size = 25522, upload-time = "2023-12-31T09:28:35.233Z" },
]

+[[package]]
+name = "scikit-learn"
+version = "1.7.2"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "python_full_version < '3.11' and sys_platform != 'darwin'",
+ "python_full_version < '3.11' and sys_platform == 'darwin'",
+]
+dependencies = [
+ { name = "joblib", marker = "python_full_version < '3.11'" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "threadpoolctl", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = "2025-09-09T08:20:19.328Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" },
+ { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = "2025-09-09T08:20:24.327Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" },
+ { url = "https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" },
+ { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" },
+ { url = "https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" },
+ { url = "https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" },
+ { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" },
+ { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" },
+ { url = "https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = "2025-09-09T08:20:59.671Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" },
+ { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" },
+ { url = "https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" },
+ { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" },
+ { url = "https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = "2025-09-09T08:21:13.602Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = "sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" },
+ { url = "https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" },
+ { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" },
+ { url = "https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" },
+ { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" },
+ { url = "https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" },
+]
+
+[[package]]
+name = "scikit-learn"
+version = "1.8.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "python_full_version >= '3.12' and sys_platform != 'darwin'",
+ "python_full_version == '3.11.*' and sys_platform != 'darwin'",
+ "python_full_version >= '3.12' and sys_platform == 'darwin'",
+ "python_full_version == '3.11.*' and sys_platform == 'darwin'",
+]
+dependencies = [
+ { name = "joblib", marker = "python_full_version >= '3.11'" },
+ { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "threadpoolctl", marker = "python_full_version >= '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c9/92/53ea2181da8ac6bf27170191028aee7251f8f841f8d3edbfdcaf2008fde9/scikit_learn-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:146b4d36f800c013d267b29168813f7a03a43ecd2895d04861f1240b564421da", size = 8595835, upload-time = "2025-12-10T07:07:39.385Z" },
+ { url = "https://files.pythonhosted.org/packages/01/18/d154dc1638803adf987910cdd07097d9c526663a55666a97c124d09fb96a/scikit_learn-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f984ca4b14914e6b4094c5d52a32ea16b49832c03bd17a110f004db3c223e8e1", size = 8080381, upload-time = "2025-12-10T07:07:41.93Z" },
+ { url = "https://files.pythonhosted.org/packages/8a/44/226142fcb7b7101e64fdee5f49dbe6288d4c7af8abf593237b70fca080a4/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e30adb87f0cc81c7690a84f7932dd66be5bac57cfe16b91cb9151683a4a2d3b", size = 8799632, upload-time = "2025-12-10T07:07:43.899Z" },
+ { url = "https://files.pythonhosted.org/packages/36/4d/4a67f30778a45d542bbea5db2dbfa1e9e100bf9ba64aefe34215ba9f11f6/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ada8121bcb4dac28d930febc791a69f7cb1673c8495e5eee274190b73a4559c1", size = 9103788, upload-time = "2025-12-10T07:07:45.982Z" },
+ { url = "https://files.pythonhosted.org/packages/89/3c/45c352094cfa60050bcbb967b1faf246b22e93cb459f2f907b600f2ceda5/scikit_learn-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:c57b1b610bd1f40ba43970e11ce62821c2e6569e4d74023db19c6b26f246cb3b", size = 8081706, upload-time = "2025-12-10T07:07:48.111Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/46/5416595bb395757f754feb20c3d776553a386b661658fb21b7c814e89efe/scikit_learn-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:2838551e011a64e3053ad7618dda9310175f7515f1742fa2d756f7c874c05961", size = 7688451, upload-time = "2025-12-10T07:07:49.873Z" },
+ { url = "https://files.pythonhosted.org/packages/90/74/e6a7cc4b820e95cc38cf36cd74d5aa2b42e8ffc2d21fe5a9a9c45c1c7630/scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e", size = 8548242, upload-time = "2025-12-10T07:07:51.568Z" },
+ { url = "https://files.pythonhosted.org/packages/49/d8/9be608c6024d021041c7f0b3928d4749a706f4e2c3832bbede4fb4f58c95/scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76", size = 8079075, upload-time = "2025-12-10T07:07:53.697Z" },
+ { url = "https://files.pythonhosted.org/packages/dd/47/f187b4636ff80cc63f21cd40b7b2d177134acaa10f6bb73746130ee8c2e5/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4", size = 8660492, upload-time = "2025-12-10T07:07:55.574Z" },
+ { url = "https://files.pythonhosted.org/packages/97/74/b7a304feb2b49df9fafa9382d4d09061a96ee9a9449a7cbea7988dda0828/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a", size = 8931904, upload-time = "2025-12-10T07:07:57.666Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/c4/0ab22726a04ede56f689476b760f98f8f46607caecff993017ac1b64aa5d/scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809", size = 8019359, upload-time = "2025-12-10T07:07:59.838Z" },
+ { url = "https://files.pythonhosted.org/packages/24/90/344a67811cfd561d7335c1b96ca21455e7e472d281c3c279c4d3f2300236/scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb", size = 7641898, upload-time = "2025-12-10T07:08:01.36Z" },
+ { url = "https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770, upload-time = "2025-12-10T07:08:03.251Z" },
+ { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload-time = "2025-12-10T07:08:05.336Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload-time = "2025-12-10T07:08:07.732Z" },
+ { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload-time = "2025-12-10T07:08:09.862Z" },
+ { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409, upload-time = "2025-12-10T07:08:12.028Z" },
+ { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760, upload-time = "2025-12-10T07:08:13.688Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload-time = "2025-12-10T07:08:15.215Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload-time = "2025-12-10T07:08:17.561Z" },
+ { url = "https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload-time = "2025-12-10T07:08:19.952Z" },
+ { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload-time = "2025-12-10T07:08:22.11Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994, upload-time = "2025-12-10T07:08:23.943Z" },
+ { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518, upload-time = "2025-12-10T07:08:25.71Z" },
+ { url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload-time = "2025-12-10T07:08:27.541Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload-time = "2025-12-10T07:08:29.822Z" },
+ { url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload-time = "2025-12-10T07:08:31.865Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload-time = "2025-12-10T07:08:34.166Z" },
+ { url = "https://files.pythonhosted.org/packages/76/18/a8def8f91b18cd1ba6e05dbe02540168cb24d47e8dcf69e8d00b7da42a08/scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6", size = 8096518, upload-time = "2025-12-10T07:08:36.339Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/77/482076a678458307f0deb44e29891d6022617b2a64c840c725495bee343f/scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242", size = 7754546, upload-time = "2025-12-10T07:08:38.128Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload-time = "2025-12-10T07:08:41.013Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload-time = "2025-12-10T07:08:42.873Z" },
+ { url = "https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload-time = "2025-12-10T07:08:45.362Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload-time = "2025-12-10T07:08:47.408Z" },
+ { url = "https://files.pythonhosted.org/packages/35/4d/748c9e2872637a57981a04adc038dacaa16ba8ca887b23e34953f0b3f742/scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2", size = 8774395, upload-time = "2025-12-10T07:08:49.337Z" },
+ { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647, upload-time = "2025-12-10T07:08:51.601Z" },
+]
+
[[package]]
name = "scipy"
version = "1.15.3"
@@ -3598,6 +3824,29 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/64/47/a494741db7280eae6dc033510c319e34d42dd41b7ac0c7ead39354d1a2b5/scipy-1.16.3-cp314-cp314t-win_arm64.whl", hash = "sha256:21d9d6b197227a12dcbf9633320a4e34c6b0e51c57268df255a0942983bac562", size = 26464127, upload-time = "2025-10-28T17:38:11.34Z" },
]

+[[package]]
+name = "sentence-transformers"
+version = "5.2.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "huggingface-hub" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" },
+ { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" },
+ { name = "tqdm" },
+ { name = "transformers" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a6/bc/0bc9c0ec1cf83ab2ec6e6f38667d167349b950fff6dd2086b79bd360eeca/sentence_transformers-5.2.2.tar.gz", hash = "sha256:7033ee0a24bc04c664fd490abf2ef194d387b3a58a97adcc528783ff505159fa", size = 381607, upload-time = "2026-01-27T11:11:02.658Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/cc/21/7e925890636791386e81b52878134f114d63072e79fffe14cdcc5e7a5e6a/sentence_transformers-5.2.2-py3-none-any.whl", hash = "sha256:280ac54bffb84c110726b4d8848ba7b7c60813b9034547f8aea6e9a345cd1c23", size = 494106, upload-time = "2026-01-27T11:11:00.983Z" },
+]
+
[[package]]
name = "setuptools"
version = "80.9.0"
@@ -3750,6 +3999,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
]

+[[package]]
+name = "threadpoolctl"
+version = "3.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
+]
+
[[package]]
name = "tiktoken"
version = "0.12.0"