diff --git a/pyproject.toml b/pyproject.toml index 27bea0625..843eeace0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ include = ["*"] [tool.setuptools.package-data] "guidellm.data" = ["*.gz"] "guidellm.benchmark.scenarios" = ["*.json", "**/*.json"] +"guidellm.benchmark.outputs.html_outputs" = ["*.html"] [[tool.uv.index]] name = "pytorch-cpu" @@ -70,11 +71,12 @@ dependencies = [ "transformers", "uvloop>=0.18", "torch", + "more-itertools>=10.8.0", ] [project.optional-dependencies] # Meta Extras -all = ["guidellm[perf,tokenizers,audio,vision]"] +all = ["guidellm[perf,tokenizers,audio,vision,embeddings]"] recommended = ["guidellm[perf,tokenizers]"] # Feature Extras perf = ["orjson", "msgpack", "msgspec", "uvloop"] @@ -90,6 +92,12 @@ vision = [ "datasets[vision]", "pillow", ] +embeddings = [ + # Quality validation with baseline models + "sentence-transformers>=2.2.0", + # MTEB benchmark integration + "mteb>=1.0.0", +] # Dev Tooling dev = [ # Install all optional dependencies @@ -179,7 +187,9 @@ module = [ "transformers.*", "setuptools.*", "setuptools_git_versioning.*", - "torchcodec.*" + "torchcodec.*", + "sentence_transformers.*", + "mteb.*" ] ignore_missing_imports = true diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 7e9dab87f..f11461b05 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -792,5 +792,226 @@ def mock_server( server.run() +@benchmark.command( + "embeddings", + help=( + "Run embeddings benchmark with optional quality validation. " + "Supports cosine similarity validation and MTEB benchmark evaluation." + ), + context_settings={"auto_envvar_prefix": "GUIDELLM"}, +) +@click.option( + "--target", + type=str, + required=True, + help="Target backend URL (e.g., http://localhost:8000).", +) +@click.option( + "--data", + type=str, + multiple=True, + required=True, + help=( + "HuggingFace dataset ID, path to dataset, path to data file " + "(csv/json/jsonl/txt), or synthetic data config." 
+ ), +) +@click.option( + "--profile", + default="sweep", + type=click.Choice(STRATEGY_PROFILE_CHOICES), + help=f"Benchmark profile type. Options: {', '.join(STRATEGY_PROFILE_CHOICES)}.", +) +@click.option( + "--rate", + callback=cli_tools.parse_list_floats, + multiple=True, + default=None, + help="Benchmark rate(s) to test. Meaning depends on profile.", +) +@click.option( + "--backend", + type=click.Choice(list(get_literal_vals(BackendType))), + default="openai_http", + help=f"Backend type. Options: {', '.join(get_literal_vals(BackendType))}.", +) +@click.option( + "--backend-kwargs", + callback=cli_tools.parse_json, + default=None, + help='JSON string of backend arguments. E.g., \'{"api_key": "key"}\'', +) +@click.option( + "--model", + default=None, + type=str, + help="Model ID to benchmark. If not provided, uses first available model.", +) +@click.option( + "--request-format", + default="embeddings", + help="Format to use for requests (default: embeddings).", +) +@click.option( + "--processor", + default=None, + type=str, + help="Processor or tokenizer for token counts. If not provided, loads from model.", +) +@click.option( + "--data-samples", + default=-1, + type=int, + help="Number of samples from dataset. -1 (default) uses all samples.", +) +@click.option( + "--outputs", + default=["json", "csv", "html"], + callback=cli_tools.parse_list, + help=( + "Comma-separated list of output formats: json,csv,html,console. " + "Default: json,csv,html" + ), +) +@click.option( + "--output-dir", + type=click.Path(file_okay=False, dir_okay=True, path_type=Path), + default=Path.cwd(), + help="Directory to save output files. 
Default: current directory.", +) +@click.option( + "--max-requests", + default=None, + type=int, + help="Maximum number of requests to execute.", +) +@click.option( + "--max-errors", + default=None, + type=int, + help="Maximum number of errors before stopping benchmark.", +) +@click.option( + "--max-duration", + default=None, + type=float, + help="Maximum duration in seconds for benchmark execution.", +) +# Embeddings-specific quality validation options +@click.option( + "--enable-quality-validation", + is_flag=True, + default=False, + help="Enable quality validation using cosine similarity against baseline model.", +) +@click.option( + "--baseline-model", + default=None, + type=str, + help=( + "HuggingFace model for baseline comparison. " + "E.g., 'sentence-transformers/all-MiniLM-L6-v2'. " + "Defaults to target model if not specified." + ), +) +@click.option( + "--quality-tolerance", + default=1e-2, + type=float, + help=( + "Cosine similarity tolerance threshold. " + "Default: 1e-2 (standard), use 5e-4 for MTEB-level validation." + ), +) +@click.option( + "--enable-mteb", + is_flag=True, + default=False, + help="Enable MTEB benchmark evaluation for standardized quality scoring.", +) +@click.option( + "--mteb-tasks", + callback=cli_tools.parse_list, + default=None, + help=( + "Comma-separated list of MTEB tasks. " + "Default: STS12,STS13,STSBenchmark. E.g., 'STS12,STS13,STS14'" + ), +) +@click.option( + "--encoding-format", + type=click.Choice(["float", "base64"]), + default="float", + help="Embedding encoding format. Options: float, base64. Default: float.", +) +@click.option( + "--disable-console", + is_flag=True, + default=False, + help="Disable all console output (including progress display).", +) +@click.option( + "--disable-console-interactive", + is_flag=True, + default=False, + help="Disable interactive console elements (progress bar, tables).", +) +@click.option( + "--random-seed", + default=42, + type=int, + help="Random seed for reproducibility. 
Default: 42.", +) +def embeddings(**kwargs): + """Run embeddings benchmark with optional quality validation.""" + from guidellm.benchmark.embeddings_entrypoints import benchmark_embeddings + from guidellm.benchmark.schemas.embeddings import BenchmarkEmbeddingsArgs + + # Only set CLI args that differ from click defaults + kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs) + + # Handle console options + disable_console = kwargs.pop("disable_console", False) + disable_console_interactive = ( + kwargs.pop("disable_console_interactive", False) or disable_console + ) + console = Console() if not disable_console else None + + envs = cli_tools.list_set_env() + if console and envs: + console.print_update( + title=( + "Note: the following environment variables " + "are set and **may** affect configuration" + ), + details=", ".join(envs), + status="warning", + ) + + try: + args = BenchmarkEmbeddingsArgs.create(scenario=None, **kwargs) + except ValidationError as err: + errs = err.errors(include_url=False, include_context=True, include_input=True) + param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-") + raise click.BadParameter( + errs[0]["msg"], ctx=click.get_current_context(), param_hint=param_name + ) from err + + if uvloop is not None: + asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) + + asyncio.run( + benchmark_embeddings( + args=args, + progress=( + GenerativeConsoleBenchmarkerProgress() + if not disable_console_interactive + else None + ), + console=console, + ) + ) + + if __name__ == "__main__": cli() diff --git a/src/guidellm/backends/openai/http.py b/src/guidellm/backends/openai/http.py index d94f30909..1f64fc9f7 100644 --- a/src/guidellm/backends/openai/http.py +++ b/src/guidellm/backends/openai/http.py @@ -38,6 +38,8 @@ "/v1/chat/completions": "v1/chat/completions", "/v1/audio/transcriptions": "v1/audio/transcriptions", "/v1/audio/translations": "v1/audio/translations", + "/v1/embeddings": "v1/embeddings", + "embeddings": 
"v1/embeddings", # Alias for convenience } DEFAULT_API = "/v1/chat/completions" @@ -50,6 +52,9 @@ "audio_translations": "/v1/audio/translations", } +# NOTE: This value is taken from httpx's default +FALLBACK_TIMEOUT = 5.0 + @Backend.register("openai_http") class OpenAIHTTPBackend(Backend): @@ -83,7 +88,8 @@ def __init__( api_key: str | None = None, api_routes: dict[str, str] | None = None, request_handlers: dict[str, Any] | None = None, - timeout: float = 60.0, + timeout: float | None = None, + timeout_connect: float | None = FALLBACK_TIMEOUT, http2: bool = True, follow_redirects: bool = True, verify: bool = False, @@ -133,6 +139,7 @@ def __init__( self.api_routes = api_routes or DEFAULT_API_PATHS self.request_handlers = request_handlers self.timeout = timeout + self.timeout_connect = timeout_connect self.http2 = http2 self.follow_redirects = follow_redirects self.verify = verify @@ -162,6 +169,7 @@ def info(self) -> dict[str, Any]: "target": self.target, "model": self.model, "timeout": self.timeout, + "timeout_connect": self.timeout_connect, "http2": self.http2, "follow_redirects": self.follow_redirects, "verify": self.verify, @@ -182,7 +190,11 @@ async def process_startup(self): self._async_client = httpx.AsyncClient( http2=self.http2, - timeout=self.timeout, + timeout=httpx.Timeout( + FALLBACK_TIMEOUT, + read=self.timeout, + connect=self.timeout_connect, + ), follow_redirects=self.follow_redirects, verify=self.verify, # Allow unlimited connections diff --git a/src/guidellm/backends/openai/request_handlers.py b/src/guidellm/backends/openai/request_handlers.py index da548894c..490208dcf 100644 --- a/src/guidellm/backends/openai/request_handlers.py +++ b/src/guidellm/backends/openai/request_handlers.py @@ -13,6 +13,8 @@ import base64 from typing import Any, Protocol, cast +from more_itertools import roundrobin + from guidellm.schemas import GenerationRequest, GenerationResponse, UsageMetrics from guidellm.schemas.request import GenerationRequestArguments from 
guidellm.utils import RegistryMixin, json @@ -20,6 +22,7 @@ __all__ = [ "AudioRequestHandler", "ChatCompletionsRequestHandler", + "EmbeddingsRequestHandler", "OpenAIRequestHandler", "OpenAIRequestHandlerFactory", "TextCompletionsRequestHandler", @@ -363,7 +366,49 @@ class ChatCompletionsRequestHandler(TextCompletionsRequestHandler): both streaming and non-streaming chat completion responses. """ - def format( # noqa: C901, PLR0912, PLR0915 + def _format_prompts( + self, column_data: list[dict[str, Any]], column_type: str + ) -> list[dict[str, Any]]: + """ + Helper method to format different types of data columns + into the appropriate structure for chat messages. + """ + formatted_data = [] + for item in column_data: + if column_type == "text_column": + formatted_data.append({"type": "text", "text": item}) + elif column_type == "image_column": + formatted_data.append( + { + "type": "image_url", + "image_url": {"url": item.get("image")}, + } + ) + elif column_type == "video_column": + formatted_data.append( + { + "type": "video_url", + "video_url": {"url": item.get("video")}, + } + ) + elif column_type == "audio_column": + formatted_data.append( + { + "type": "input_audio", + "input_audio": { + "data": base64.b64encode(item.get("audio", b"")).decode( + "utf-8" + ), + "format": item.get("format"), + }, + } + ) + else: + raise ValueError(f"Unsupported column type: {column_type}") + + return formatted_data + + def format( self, data: GenerationRequest, **kwargs, @@ -410,71 +455,20 @@ def format( # noqa: C901, PLR0912, PLR0915 # Build messages arguments.body["messages"] = [] - for prefix in data.columns.get("prefix_column", []): - if not prefix: - continue - + # Build the system prompt + prefix = " ".join(data.columns.get("prefix_column", [])) + if prefix: arguments.body["messages"].append({"role": "system", "content": prefix}) - for text in data.columns.get("text_column", []): - if not text: - continue - + # Build each prompt then combine into a single user message + 
prompts = [ + self._format_prompts(data.columns.get(col, []), col) + for col in ("text_column", "image_column", "video_column", "audio_column") + ] + if prompts: + # Interleave prompt types arguments.body["messages"].append( - {"role": "user", "content": [{"type": "text", "text": text}]} - ) - - for image in data.columns.get("image_column", []): - if not image: - continue - - arguments.body["messages"].append( - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": {"url": image.get("image")}, - } - ], - } - ) - - for video in data.columns.get("video_column", []): - if not video: - continue - - arguments.body["messages"].append( - { - "role": "user", - "content": [ - { - "type": "video_url", - "video_url": {"url": video.get("video")}, - } - ], - } - ) - - for audio in data.columns.get("audio_column", []): - if not audio: - continue - - arguments.body["messages"].append( - { - "role": "user", - "content": [ - { - "type": "input_audio", - "input_audio": { - "data": base64.b64encode( - audio.get("audio", b"") - ).decode("utf-8"), - "format": audio.get("format"), - }, - } - ], - } + {"role": "user", "content": list(roundrobin(*prompts))} ) return arguments @@ -667,3 +661,113 @@ def extract_metrics( text_words=len(text.split()) if text else 0, text_characters=len(text) if text else 0, ) + + +@OpenAIRequestHandlerFactory.register("/v1/embeddings") +class EmbeddingsRequestHandler(OpenAIRequestHandler): + """ + Request handler for OpenAI-style embeddings endpoints. + + Handles embeddings requests which do not support streaming and return + embedding vectors instead of generated text. Processes input text into + embeddings with optional quality validation support. + """ + + def format( + self, + data: GenerationRequest, + **kwargs, + ) -> GenerationRequestArguments: + """ + Format the embeddings generation request. + + :param data: The generation request to format + :param **kwargs: Additional keyword arguments (model, encoding_format, etc.) 
+ :return: The formatted request arguments + """ + arguments = GenerationRequestArguments() + arguments.body = {} + arguments.stream = False # Embeddings never stream + + # Add model + if kwargs.get("model") is not None: + arguments.body["model"] = kwargs["model"] + + # Build input from text columns + input_texts = [] + for text in data.columns.get("text_column", []): + if text: + input_texts.append(text) + + # Use single string if only one text, otherwise list + if len(input_texts) == 1: + arguments.body["input"] = input_texts[0] + else: + arguments.body["input"] = input_texts + + # Add optional parameters + if kwargs.get("encoding_format"): + arguments.body["encoding_format"] = kwargs["encoding_format"] + if kwargs.get("dimensions"): + arguments.body["dimensions"] = kwargs["dimensions"] + if kwargs.get("truncate_prompt_tokens"): + arguments.body["truncate_prompt_tokens"] = kwargs["truncate_prompt_tokens"] + + # Apply extra arguments + if kwargs.get("extras"): + arguments.body.update(kwargs["extras"]) + + return arguments + + def compile_non_streaming( + self, + request: GenerationRequest, + arguments: GenerationRequestArguments, + response: Any, + ) -> GenerationResponse: + """ + Process a complete non-streaming embeddings API response. + + :param request: Original generation request + :param arguments: Request arguments used + :param response: Raw API response data + :return: GenerationResponse with embeddings data + """ + # Extract usage data + usage = response.get("usage", {}) + + # Build response (no text output for embeddings) + return GenerationResponse( + request_id=request.request_id, + request_args=arguments.model_dump_json(), + text="", # Embeddings don't generate text + input_metrics=UsageMetrics( + text_tokens=usage.get("prompt_tokens", 0), + ), + output_metrics=UsageMetrics( + text_tokens=0, # No output tokens for embeddings + ), + ) + + def add_streaming_line(self, line: str) -> int | None: + """ + Embeddings do not support streaming. 
+ + :param line: Streaming line (unused) + :return: None (not supported) + :raises NotImplementedError: Embeddings never stream + """ + raise NotImplementedError("Embeddings do not support streaming") + + def compile_streaming( + self, request: GenerationRequest, arguments: GenerationRequestArguments + ) -> GenerationResponse: + """ + Embeddings do not support streaming. + + :param request: Generation request (unused) + :param arguments: Request arguments (unused) + :return: Never returns + :raises NotImplementedError: Embeddings never stream + """ + raise NotImplementedError("Embeddings do not support streaming") diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py index 56cdb9a72..c0caba404 100644 --- a/src/guidellm/benchmark/benchmarker.py +++ b/src/guidellm/benchmark/benchmarker.py @@ -64,7 +64,7 @@ async def run( environment: Environment, warmup: TransientPhaseConfig, cooldown: TransientPhaseConfig, - sample_requests: int | None = 20, + sample_requests: int | None = None, prefer_response_metrics: bool = True, progress: ( BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None diff --git a/src/guidellm/benchmark/embeddings_entrypoints.py b/src/guidellm/benchmark/embeddings_entrypoints.py new file mode 100644 index 000000000..a49dee801 --- /dev/null +++ b/src/guidellm/benchmark/embeddings_entrypoints.py @@ -0,0 +1,310 @@ +""" +Primary interface for executing embeddings benchmarks. + +This module orchestrates embeddings benchmarking workflows by coordinating backend +initialization, data loading, profile configuration, optional quality validation, +and output generation. Provides the main entry point `benchmark_embeddings` for +executing new embeddings benchmarks with comprehensive metric tracking. 
+""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, cast + +from guidellm.benchmark.benchmarker import Benchmarker +from guidellm.benchmark.entrypoints import ( + resolve_backend, + resolve_processor, + resolve_profile, + resolve_request_loader, +) +from guidellm.benchmark.outputs import ( + EmbeddingsBenchmarkerConsole, + EmbeddingsBenchmarkerOutput, +) +from guidellm.benchmark.progress import GenerativeConsoleBenchmarkerProgress +from guidellm.benchmark.schemas.base import TransientPhaseConfig +from guidellm.benchmark.schemas.embeddings import ( + BenchmarkEmbeddingsArgs, + EmbeddingsBenchmark, + EmbeddingsBenchmarkAccumulator, + EmbeddingsBenchmarksReport, +) +from guidellm.scheduler import ConstraintInitializer, NonDistributedEnvironment +from guidellm.schemas import GenerationRequest, GenerationResponse +from guidellm.utils import Console + +__all__ = ["benchmark_embeddings"] + + +async def resolve_embeddings_output_formats( + outputs: list[str] | tuple[str], + output_dir: str | Path | None, + console: Console | None = None, +) -> dict[str, EmbeddingsBenchmarkerOutput]: + """ + Resolve output format specifications into configured embeddings output + handler instances. 
+ + :param outputs: Specification of desired output files/types + :param output_dir: Base path for output file generation, or None for + default + :param console: Console instance for progress reporting, or None + :return: Dictionary mapping format names to configured output handler + instances + """ + console_step = ( + console.print_update_step(title="Resolving output formats") + if console + else None + ) + + resolved = EmbeddingsBenchmarkerOutput.resolve( + outputs=outputs, output_dir=output_dir + ) + + if console_step: + console_step.finish( + title="Output formats resolved", + details={key: str(val) for key, val in resolved.items()}, + status_level="success", + ) + + return resolved + + +async def benchmark_embeddings( # noqa: C901, PLR0912, PLR0915 + args: BenchmarkEmbeddingsArgs, + progress: GenerativeConsoleBenchmarkerProgress | None = None, + console: Console | None = None, + **constraints: str | ConstraintInitializer | Any, +) -> tuple[EmbeddingsBenchmarksReport, dict[str, Any]]: + """ + Execute a comprehensive embeddings benchmarking workflow. + + Orchestrates the full embeddings benchmarking pipeline by resolving all + components from provided arguments, executing benchmark runs across + configured profiles, and finalizing results in specified output formats. + Optionally performs quality validation using cosine similarity and MTEB + benchmarks. 
+ + :param args: Configuration arguments for the embeddings benchmark + execution + :param progress: Progress tracker for benchmark execution, or None for + no tracking + :param console: Console instance for status reporting, or None for + silent operation + :param constraints: Additional constraint initializers for benchmark + limits + :return: Tuple of EmbeddingsBenchmarksReport and dictionary of output + format results + + Example: + :: + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + data=["dataset.json"], + enable_quality_validation=True, + baseline_model="sentence-transformers/all-MiniLM-L6-v2" + ) + report, outputs = await benchmark_embeddings(args) + """ + # Resolve backend + backend, model = await resolve_backend( + backend=args.backend, + target=args.target, + model=args.model, + request_format=args.request_format or "/v1/embeddings", + console=console, + **(args.backend_kwargs or {}), + ) + + # Resolve processor (tokenizer) + processor = await resolve_processor( + processor=args.processor, model=model, console=console + ) + + # Resolve request loader for embeddings data + request_loader = await resolve_request_loader( + data=args.data, + model=model, + data_args=args.data_args, + data_samples=args.data_samples, + processor=processor, + processor_args=args.processor_args, + data_column_mapper=args.data_column_mapper, + data_preprocessors=args.data_preprocessors, + data_preprocessors_kwargs=args.data_preprocessors_kwargs, + data_finalizer=args.data_finalizer, + data_collator=args.data_collator, + data_sampler=args.data_sampler, + data_num_workers=args.data_num_workers, + random_seed=args.random_seed, + console=console, + **(args.dataloader_kwargs or {}), + ) + + # Resolve transient phases + warmup = TransientPhaseConfig.create_from_value(args.warmup) + cooldown = TransientPhaseConfig.create_from_value(args.cooldown) + if console: + console.print_update( + title="Resolved transient phase configurations", + details="\n".join( + [ + 
f"Warmup: {warmup}", + f"Cooldown: {cooldown}", + ] + ), + status="success", + ) + + # Resolve profile + profile = await resolve_profile( + profile=args.profile, + rate=args.rate, + random_seed=args.random_seed, + rampup=0.0, # Embeddings typically don't use rampup + constraints=constraints, + max_seconds=args.max_duration, + max_requests=args.max_requests, + max_errors=args.max_errors, + max_error_rate=None, + max_global_error_rate=None, + over_saturation=None, + console=console, + ) + + # Resolve output formats + output_formats = await resolve_embeddings_output_formats( + outputs=args.outputs, output_dir=args.output_dir, console=console + ) + + # Initialize quality validation if requested + if args.enable_quality_validation: + if console: + console.print_update( + title="Initializing quality validation", + details=f"Baseline model: {args.baseline_model or model}", + status="info", + ) + + try: + from guidellm.benchmark.quality import EmbeddingsQualityValidator + + _ = EmbeddingsQualityValidator( + baseline_model=args.baseline_model or model, + tolerance=args.quality_tolerance, + ) + + if console: + console.print_update( + title="Quality validation initialized", + details=f"Tolerance: {args.quality_tolerance}", + status="success", + ) + except ImportError: + if console: + console.print_update( + title="Quality validation unavailable", + details=( + "sentence-transformers not installed. 
" + "Install with: pip install sentence-transformers" + ), + status="warning", + ) + + # Run MTEB evaluation if requested (before main benchmark) + mteb_results = None + if args.enable_mteb: + if console: + console.print_update( + title="Running MTEB evaluation", + details=f"Tasks: {args.mteb_tasks or 'default'}", + status="info", + ) + + try: + from guidellm.benchmark.quality import MTEBValidator + + mteb_validator = MTEBValidator( + model_name=args.baseline_model or model, + task_names=args.mteb_tasks, + ) + mteb_results = mteb_validator.run_evaluation() + + if console: + console.print_update( + title="MTEB evaluation complete", + details=f"Main score: {mteb_results['mteb_main_score']:.4f}", + status="success", + ) + except ImportError: + if console: + console.print_update( + title="MTEB evaluation unavailable", + details="mteb not installed. Install with: pip install mteb", + status="warning", + ) + + # Create report + report = EmbeddingsBenchmarksReport(args=args) + + if console: + console.print_update( + title="Setup complete, starting embeddings benchmarks...", status="success" + ) + console.print("\n\n") + + # Run benchmarks + benchmarker: Benchmarker[ + EmbeddingsBenchmark, GenerationRequest, GenerationResponse + ] = Benchmarker() + + async for benchmark in benchmarker.run( + accumulator_class=EmbeddingsBenchmarkAccumulator, + benchmark_class=EmbeddingsBenchmark, + requests=request_loader, + backend=backend, + profile=profile, + environment=NonDistributedEnvironment(), + progress=cast("Any", progress), # type: ignore[arg-type] + sample_requests=False, # Embeddings don't need request sampling + warmup=warmup, + cooldown=cooldown, + prefer_response_metrics=True, # Prefer API-provided metrics + ): + if benchmark: + # Inject MTEB results if available + if mteb_results and benchmark.metrics.quality: + benchmark.metrics.quality.mteb_main_score = mteb_results[ + "mteb_main_score" + ] + benchmark.metrics.quality.mteb_task_scores = mteb_results[ + "mteb_task_scores" 
+ ] + + report.benchmarks.append(benchmark) + + # Finalize outputs + output_format_results = {} + for key, output in output_formats.items(): + output_result = await output.finalize(report) + output_format_results[key] = output_result + + # Print console output + if console: + await EmbeddingsBenchmarkerConsole(console=console).finalize(report) + console.print("\n\n") + console.print_update( + title=( + "Embeddings benchmarking complete, generated " + f"{len(report.benchmarks)} benchmark(s)" + ), + status="success", + ) + for key, value in output_format_results.items(): + console.print_update(title=f" {key:<8}: {value}", status="debug") + + return report, output_format_results diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index dd634d9a5..89dd8c044 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -39,7 +39,6 @@ DatasetFinalizer, DatasetPreprocessor, FinalizerRegistry, - GenerativeRequestCollator, PreprocessorRegistry, ProcessorFactory, ) @@ -237,12 +236,12 @@ async def resolve_request_loader( data_column_mapper: ( DatasetPreprocessor | dict[str, str | list[str]] - | Literal["generative_column_mapper"] + | Literal["generative_column_mapper", "embeddings_column_mapper"] ), data_preprocessors: list[DatasetPreprocessor | dict[str, str | list[str]] | str], data_preprocessors_kwargs: dict[str, Any], data_finalizer: (DatasetFinalizer | dict[str, Any] | str), - data_collator: Callable | Literal["generative"] | None, + data_collator: Callable | Literal["generative", "embeddings"] | None, data_sampler: Sampler[int] | Literal["shuffle"] | None, data_num_workers: int | None, random_seed: int, @@ -306,6 +305,16 @@ async def resolve_request_loader( data_finalizer, ) + # Resolve collator from string or use provided callable + if callable(data_collator): + collator_instance = data_collator + elif data_collator == "embeddings": + from guidellm.data import EmbeddingsRequestCollator + 
collator_instance = EmbeddingsRequestCollator() + else: # default to "generative" or None + from guidellm.data import GenerativeRequestCollator + collator_instance = GenerativeRequestCollator() + request_loader: DataLoader[GenerationRequest] = DataLoader( data=data, data_args=data_args, @@ -316,9 +325,7 @@ async def resolve_request_loader( ), preprocessors=preprocessors_list, finalizer=finalizer_instance, - collator=( - data_collator if callable(data_collator) else GenerativeRequestCollator() - ), + collator=collator_instance, sampler=data_sampler, num_workers=data_num_workers, random_seed=random_seed, diff --git a/src/guidellm/benchmark/outputs/__init__.py b/src/guidellm/benchmark/outputs/__init__.py index 2e321605d..75c4b6b88 100644 --- a/src/guidellm/benchmark/outputs/__init__.py +++ b/src/guidellm/benchmark/outputs/__init__.py @@ -11,11 +11,20 @@ from .console import GenerativeBenchmarkerConsole from .csv import GenerativeBenchmarkerCSV +from .embeddings_console import EmbeddingsBenchmarkerConsole +from .embeddings_csv import EmbeddingsBenchmarkerCSV +from .embeddings_html import EmbeddingsBenchmarkerHTML +from .embeddings_serialized import EmbeddingsBenchmarkerSerialized from .html import GenerativeBenchmarkerHTML -from .output import GenerativeBenchmarkerOutput +from .output import EmbeddingsBenchmarkerOutput, GenerativeBenchmarkerOutput from .serialized import GenerativeBenchmarkerSerialized __all__ = [ + "EmbeddingsBenchmarkerCSV", + "EmbeddingsBenchmarkerConsole", + "EmbeddingsBenchmarkerHTML", + "EmbeddingsBenchmarkerOutput", + "EmbeddingsBenchmarkerSerialized", "GenerativeBenchmarkerCSV", "GenerativeBenchmarkerConsole", "GenerativeBenchmarkerHTML", diff --git a/src/guidellm/benchmark/outputs/console.py b/src/guidellm/benchmark/outputs/console.py index 70070c425..d84e433f5 100644 --- a/src/guidellm/benchmark/outputs/console.py +++ b/src/guidellm/benchmark/outputs/console.py @@ -265,19 +265,31 @@ def print_run_summary_table(self, report: 
GenerativeBenchmarksReport): (benchmark.metrics.output_token_count, "Output Tokens"), ]: columns.add_value( - token_metrics.successful.total_sum, + ( + token_metrics.successful.total_sum + if token_metrics.successful is not None + else 0.0 + ), group=group, name="Comp", units="Tot", ) columns.add_value( - token_metrics.incomplete.total_sum, + ( + token_metrics.incomplete.total_sum + if token_metrics.incomplete is not None + else 0.0 + ), group=group, name="Inc", units="Tot", ) columns.add_value( - token_metrics.errored.total_sum, + ( + token_metrics.errored.total_sum + if token_metrics.errored is not None + else 0.0 + ), group=group, name="Err", units="Tot", diff --git a/src/guidellm/benchmark/outputs/csv.py b/src/guidellm/benchmark/outputs/csv.py index 081886cfd..eb4479d25 100644 --- a/src/guidellm/benchmark/outputs/csv.py +++ b/src/guidellm/benchmark/outputs/csv.py @@ -621,7 +621,7 @@ def _add_scheduler_metrics( """ metrics = benchmark.scheduler_metrics - requests_made_fields: list[tuple[str, int]] = [ + requests_made_fields: list[tuple[str, int | None]] = [ ("Requests Made Successful", metrics.requests_made.successful), ("Requests Made Incomplete", metrics.requests_made.incomplete), ("Requests Made Errored", metrics.requests_made.errored), diff --git a/src/guidellm/benchmark/outputs/embeddings_console.py b/src/guidellm/benchmark/outputs/embeddings_console.py new file mode 100644 index 000000000..848439cc4 --- /dev/null +++ b/src/guidellm/benchmark/outputs/embeddings_console.py @@ -0,0 +1,284 @@ +""" +Console output formatter for embeddings benchmarker results. + +Provides console-based output formatting for embeddings benchmark reports, +organizing metrics into structured tables that display request statistics, +latency measurements, throughput data, and optional quality validation metrics +(cosine similarity, MTEB scores). Simplified compared to generative output since +embeddings don't have output tokens or streaming behavior. 
+""" + +from __future__ import annotations + +from typing import Any + +from pydantic import Field + +from guidellm.benchmark.outputs.console import ConsoleTableColumnsCollection +from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput +from guidellm.benchmark.schemas.embeddings import EmbeddingsBenchmarksReport +from guidellm.utils import Console + +__all__ = ["EmbeddingsBenchmarkerConsole"] + + +@EmbeddingsBenchmarkerOutput.register(["console"]) +class EmbeddingsBenchmarkerConsole(EmbeddingsBenchmarkerOutput): + """ + Console output formatter for embeddings benchmark reports. + + Renders embeddings benchmark results as formatted tables in the terminal, + organizing metrics by category (run summary, request counts, latency, + throughput, quality validation) with proper alignment and type-specific + formatting for readability. + """ + + @classmethod + def validated_kwargs(cls, *_args, **_kwargs) -> dict[str, Any]: + """ + Validate and return keyword arguments for initialization. + + :return: Empty dict as no additional kwargs are required + """ + return {} + + console: Console = Field( + default_factory=Console, + description="Console utility for rendering formatted tables", + ) + + async def finalize(self, report: EmbeddingsBenchmarksReport) -> None: + """ + Print the complete embeddings benchmark report to the console. + + Renders all metric tables including run summary, request counts, latency, + throughput, and quality metrics to the console. + + :param report: The completed embeddings benchmark report + :return: None (console output only) + """ + self.print_run_summary_table(report) + self.print_request_counts_table(report) + self.print_request_latency_table(report) + self.print_server_throughput_table(report) + self.print_quality_metrics_table(report) + + def print_run_summary_table(self, report: EmbeddingsBenchmarksReport): + """ + Print the run summary table with timing and token information. 
+ + :param report: The embeddings benchmark report containing run metadata + """ + columns = ConsoleTableColumnsCollection() + + for benchmark in report.benchmarks: + columns.add_value( + benchmark.config.strategy.type_, + group="Benchmark", + name="Strategy", + type_="text", + ) + columns.add_value( + benchmark.start_time, group="Timings", name="Start", type_="timestamp" + ) + columns.add_value( + benchmark.end_time, group="Timings", name="End", type_="timestamp" + ) + columns.add_value( + benchmark.duration, group="Timings", name="Dur", units="Sec" + ) + columns.add_value( + benchmark.warmup_duration, group="Timings", name="Warm", units="Sec" + ) + columns.add_value( + benchmark.cooldown_duration, group="Timings", name="Cool", units="Sec" + ) + + # Only input tokens for embeddings (no output tokens) + token_metrics = benchmark.metrics.input_tokens_count + columns.add_value( + token_metrics.successful, + group="Input Tokens", + name="Comp", + units="Tot", + ) + columns.add_value( + token_metrics.incomplete, + group="Input Tokens", + name="Inc", + units="Tot", + ) + columns.add_value( + token_metrics.errored, + group="Input Tokens", + name="Err", + units="Tot", + ) + + headers, values = columns.get_table_data() + self.console.print("\n") + self.console.print_table(headers, values, title="Run Summary") + + def print_request_counts_table(self, report: EmbeddingsBenchmarksReport): + """ + Print the request counts table. 
+ + :param report: The embeddings benchmark report + """ + columns = ConsoleTableColumnsCollection() + + for benchmark in report.benchmarks: + columns.add_value( + benchmark.config.strategy.type_, + group="Benchmark", + name="Strategy", + type_="text", + ) + + for status in ["successful", "incomplete", "errored", "total"]: + count = getattr(benchmark.metrics.request_totals, status) + columns.add_value( + count, + group="Request Counts", + name=status.capitalize(), + units="Reqs", + ) + + headers, values = columns.get_table_data() + self.console.print("\n") + self.console.print_table(headers, values, title="Request Counts") + + def print_request_latency_table(self, report: EmbeddingsBenchmarksReport): + """ + Print the request latency table. + + :param report: The embeddings benchmark report + """ + columns = ConsoleTableColumnsCollection() + + for benchmark in report.benchmarks: + columns.add_value( + benchmark.config.strategy.type_, + group="Benchmark", + name="Strategy", + type_="text", + ) + + # Request latency stats + columns.add_stats( + benchmark.metrics.request_latency, + status="successful", + group="Request Latency", + name="Latency", + precision=3, + ) + + # Request concurrency + columns.add_stats( + benchmark.metrics.request_concurrency, + status="successful", + group="Concurrency", + name="Concurrent", + precision=1, + ) + + headers, values = columns.get_table_data() + self.console.print("\n") + self.console.print_table(headers, values, title="Request Latency") + + def print_server_throughput_table(self, report: EmbeddingsBenchmarksReport): + """ + Print the server throughput table. 
+ + :param report: The embeddings benchmark report + """ + columns = ConsoleTableColumnsCollection() + + for benchmark in report.benchmarks: + columns.add_value( + benchmark.config.strategy.type_, + group="Benchmark", + name="Strategy", + type_="text", + ) + + # Requests per second + columns.add_stats( + benchmark.metrics.requests_per_second, + status="successful", + group="Request Throughput", + name="Reqs", + precision=2, + ) + + # Input tokens per second + columns.add_stats( + benchmark.metrics.input_tokens_per_second, + status="successful", + group="Token Throughput", + name="Input Tok", + precision=1, + ) + + headers, values = columns.get_table_data() + self.console.print("\n") + self.console.print_table(headers, values, title="Server Throughput") + + def print_quality_metrics_table(self, report: EmbeddingsBenchmarksReport): + """ + Print the quality metrics table (if quality validation was enabled). + + :param report: The embeddings benchmark report + """ + # Check if any benchmark has quality metrics + has_quality = any( + benchmark.metrics.quality is not None for benchmark in report.benchmarks + ) + + if not has_quality: + return + + columns = ConsoleTableColumnsCollection() + + for benchmark in report.benchmarks: + columns.add_value( + benchmark.config.strategy.type_, + group="Benchmark", + name="Strategy", + type_="text", + ) + + if benchmark.metrics.quality: + # Cosine similarity + if benchmark.metrics.quality.baseline_cosine_similarity: + columns.add_stats( + benchmark.metrics.quality.baseline_cosine_similarity, + status="successful", + group="Cosine Similarity", + name="Baseline", + precision=4, + ) + + # Self-consistency + if benchmark.metrics.quality.self_consistency_score: + columns.add_stats( + benchmark.metrics.quality.self_consistency_score, + status="successful", + group="Consistency", + name="Self", + precision=4, + ) + + # MTEB main score + if benchmark.metrics.quality.mteb_main_score is not None: + columns.add_value( + 
benchmark.metrics.quality.mteb_main_score, + group="MTEB", + name="Main", + units="Score", + precision=4, + ) + + headers, values = columns.get_table_data() + self.console.print("\n") + self.console.print_table(headers, values, title="Quality Metrics") diff --git a/src/guidellm/benchmark/outputs/embeddings_csv.py b/src/guidellm/benchmark/outputs/embeddings_csv.py new file mode 100644 index 000000000..c83f3f718 --- /dev/null +++ b/src/guidellm/benchmark/outputs/embeddings_csv.py @@ -0,0 +1,393 @@ +""" +CSV output formatter for embeddings benchmark results. + +Provides CSV export functionality for embeddings benchmark reports with comprehensive +metrics including timing, throughput, latency, input token data, and optional quality +validation metrics (cosine similarity, MTEB scores). Uses multi-row headers to organize +metrics hierarchically without output tokens or streaming behavior. +""" + +from __future__ import annotations + +import csv +from pathlib import Path +from typing import TYPE_CHECKING, Annotated, Any, ClassVar + +from pydantic import Field + +if TYPE_CHECKING: + from _csv import _writer + +from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput +from guidellm.benchmark.schemas.embeddings import ( + EmbeddingsBenchmark, + EmbeddingsBenchmarksReport, +) +from guidellm.schemas import DistributionSummary, StatusDistributionSummary +from guidellm.utils import safe_format_timestamp + +__all__ = ["EmbeddingsBenchmarkerCSV"] + +TIMESTAMP_FORMAT: Annotated[str, "Format string for timestamp output in CSV files"] = ( + "%Y-%m-%d %H:%M:%S" +) + + +@EmbeddingsBenchmarkerOutput.register("csv") +class EmbeddingsBenchmarkerCSV(EmbeddingsBenchmarkerOutput): + """ + CSV output formatter for embeddings benchmark results. 
+ + Exports comprehensive embeddings benchmark data to CSV format with + multi-row headers organizing metrics into categories including run + information, timing, request counts, latency, throughput, input token + data, quality validation metrics, and scheduler state. Each benchmark run + becomes a row with statistical distributions represented as mean, median, + standard deviation, and percentiles. + + :cvar DEFAULT_FILE: Default filename for CSV output + """ + + DEFAULT_FILE: ClassVar[str] = "embeddings_benchmarks.csv" + + @classmethod + def validated_kwargs( + cls, output_path: str | Path | None, **_kwargs + ) -> dict[str, Any]: + """ + Validate and normalize constructor keyword arguments. + + :param output_path: Path for CSV output file or directory + :param _kwargs: Additional keyword arguments (ignored) + :return: Normalized keyword arguments dictionary + """ + new_kwargs = {} + if output_path is not None: + new_kwargs["output_path"] = ( + Path(output_path) if not isinstance(output_path, Path) else output_path + ) + return new_kwargs + + output_path: Path = Field( + default_factory=lambda: Path.cwd(), + description=( + "Path where the CSV file will be saved, defaults to current " + "directory" + ), + ) + + async def finalize(self, report: EmbeddingsBenchmarksReport) -> Path: + """ + Save the embeddings benchmark report as a CSV file. 
+ + :param report: The completed embeddings benchmark report + :return: Path to the saved CSV file + """ + output_path = self.output_path + if output_path.is_dir(): + output_path = output_path / EmbeddingsBenchmarkerCSV.DEFAULT_FILE + output_path.parent.mkdir(parents=True, exist_ok=True) + + with output_path.open("w", newline="") as file: + writer = csv.writer(file) + headers: list[list[str]] = [] + rows: list[list[str | int | float]] = [] + + for benchmark in report.benchmarks: + benchmark_headers: list[list[str]] = [] + benchmark_values: list[str | int | float] = [] + + self._add_run_info(benchmark, benchmark_headers, benchmark_values) + self._add_benchmark_info(benchmark, benchmark_headers, benchmark_values) + self._add_timing_info(benchmark, benchmark_headers, benchmark_values) + self._add_request_counts(benchmark, benchmark_headers, benchmark_values) + self._add_request_latency_metrics( + benchmark, benchmark_headers, benchmark_values + ) + self._add_server_throughput_metrics( + benchmark, benchmark_headers, benchmark_values + ) + self._add_input_token_metrics( + benchmark, benchmark_headers, benchmark_values + ) + self._add_quality_metrics( + benchmark, benchmark_headers, benchmark_values + ) + self._add_scheduler_info( + benchmark, benchmark_headers, benchmark_values + ) + self._add_runtime_info(report, benchmark_headers, benchmark_values) + + if not headers: + headers = benchmark_headers + rows.append(benchmark_values) + + self._write_multirow_header(writer, headers) + for row in rows: + writer.writerow(row) + + return output_path + + def _write_multirow_header( + self, writer: _writer, headers: list[list[str]] + ) -> None: + """ + Write multi-row header to CSV file. + + Transposes column-wise headers into row-wise header rows with proper + alignment for hierarchical metric organization. 
+ + :param writer: CSV writer instance + :param headers: List of header columns, each column is [group, name, units] + """ + if not headers: + return + + num_rows = max(len(header) for header in headers) + header_rows: list[list[str]] = [[] for _ in range(num_rows)] + + for header in headers: + for i in range(num_rows): + header_rows[i].append(header[i] if i < len(header) else "") + + for row in header_rows: + writer.writerow(row) + + def _add_run_info( + self, + benchmark: EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add run identification information.""" + headers.append(["Run Info", "Model", ""]) + model = ( + benchmark.config.requests.get("model", "N/A") + if isinstance(benchmark.config.requests, dict) + else "N/A" + ) + values.append(model) + + headers.append(["Run Info", "Backend", ""]) + backend = ( + benchmark.config.backend.get("type", "N/A") + if isinstance(benchmark.config.backend, dict) + else "N/A" + ) + values.append(backend) + + def _add_benchmark_info( + self, + benchmark: EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add benchmark configuration information.""" + headers.append(["Benchmark", "Strategy", ""]) + values.append(benchmark.config.strategy.type_) + + if hasattr(benchmark.config.strategy, "rate"): + headers.append(["Benchmark", "Rate", "Req/s"]) + values.append(benchmark.config.strategy.rate or 0) + + def _add_timing_info( + self, + benchmark: EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add timing information.""" + headers.append(["Timings", "Start", ""]) + values.append(safe_format_timestamp(benchmark.start_time, TIMESTAMP_FORMAT)) + + headers.append(["Timings", "End", ""]) + values.append(safe_format_timestamp(benchmark.end_time, TIMESTAMP_FORMAT)) + + headers.append(["Timings", "Duration", "Sec"]) + values.append(benchmark.duration) + + headers.append(["Timings", 
"Warmup", "Sec"]) + values.append(benchmark.warmup_duration) + + headers.append(["Timings", "Cooldown", "Sec"]) + values.append(benchmark.cooldown_duration) + + def _add_request_counts( + self, + benchmark: EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add request count information.""" + for status in ["successful", "incomplete", "errored", "total"]: + count = getattr(benchmark.metrics.request_totals, status) + headers.append(["Request Counts", status.capitalize(), "Reqs"]) + values.append(count) + + def _add_request_latency_metrics( + self, + benchmark: EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add request latency metrics.""" + self._add_stats_for_metric( + headers, + values, + benchmark.metrics.request_latency, + "Request Latency", + "Latency (s)", + ) + + self._add_stats_for_metric( + headers, + values, + benchmark.metrics.request_concurrency, + "Concurrency", + "Concurrent Reqs", + ) + + def _add_server_throughput_metrics( + self, + benchmark: EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add server throughput metrics.""" + self._add_stats_for_metric( + headers, + values, + benchmark.metrics.requests_per_second, + "Request Throughput", + "Reqs/s", + ) + + self._add_stats_for_metric( + headers, + values, + benchmark.metrics.input_tokens_per_second, + "Token Throughput", + "Input Tok/s", + ) + + def _add_input_token_metrics( + self, + benchmark: EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add input token count metrics (no output tokens for embeddings).""" + for status in ["successful", "incomplete", "errored", "total"]: + count = getattr(benchmark.metrics.input_tokens_count, status) + headers.append(["Input Tokens", status.capitalize(), "Tokens"]) + values.append(count) + + def _add_quality_metrics( + self, + benchmark: 
EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add quality validation metrics if available.""" + if not benchmark.metrics.quality: + return + + # Cosine similarity + if benchmark.metrics.quality.baseline_cosine_similarity: + self._add_stats_for_metric( + headers, + values, + benchmark.metrics.quality.baseline_cosine_similarity, + "Quality Validation", + "Cosine Sim", + ) + + # Self-consistency + if benchmark.metrics.quality.self_consistency_score: + self._add_stats_for_metric( + headers, + values, + benchmark.metrics.quality.self_consistency_score, + "Quality Validation", + "Consistency", + ) + + # MTEB main score + if benchmark.metrics.quality.mteb_main_score is not None: + headers.append(["MTEB", "Main Score", ""]) + values.append(benchmark.metrics.quality.mteb_main_score) + + # MTEB task scores + if benchmark.metrics.quality.mteb_task_scores: + for task, score in benchmark.metrics.quality.mteb_task_scores.items(): + headers.append(["MTEB Tasks", task, "Score"]) + values.append(score) + + def _add_scheduler_info( + self, + benchmark: EmbeddingsBenchmark, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add scheduler state information.""" + headers.append(["Scheduler", "Queued Avg", "Sec"]) + values.append(benchmark.scheduler_metrics.queued_time_avg) + + headers.append(["Scheduler", "Resolve Avg", "Sec"]) + values.append(benchmark.scheduler_metrics.resolve_time_avg) + + def _add_runtime_info( + self, + report: EmbeddingsBenchmarksReport, + headers: list[list[str]], + values: list[str | int | float], + ) -> None: + """Add runtime environment information.""" + headers.append(["Runtime", "GuideLLM Ver", ""]) + values.append(report.metadata.guidellm_version) + + headers.append(["Runtime", "Python Ver", ""]) + values.append(report.metadata.python_version) + + def _add_stats_for_metric( + self, + headers: list[list[str]], + values: list[str | int | float], + stats: 
StatusDistributionSummary, + group: str, + metric_name: str, + ) -> None: + """ + Add statistical columns for a metric with mean, median, stddev, and percentiles. + + :param headers: Headers list to append to + :param values: Values list to append to + :param stats: Status distribution summary containing statistics + :param group: Metric group name for header + :param metric_name: Metric display name + """ + successful_stats: DistributionSummary | None = stats.successful + + # Mean + headers.append([group, metric_name, "Mean"]) + values.append(successful_stats.mean if successful_stats else 0) + + # Median + headers.append([group, metric_name, "Median"]) + values.append(successful_stats.median if successful_stats else 0) + + # Std Dev + headers.append([group, metric_name, "StdDev"]) + values.append(successful_stats.std_dev if successful_stats else 0) + + # P95 + headers.append([group, metric_name, "P95"]) + values.append( + successful_stats.percentiles.p95 if successful_stats else 0 + ) + + # P99 + headers.append([group, metric_name, "P99"]) + values.append( + successful_stats.percentiles.p99 if successful_stats else 0 + ) diff --git a/src/guidellm/benchmark/outputs/embeddings_html.py b/src/guidellm/benchmark/outputs/embeddings_html.py new file mode 100644 index 000000000..06ffc7390 --- /dev/null +++ b/src/guidellm/benchmark/outputs/embeddings_html.py @@ -0,0 +1,347 @@ +""" +HTML output formatter for embeddings benchmark results. + +Transforms embeddings benchmark data into interactive web-based reports by +building UI data structures, converting keys to camelCase for JavaScript +compatibility, and injecting formatted data into HTML templates. Simplified +compared to generative output since embeddings don't have output tokens, +streaming behavior, or multi-modality support. 
+""" + +from __future__ import annotations + +import json +from copy import deepcopy +from pathlib import Path +from typing import Any, ClassVar + +from pydantic import Field + +from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput +from guidellm.benchmark.schemas.embeddings import ( + BenchmarkEmbeddingsArgs, + EmbeddingsBenchmark, + EmbeddingsBenchmarksReport, +) +from guidellm.utils import camelize_str, recursive_key_update + +__all__ = ["EmbeddingsBenchmarkerHTML"] + + +@EmbeddingsBenchmarkerOutput.register("html") +class EmbeddingsBenchmarkerHTML(EmbeddingsBenchmarkerOutput): + """ + HTML output formatter for embeddings benchmark results. + + Generates interactive HTML reports from embeddings benchmark data by + transforming results into camelCase JSON structures and injecting them into + HTML templates. The formatter processes benchmark metrics, creates + distribution visualizations, and embeds all data into a pre-built HTML + template for browser-based display. + + :cvar DEFAULT_FILE: Default filename for HTML output when a directory is + provided + """ + + DEFAULT_FILE: ClassVar[str] = "embeddings_benchmarks.html" + + output_path: Path = Field( + default_factory=lambda: Path.cwd(), + description="Directory or file path for saving the HTML report", + ) + + @classmethod + def validated_kwargs( + cls, output_path: str | Path | None, **_kwargs + ) -> dict[str, Any]: + """ + Validate and normalize output path argument. + + :param output_path: Output file or directory path for the HTML report + :return: Dictionary containing validated output_path if provided + """ + validated: dict[str, Any] = {} + if output_path is not None: + validated["output_path"] = ( + Path(output_path) if not isinstance(output_path, Path) else output_path + ) + return validated + + async def finalize(self, report: EmbeddingsBenchmarksReport) -> Path: + """ + Generate and save the HTML embeddings benchmark report. 
+ + :param report: Completed embeddings benchmark report + :return: Path to the saved HTML report file + """ + output_path = self.output_path + if output_path.is_dir(): + output_path = output_path / self.DEFAULT_FILE + output_path.parent.mkdir(parents=True, exist_ok=True) + + data = self._build_ui_data(report.benchmarks, report.args) + camel_data = recursive_key_update(deepcopy(data), camelize_str) + + ui_api_data = { + "data": camel_data, + "guidelLmVersion": report.metadata.guidellm_version, + } + + # Load HTML template from package resources + import importlib.resources + template_content = ( + importlib.resources.files("guidellm.benchmark.outputs") + .joinpath("html_outputs/embeddings_template.html") + .read_text() + ) + + # Inject data into template + html_content = template_content.replace( + "const uiApiData = {};", + f"const uiApiData = {json.dumps(ui_api_data, indent=2)};", + ) + + output_path.write_text(html_content) + return output_path + + def _build_ui_data( + self, + benchmarks: list[EmbeddingsBenchmark], + args: BenchmarkEmbeddingsArgs, + ) -> dict[str, Any]: + """ + Build UI data structure from benchmarks and arguments. + + :param benchmarks: List of completed benchmarks + :param args: Benchmark arguments + :return: Dictionary containing all UI data + """ + return { + "run_info": { + "model": args.model or "N/A", + "backend": str(args.backend), + "task": "embeddings", + "target": args.target, + }, + "workload_details": self._build_workload_details(benchmarks), + "benchmarks": self._build_benchmarks_data(benchmarks), + } + + def _build_workload_details( + self, benchmarks: list[EmbeddingsBenchmark] + ) -> dict[str, Any]: + """ + Build workload details section. 
+ + :param benchmarks: List of completed benchmarks + :return: Workload details dictionary + """ + if not benchmarks: + return {} + + # Sample from first benchmark + first_benchmark = benchmarks[0] + + # Build input text statistics + input_texts = [] + if first_benchmark.requests.successful is not None: + for req in first_benchmark.requests.successful[:10]: # Sample first 10 + if req.input_metrics.text_tokens: + input_texts.append( + { + "tokens": req.input_metrics.text_tokens, + "sample": f"Sample request {req.request_id[:8]}...", + } + ) + + successful_count = first_benchmark.metrics.request_totals.successful or 0 + successful_tokens = first_benchmark.metrics.input_tokens_count.successful or 0 + return { + "prompts": { + "samples": input_texts, + "token_statistics": { + "mean": ( + successful_tokens / successful_count + if successful_count > 0 + else 0 + ), + }, + }, + "quality_validation": self._build_quality_section(first_benchmark) + if first_benchmark.metrics.quality + else None, + } + + def _build_quality_section( + self, benchmark: EmbeddingsBenchmark + ) -> dict[str, Any] | None: + """ + Build quality validation section. 
+ + :param benchmark: Benchmark with quality metrics + :return: Quality section dictionary or None + """ + if not benchmark.metrics.quality: + return None + + quality = benchmark.metrics.quality + section: dict[str, Any] = {} + + # Cosine similarity distribution + if ( + quality.baseline_cosine_similarity + and quality.baseline_cosine_similarity.successful + ): + section["cosine_similarity"] = { + "mean": ( + quality.baseline_cosine_similarity.successful.mean + ), + "median": ( + quality.baseline_cosine_similarity.successful.median + ), + "std_dev": ( + quality.baseline_cosine_similarity.successful.std_dev + ), + "p95": ( + quality.baseline_cosine_similarity.successful + .percentiles.p95 + ), + } + + # MTEB scores + if quality.mteb_main_score is not None: + section["mteb"] = { + "main_score": quality.mteb_main_score, + "task_scores": quality.mteb_task_scores or {}, + } + + return section if section else None + + def _build_benchmarks_data( + self, benchmarks: list[EmbeddingsBenchmark] + ) -> list[dict[str, Any]]: + """ + Build benchmarks data for visualization. 
+ + :param benchmarks: List of completed benchmarks + :return: List of benchmark data dictionaries + """ + results = [] + + for benchmark in benchmarks: + metrics = benchmark.metrics + + benchmark_data = { + "strategy": benchmark.config.strategy.type_, + "rate": getattr(benchmark.config.strategy, "rate", None), + "duration": benchmark.duration, + "warmup_duration": benchmark.warmup_duration, + "cooldown_duration": benchmark.cooldown_duration, + # Request counts + "request_counts": { + "successful": metrics.request_totals.successful, + "incomplete": metrics.request_totals.incomplete, + "errored": metrics.request_totals.errored, + "total": metrics.request_totals.total, + }, + # Request metrics + "request_latency": self._distribution_to_dict( + metrics.request_latency.successful + ), + "request_concurrency": self._distribution_to_dict( + metrics.request_concurrency.successful + ), + "requests_per_second": self._distribution_to_dict( + metrics.requests_per_second.successful + ), + # Token metrics (input only) + "input_tokens": { + "total": metrics.input_tokens_count.successful, + "per_second": self._distribution_to_dict( + metrics.input_tokens_per_second.successful + ), + }, + # Quality metrics (if available) + "quality": ( + self._build_quality_data(benchmark) + if metrics.quality + else None + ), + } + + results.append(benchmark_data) + + return results + + def _build_quality_data( + self, benchmark: EmbeddingsBenchmark + ) -> dict[str, Any] | None: + """ + Build quality metrics data. 
+ + :param benchmark: Benchmark with quality metrics + :return: Quality data dictionary or None + """ + if not benchmark.metrics.quality: + return None + + quality = benchmark.metrics.quality + data: dict[str, Any] = {} + + if ( + quality.baseline_cosine_similarity + and quality.baseline_cosine_similarity.successful + ): + data["cosine_similarity"] = self._distribution_to_dict( + quality.baseline_cosine_similarity.successful + ) + + if quality.self_consistency_score and quality.self_consistency_score.successful: + data["self_consistency"] = self._distribution_to_dict( + quality.self_consistency_score.successful + ) + + if quality.mteb_main_score is not None: + data["mteb_main_score"] = quality.mteb_main_score + + if quality.mteb_task_scores: + data["mteb_task_scores"] = quality.mteb_task_scores + + return data if data else None + + def _distribution_to_dict( + self, dist: Any + ) -> dict[str, float | None]: + """ + Convert distribution summary to dictionary. + + :param dist: Distribution summary object + :return: Dictionary with mean, median, std_dev, and + percentiles + """ + if dist is None: + return { + "mean": None, + "median": None, + "std_dev": None, + "p50": None, + "p95": None, + "p99": None, + } + + return { + "mean": dist.mean, + "median": dist.median, + "std_dev": dist.std_dev, + "p50": ( + dist.percentiles.p50 + if hasattr(dist, "percentiles") + else dist.median + ), + "p95": ( + dist.percentiles.p95 if hasattr(dist, "percentiles") else None + ), + "p99": ( + dist.percentiles.p99 if hasattr(dist, "percentiles") else None + ), + } diff --git a/src/guidellm/benchmark/outputs/embeddings_serialized.py b/src/guidellm/benchmark/outputs/embeddings_serialized.py new file mode 100644 index 000000000..6378f0fd4 --- /dev/null +++ b/src/guidellm/benchmark/outputs/embeddings_serialized.py @@ -0,0 +1,70 @@ +""" +Serialized output handler for embeddings benchmark reports. 
+ +Provides a serialized output implementation that saves embeddings benchmark reports +to JSON or YAML file formats. Extends the base EmbeddingsBenchmarkerOutput to handle +file-based persistence of benchmark results. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from pydantic import Field + +from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput +from guidellm.benchmark.schemas.embeddings import EmbeddingsBenchmarksReport + +__all__ = ["EmbeddingsBenchmarkerSerialized"] + + +@EmbeddingsBenchmarkerOutput.register(["json", "yaml"]) +class EmbeddingsBenchmarkerSerialized(EmbeddingsBenchmarkerOutput): + """ + Serialized output handler for embeddings benchmark reports in JSON or YAML formats. + + Persists embeddings benchmark reports to the file system in either JSON or YAML + format. Supports flexible path specification, allowing users to provide either + a directory (where a default filename will be generated) or an explicit file path. + + Example: + :: + output = EmbeddingsBenchmarkerSerialized( + output_path="/path/to/embeddings_output.json" + ) + result_path = await output.finalize(report) + """ + + output_path: Path = Field( + default_factory=lambda: Path.cwd(), + description="Directory or file path for saving the serialized report", + ) + + @classmethod + def validated_kwargs( + cls, output_path: str | Path | None, **_kwargs + ) -> dict[str, Any]: + """ + Validate and normalize output path keyword arguments. 
+ + :param output_path: Directory or file path for serialization output + :param _kwargs: Additional keyword arguments (ignored) + :return: Dictionary of validated keyword arguments for class initialization + """ + validated: dict[str, Any] = {} + if output_path is not None: + validated["output_path"] = ( + Path(output_path) if not isinstance(output_path, Path) else output_path + ) + return validated + + async def finalize(self, report: EmbeddingsBenchmarksReport) -> Path: + """ + Serialize and save the embeddings benchmark report to the configured + output path. + + :param report: The embeddings benchmarks report to serialize + :return: Path to the saved report file + """ + return report.save_file(self.output_path) diff --git a/src/guidellm/benchmark/outputs/html.py b/src/guidellm/benchmark/outputs/html.py index 318d9d4de..084cad611 100644 --- a/src/guidellm/benchmark/outputs/html.py +++ b/src/guidellm/benchmark/outputs/html.py @@ -357,7 +357,12 @@ def _build_workload_details( """ target = args.target rate_type = benchmarks[0].config.strategy.type_ - successful_requests = [req for bm in benchmarks for req in bm.requests.successful] + successful_requests = [ + req + for bm in benchmarks + if bm.requests.successful is not None + for req in bm.requests.successful + ] sample_indices = random.sample( range(len(successful_requests)), min(5, len(successful_requests)) @@ -378,11 +383,13 @@ def _build_workload_details( prompt_tokens = [ float(req.prompt_tokens) if req.prompt_tokens is not None else -1 for bm in benchmarks + if bm.requests.successful is not None for req in bm.requests.successful ] output_tokens = [ float(req.output_tokens) if req.output_tokens is not None else -1 for bm in benchmarks + if bm.requests.successful is not None for req in bm.requests.successful ] @@ -396,6 +403,7 @@ def _build_workload_details( all_req_times = [ req.info.timings.request_start - min_start_time for bm in benchmarks + if bm.requests.successful is not None for req in 
bm.requests.successful if req.info.timings.request_start is not None ] @@ -451,22 +459,30 @@ def _build_benchmarks(benchmarks: list[GenerativeBenchmark]) -> list[dict[str, A """ result = [] for bm in benchmarks: + # Helper to safely get distribution summary or None + def get_dist_summary(dist: DistributionSummary | None) -> dict | None: + if dist is not None: + return _TabularDistributionSummary.from_distribution_summary( + dist + ).model_dump() + return None + result.append( { - "requests_per_second": bm.metrics.requests_per_second.successful.mean, - "itl": _TabularDistributionSummary.from_distribution_summary( - bm.metrics.inter_token_latency_ms.successful - ).model_dump(), - "ttft": _TabularDistributionSummary.from_distribution_summary( + "requests_per_second": ( + bm.metrics.requests_per_second.successful.mean + if bm.metrics.requests_per_second.successful is not None + else 0.0 + ), + "itl": get_dist_summary(bm.metrics.inter_token_latency_ms.successful), + "ttft": get_dist_summary( bm.metrics.time_to_first_token_ms.successful - ).model_dump(), - "throughput": _TabularDistributionSummary.from_distribution_summary( + ), + "throughput": get_dist_summary( bm.metrics.output_tokens_per_second.successful - ).model_dump(), - "time_per_request": ( - _TabularDistributionSummary.from_distribution_summary( - bm.metrics.request_latency.successful - ).model_dump() + ), + "time_per_request": get_dist_summary( + bm.metrics.request_latency.successful ), } ) diff --git a/src/guidellm/benchmark/outputs/html_outputs/__init__.py b/src/guidellm/benchmark/outputs/html_outputs/__init__.py new file mode 100644 index 000000000..4a5840cdc --- /dev/null +++ b/src/guidellm/benchmark/outputs/html_outputs/__init__.py @@ -0,0 +1 @@ +"""HTML template resources for benchmark outputs.""" diff --git a/src/guidellm/benchmark/outputs/html_outputs/embeddings_template.html b/src/guidellm/benchmark/outputs/html_outputs/embeddings_template.html new file mode 100644 index 000000000..5f3012364 --- 
/dev/null +++ b/src/guidellm/benchmark/outputs/html_outputs/embeddings_template.html @@ -0,0 +1,156 @@ + + + + + + GuideLLM Embeddings Benchmark Report + + + +
+

GuideLLM Embeddings Benchmark Report

+ +

Summary

+
+ +

Metrics

+
+ +

Details

+
+
class EmbeddingsBenchmarkerOutput(
    BaseModel, RegistryMixin[type["EmbeddingsBenchmarkerOutput"]], ABC
):
    """
    Abstract base for embeddings benchmark output formatters with registry support.

    Defines the interface for transforming embeddings benchmark reports into various
    output formats. Similar to GenerativeBenchmarkerOutput but adapted for embeddings
    which lack output tokens, streaming metrics, and multi-modality support.

    Example:
    ::
        # Register and resolve output formats
        outputs = EmbeddingsBenchmarkerOutput.resolve(
            output_formats=["json", "csv"],
            output_path="./results"
        )

        # Finalize outputs with benchmark report
        for output in outputs.values():
            await output.finalize(report)
    """

    model_config = ConfigDict(
        extra="ignore",
        arbitrary_types_allowed=True,
        validate_assignment=True,
        from_attributes=True,
        use_enum_values=True,
    )

    @classmethod
    @abstractmethod
    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
        """
        Validate and normalize initialization arguments for output formatter.

        :param args: Positional arguments for formatter configuration
        :param kwargs: Keyword arguments for formatter configuration
        :return: Validated dictionary of parameters for formatter creation
        :raises NotImplementedError: Must be implemented by subclasses
        """
        ...

    @classmethod
    def resolve(
        cls,
        outputs: (
            Sequence[str | EmbeddingsBenchmarkerOutput]
            | Mapping[str, str | dict[str, Any] | EmbeddingsBenchmarkerOutput]
            | None
        ),
        output_dir: str | Path | None,
    ) -> dict[str, EmbeddingsBenchmarkerOutput]:
        """
        Resolve output format specifications into formatter instances.

        :param outputs: Format specifications — either a sequence of format
            names / file names / formatter instances, or a mapping of format
            keys to kwargs dicts or formatter instances
        :param output_dir: Default output directory path
        :return: Dictionary mapping format keys to instantiated formatter instances
        :raises TypeError: If format specification type is invalid
        :raises ValueError: If format resolution or validation fails
        """
        if not outputs:
            return {}

        keys: Sequence[str]
        values: Sequence[dict[str, Any] | EmbeddingsBenchmarkerOutput]
        if isinstance(outputs, Mapping):
            # Mapping inputs are already keyed by format name.
            # NOTE(review): the type hint allows str mapping values, but a bare
            # string value would fail at the ** unpacking below — confirm whether
            # str values should be normalized to {} here.
            keys = list(outputs.keys())
            values = list(outputs.values())  # type: ignore[arg-type]
        else:
            # Only sequence inputs are normalized entry-by-entry. Running this
            # loop for a Mapping too would re-process each key as a bare format
            # name and duplicate every entry (and break the strict zip below).
            keys = []
            values = []
            for out in outputs:
                if isinstance(out, str) and "." in out:
                    # Filename-style entry: derive format from the extension
                    # and anchor the file under the output directory.
                    ext = Path(out).suffix[1:].lower()
                    keys.append(ext)
                    values.append(
                        {"output_path": Path(output_dir or Path.cwd()) / out}
                    )
                elif isinstance(out, str):
                    # Bare format name, e.g. "json" — use registry defaults.
                    keys.append(out)
                    values.append({})
                elif isinstance(out, EmbeddingsBenchmarkerOutput):
                    keys.append(out.__class__.__name__)
                    values.append(out)
                else:
                    raise TypeError(
                        "output_formats must be a sequence of strings or "
                        "EmbeddingsBenchmarkerOutput instances, or a mapping."
                    )

        resolved: dict[str, EmbeddingsBenchmarkerOutput] = {}
        for key, val in zip(keys, values, strict=True):
            if isinstance(val, EmbeddingsBenchmarkerOutput):
                # Pre-built formatter instances pass through unchanged.
                resolved[key] = val
            else:
                output_class = cls.get_registered_object(key)
                if output_class is None:
                    available_formats = (
                        list(cls.registry.keys()) if cls.registry else []
                    )
                    raise ValueError(
                        f"Output format '{key}' is not registered. "
                        f"Available formats: {available_formats}"
                    )
                # Explicit per-entry kwargs override the shared output_dir.
                kwargs = output_class.validated_kwargs(
                    **{"output_path": output_dir, **val}  # type: ignore[dict-item]
                )
                resolved[key] = output_class(**kwargs)

        return resolved

    @abstractmethod
    async def finalize(self, report: EmbeddingsBenchmarksReport) -> Any:
        """
        Process and persist embeddings benchmark report in the formatter's
        output format.

        :param report: Embeddings benchmark report containing results to
            format
        :return: Format-specific output result (file path, response object,
            etc.)
        :raises NotImplementedError: Must be implemented by subclasses
        """
        ...
diff --git a/src/guidellm/benchmark/progress.py b/src/guidellm/benchmark/progress.py index 289e367c0..25eb41308 100644 --- a/src/guidellm/benchmark/progress.py +++ b/src/guidellm/benchmark/progress.py @@ -32,6 +32,8 @@ from guidellm.benchmark.schemas import ( BenchmarkAccumulatorT, BenchmarkT, + EmbeddingsBenchmark, + EmbeddingsBenchmarkAccumulator, GenerativeBenchmark, GenerativeBenchmarkAccumulator, ) @@ -181,7 +183,7 @@ async def on_benchmark_start(self, strategy: SchedulingStrategy): async def on_benchmark_update( self, - accumulator: GenerativeBenchmarkAccumulator, + accumulator: GenerativeBenchmarkAccumulator | EmbeddingsBenchmarkAccumulator, scheduler_state: SchedulerState, ): """ @@ -307,7 +309,7 @@ def start_benchmark(self, strategy: SchedulingStrategy): def update_benchmark( self, - accumulator: GenerativeBenchmarkAccumulator, + accumulator: GenerativeBenchmarkAccumulator | EmbeddingsBenchmarkAccumulator, scheduler_state: SchedulerState, ): current_state = self.benchmark_task_states[self.current_index] @@ -356,6 +358,7 @@ class _GenerativeProgressTaskState: queued_time: float = 0.0 request_targeted_start_delay: float = 0.0 scheduler_overheads_time: float = 0.0 + is_embeddings: bool = False # Track if this is an embeddings benchmark @property def current(self) -> dict[str, Any]: @@ -473,6 +476,28 @@ def formatted_tokens_summary(self) -> str: if self.benchmark_status == "pending": return " " + # Show simplified metrics for embeddings (no output tokens, TTFT, ITL) + if self.is_embeddings: + return ( + f"[{Colors.info}]Tok:[/{Colors.info}] " + + format_value_display( + value=self.total_tokens_rate, + label="inp/s", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + format_value_display( + value=self.prompt_tokens, + label="Input", + total_characters=12, + digits_places=4, + decimal_places=0, + ) + ) + + # Full metrics for generative models return ( f"[{Colors.info}]Tok:[/{Colors.info}] " + format_value_display( @@ -566,7 +591,7 @@ 
def start(self, strategy: SchedulingStrategy): def update( self, - accumulator: GenerativeBenchmarkAccumulator, + accumulator: GenerativeBenchmarkAccumulator | EmbeddingsBenchmarkAccumulator, scheduler_state: SchedulerState, ): self.progress = ( @@ -586,15 +611,33 @@ def update( requests_per_second=accumulator.completed_metrics.requests.rate_per_second, request_latency=accumulator.completed_metrics.request_latency.mean, ) - self._update_token_stats( - output_tokens=accumulator.completed_metrics.total_tokens.mean, - output_tokens_rate=accumulator.completed_metrics.output_tokens.rate_per_second, - prompt_tokens=accumulator.completed_metrics.prompt_tokens.mean, - total_tokens_rate=accumulator.completed_metrics.total_tokens.rate_per_second, - time_to_first_token=accumulator.completed_metrics.time_to_first_token_ms.mean, - inter_token_latency=accumulator.completed_metrics.inter_token_latency_ms.mean, - converted=True, - ) + + # Handle token stats differently for embeddings vs generative + if isinstance(accumulator, EmbeddingsBenchmarkAccumulator): + # Mark as embeddings benchmark + self.is_embeddings = True + # For embeddings: no output tokens, TTFT, or ITL + self._update_token_stats( + output_tokens=0.0, + output_tokens_rate=0.0, + prompt_tokens=accumulator.completed_metrics.prompt_tokens.mean, + total_tokens_rate=accumulator.completed_metrics.prompt_tokens.rate_per_second, + time_to_first_token=0.0, + inter_token_latency=0.0, + converted=True, + ) + else: + # For generative: full token stats + self._update_token_stats( + output_tokens=accumulator.completed_metrics.total_tokens.mean, + output_tokens_rate=accumulator.completed_metrics.output_tokens.rate_per_second, + prompt_tokens=accumulator.completed_metrics.prompt_tokens.mean, + total_tokens_rate=accumulator.completed_metrics.total_tokens.rate_per_second, + time_to_first_token=accumulator.completed_metrics.time_to_first_token_ms.mean, + inter_token_latency=accumulator.completed_metrics.inter_token_latency_ms.mean, + 
converted=True, + ) + self._update_system_stats( request_targeted_start_delay=accumulator.scheduler_metrics.request_targeted_start_delay.mean, queued_time=accumulator.scheduler_metrics.queued_time.mean, @@ -602,7 +645,7 @@ def update( converted=False, ) - def complete(self, benchmark: GenerativeBenchmark): + def complete(self, benchmark: GenerativeBenchmark | EmbeddingsBenchmark): self._update_processing_states( benchmark_status="completed", start_time=benchmark.start_time, @@ -611,24 +654,89 @@ def complete(self, benchmark: GenerativeBenchmark): errored_requests=benchmark.metrics.request_totals.errored, ) self._update_request_stats( - request_concurrency=benchmark.metrics.request_concurrency.successful.mean, - requests_per_second=benchmark.metrics.requests_per_second.successful.mean, - request_latency=benchmark.metrics.request_latency.successful.mean, - ) - self._update_token_stats( - output_tokens=benchmark.metrics.output_token_count.successful.mean, - output_tokens_rate=benchmark.metrics.output_tokens_per_second.successful.mean, - prompt_tokens=benchmark.metrics.prompt_token_count.successful.mean, - total_tokens_rate=benchmark.metrics.tokens_per_second.successful.mean, - time_to_first_token=( - benchmark.metrics.time_to_first_token_ms.successful.mean + request_concurrency=( + benchmark.metrics.request_concurrency.successful.mean + if benchmark.metrics.request_concurrency.successful is not None + else 0.0 ), - inter_token_latency=( - benchmark.metrics.inter_token_latency_ms.successful.mean + requests_per_second=( + benchmark.metrics.requests_per_second.successful.mean + if benchmark.metrics.requests_per_second.successful is not None + else 0.0 + ), + request_latency=( + benchmark.metrics.request_latency.successful.mean + if benchmark.metrics.request_latency.successful is not None + else 0.0 ), - converted=True, ) + # Handle token stats differently for embeddings vs generative benchmarks + if isinstance(benchmark, EmbeddingsBenchmark): + # Mark as embeddings 
benchmark + self.is_embeddings = True + # For embeddings: output_token_count is StatusBreakdown[int] not stats + # Get successful token count + prompt_tokens: int + if hasattr(benchmark.metrics, "input_tokens_count"): + prompt_tokens = benchmark.metrics.input_tokens_count.successful or 0 + else: + prompt_tokens = ( + benchmark.metrics.prompt_token_count.successful + if benchmark.metrics.prompt_token_count is not None + and benchmark.metrics.prompt_token_count.successful is not None + else 0 + ) + + self._update_token_stats( + output_tokens=0.0, # Embeddings have no output tokens + output_tokens_rate=0.0, + prompt_tokens=prompt_tokens, + total_tokens_rate=( + benchmark.metrics.input_tokens_per_second.successful.mean + if benchmark.metrics.input_tokens_per_second.successful is not None + else 0.0 + ), + time_to_first_token=0.0, # No TTFT for embeddings + inter_token_latency=0.0, # No ITL for embeddings + converted=True, + ) + else: + # For generative: output_token_count is StatusDistributionSummary + self._update_token_stats( + output_tokens=( + benchmark.metrics.output_token_count.successful.mean + if benchmark.metrics.output_token_count.successful is not None + else 0.0 + ), + output_tokens_rate=( + benchmark.metrics.output_tokens_per_second.successful.mean + if benchmark.metrics.output_tokens_per_second.successful is not None + else 0.0 + ), + prompt_tokens=( + benchmark.metrics.prompt_token_count.successful.mean + if benchmark.metrics.prompt_token_count.successful is not None + else 0.0 + ), + total_tokens_rate=( + benchmark.metrics.tokens_per_second.successful.mean + if benchmark.metrics.tokens_per_second.successful is not None + else 0.0 + ), + time_to_first_token=( + benchmark.metrics.time_to_first_token_ms.successful.mean + if benchmark.metrics.time_to_first_token_ms.successful is not None + else 0.0 + ), + inter_token_latency=( + benchmark.metrics.inter_token_latency_ms.successful.mean + if benchmark.metrics.inter_token_latency_ms.successful is not None + 
else 0.0 + ), + converted=True, + ) + @staticmethod def _map_status( status: Literal["pending", "warmup", "active", "cooldown", "completed"], diff --git a/src/guidellm/benchmark/quality/__init__.py b/src/guidellm/benchmark/quality/__init__.py new file mode 100644 index 000000000..e4d22e08c --- /dev/null +++ b/src/guidellm/benchmark/quality/__init__.py @@ -0,0 +1,19 @@ +""" +Quality validation and benchmarking tools for embeddings. + +This module provides comprehensive quality validation capabilities for embeddings +including cosine similarity validation against baseline models and MTEB (Massive +Text Embedding Benchmark) integration for standardized quality evaluation. +""" + +from __future__ import annotations + +from .mteb_integration import DEFAULT_MTEB_TASKS, MTEBValidator +from .validators import EmbeddingsQualityValidator, compute_cosine_similarity + +__all__ = [ + "DEFAULT_MTEB_TASKS", + "EmbeddingsQualityValidator", + "MTEBValidator", + "compute_cosine_similarity", +] diff --git a/src/guidellm/benchmark/quality/mteb_integration.py b/src/guidellm/benchmark/quality/mteb_integration.py new file mode 100644 index 000000000..b328dce09 --- /dev/null +++ b/src/guidellm/benchmark/quality/mteb_integration.py @@ -0,0 +1,274 @@ +""" +MTEB (Massive Text Embedding Benchmark) integration for embeddings quality evaluation. + +Provides standardized benchmark evaluation using MTEB tasks like STS (Semantic Textual +Similarity) to measure embedding quality across multiple standardized datasets. Follows +vLLM patterns for MTEB evaluation with configurable task selection and lightweight +defaults suitable for CI/CD environments. 
+""" + +from __future__ import annotations + +from typing import Any + +import numpy as np + +__all__ = [ + "DEFAULT_MTEB_TASKS", + "MTEBValidator", +] + +DEFAULT_MTEB_TASKS = ["STS12", "STS13", "STSBenchmark"] +"""Default MTEB tasks for lightweight evaluation (Semantic Textual Similarity).""" + + +class MTEBValidator: + """ + MTEB benchmark integration for standardized quality evaluation. + + Runs MTEB evaluation tasks on embedding models to produce standardized quality + scores. Supports configurable task selection with defaults focused on lightweight + STS (Semantic Textual Similarity) tasks suitable for regular benchmarking. + + Example: + :: + validator = MTEBValidator( + model_name="sentence-transformers/all-MiniLM-L6-v2", + task_names=["STS12", "STS13"] + ) + + results = validator.run_evaluation() + print(f"MTEB Main Score: {results['mteb_main_score']:.4f}") + for task, score in results['mteb_task_scores'].items(): + print(f"{task}: {score:.4f}") + """ + + def __init__( + self, + model_name: str, + task_names: list[str] | None = None, + device: str | None = None, + batch_size: int = 32, + ): + """ + Initialize MTEB validator with model and task configuration. + + :param model_name: HuggingFace model name or path for evaluation + :param task_names: List of MTEB tasks to evaluate (uses + DEFAULT_MTEB_TASKS if None) + :param device: Device for model inference ("cpu", "cuda", "mps", or + None for auto) + :param batch_size: Batch size for encoding during evaluation + :raises ImportError: If mteb or sentence-transformers is not + installed + """ + try: + from sentence_transformers import SentenceTransformer + except ImportError as e: + raise ImportError( + "sentence-transformers is required for MTEB evaluation. " + "Install with: pip install sentence-transformers" + ) from e + + try: + import mteb + except ImportError as e: + raise ImportError( + "mteb is required for MTEB evaluation. 
" + "Install with: pip install mteb" + ) from e + + self.model_name = model_name + self.task_names = task_names if task_names is not None else DEFAULT_MTEB_TASKS + self.device = device + self.batch_size = batch_size + + # Load model + self.model = SentenceTransformer(model_name, device=device) + + # Store mteb module reference + self.mteb = mteb + + def run_evaluation( # noqa: C901 + self, + output_folder: str | None = None, + verbosity: int = 1, + ) -> dict[str, Any]: + """ + Run MTEB evaluation on configured tasks. + + Executes MTEB benchmark tasks and computes standardized quality scores. + Returns both individual task scores and an aggregated main score. + + :param output_folder: Optional folder to save detailed results + :param verbosity: Verbosity level (0=silent, 1=progress, 2=detailed) + :return: Dictionary with 'mteb_main_score' and 'mteb_task_scores' + + Example: + :: + results = validator.run_evaluation() + + # Access main score (average across tasks) + main_score = results['mteb_main_score'] + + # Access individual task scores + for task, score in results['mteb_task_scores'].items(): + print(f"{task}: {score:.4f}") + """ + # Get MTEB task objects + tasks = self.mteb.get_tasks(tasks=self.task_names) + + # Create MTEB evaluation object + evaluation = self.mteb.MTEB(tasks=tasks) + + # Run evaluation + results = evaluation.run( + self.model, + output_folder=output_folder, + verbosity=verbosity, + encode_kwargs={"batch_size": self.batch_size}, + ) + + # Extract scores from results + task_scores = {} + for task_name in self.task_names: + if task_name in results: + # MTEB results structure varies by task type + # Try to extract main_score or test score + task_result = results[task_name] + + if isinstance(task_result, dict): + # Look for main_score in various possible locations + if "main_score" in task_result: + task_scores[task_name] = float( + task_result["main_score"] + ) + elif "test" in task_result and isinstance( + task_result["test"], dict + ): + # Some 
tasks have test split with scores + test_result = task_result["test"] + if "main_score" in test_result: + task_scores[task_name] = float( + test_result["main_score"] + ) + elif "cosine_spearman" in test_result: + # STS tasks use cosine_spearman as primary + task_scores[task_name] = float( + test_result["cosine_spearman"] + ) + elif "scores" in task_result: + # Fallback to scores field + scores = task_result["scores"] + if isinstance(scores, list) and scores: + task_scores[task_name] = float(np.mean(scores)) + elif isinstance(scores, int | float): + task_scores[task_name] = float(scores) + + # Compute main score as average across tasks + main_score = ( + float(np.mean(list(task_scores.values()))) + if task_scores + else 0.0 + ) + + return { + "mteb_main_score": main_score, + "mteb_task_scores": task_scores, + } + + def get_available_tasks(self) -> list[str]: + """ + Get list of all available MTEB tasks. + + :return: List of available task names + + Example: + :: + validator = MTEBValidator(model_name="...") + tasks = validator.get_available_tasks() + print(f"Available tasks: {tasks}") + """ + all_tasks = self.mteb.get_tasks() + return [task.metadata.name for task in all_tasks] + + def get_task_info(self, task_name: str) -> dict[str, Any]: + """ + Get metadata information about a specific MTEB task. 
+ + :param task_name: Name of the MTEB task + :return: Dictionary with task metadata + :raises ValueError: If task is not found + + Example: + :: + info = validator.get_task_info("STS12") + print(f"Task: {info['name']}") + print(f"Description: {info['description']}") + """ + tasks = self.mteb.get_tasks(tasks=[task_name]) + + if not tasks: + raise ValueError(f"MTEB task '{task_name}' not found") + + task = tasks[0] + metadata = task.metadata + + return { + "name": metadata.name, + "description": getattr(metadata, "description", ""), + "type": getattr(metadata, "type", ""), + "category": getattr(metadata, "category", ""), + "eval_splits": getattr(metadata, "eval_splits", []), + "main_score": getattr(metadata, "main_score", ""), + } + + @staticmethod + def get_recommended_tasks(category: str = "sts") -> list[str]: + """ + Get recommended MTEB tasks for specific evaluation categories. + + :param category: Evaluation category ("sts", "classification", + "retrieval", etc.) + :return: List of recommended task names + + Example: + :: + sts_tasks = MTEBValidator.get_recommended_tasks("sts") + # Returns: ["STS12", "STS13", "STS14", "STS15", "STS16", "STSBenchmark"] + """ + recommendations = { + "sts": [ + "STS12", + "STS13", + "STS14", + "STS15", + "STS16", + "STSBenchmark", + "SICKRelatedness", + ], + "classification": [ + "AmazonCounterfactualClassification", + "AmazonPolarityClassification", + "AmazonReviewsClassification", + "Banking77Classification", + "EmotionClassification", + ], + "clustering": [ + "ArxivClusteringP2P", + "ArxivClusteringS2S", + "BiorxivClusteringP2P", + "BiorxivClusteringS2S", + "MedrxivClusteringP2P", + ], + "retrieval": [ + "ArguAna", + "ClimateFEVER", + "CQADupstackRetrieval", + "DBPedia", + "FEVER", + ], + "lightweight": DEFAULT_MTEB_TASKS, # Fastest tasks for CI/CD + } + + return recommendations.get(category.lower(), DEFAULT_MTEB_TASKS) diff --git a/src/guidellm/benchmark/quality/validators.py b/src/guidellm/benchmark/quality/validators.py new 
"""
Quality validation for embeddings benchmarks.

Provides tools for validating embedding quality through cosine similarity
comparison against baseline models. Supports HuggingFace SentenceTransformers
models as baselines and implements tolerance-based validation following vLLM
patterns (1e-2 standard, 5e-4 MTEB).
"""

from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
    from numpy.typing import NDArray

__all__ = [
    "EmbeddingsQualityValidator",
    "compute_cosine_similarity",
]


def compute_cosine_similarity(
    emb1: NDArray[np.float32] | list[float],
    emb2: NDArray[np.float32] | list[float],
) -> float:
    """
    Compute cosine similarity between two embedding vectors.

    Cosine similarity measures the cosine of the angle between two vectors,
    ranging from -1 (opposite) to 1 (identical direction). For normalized
    embeddings, this is equivalent to the dot product.

    Formula: cos_sim = dot(emb1, emb2) / (||emb1|| * ||emb2||)

    :param emb1: First embedding vector (numpy array or list)
    :param emb2: Second embedding vector (numpy array or list)
    :return: Cosine similarity score between -1.0 and 1.0
    :raises ValueError: If embeddings have different dimensions or are empty

    Example:
    ::
        emb1 = np.array([1.0, 0.0, 0.0])
        emb2 = np.array([1.0, 0.0, 0.0])
        similarity = compute_cosine_similarity(emb1, emb2)  # Returns 1.0

        emb3 = np.array([0.0, 1.0, 0.0])
        similarity = compute_cosine_similarity(emb1, emb3)  # Returns 0.0
    """
    # Convert to numpy arrays if needed
    vec1 = np.array(emb1, dtype=np.float32)
    vec2 = np.array(emb2, dtype=np.float32)

    # Validate dimensions
    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Embedding dimensions must match: {vec1.shape} vs {vec2.shape}"
        )

    if vec1.size == 0:
        raise ValueError("Embeddings cannot be empty")

    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)

    # A zero vector has no direction; define similarity as 0.0 rather than
    # dividing by zero.
    if norm1 == 0.0 or norm2 == 0.0:
        return 0.0

    dot_product = np.dot(vec1, vec2)
    cosine_sim = dot_product / (norm1 * norm2)

    return float(cosine_sim)


class EmbeddingsQualityValidator:
    """
    Validates embedding quality against baseline models.

    Loads a HuggingFace SentenceTransformers model as a baseline and compares
    target embeddings against baseline outputs using cosine similarity. Supports
    configurable tolerance thresholds following vLLM patterns.

    Example:
    ::
        validator = EmbeddingsQualityValidator(
            baseline_model="sentence-transformers/all-MiniLM-L6-v2",
            tolerance=1e-2
        )

        text = "This is a test sentence"
        target_embedding = [0.1, 0.2, 0.3, ...]  # From target model

        similarity = validator.validate_against_baseline(text, target_embedding)
        is_valid = validator.check_tolerance(similarity)
    """

    def __init__(
        self,
        baseline_model: str,
        tolerance: float = 1e-2,
        device: str | None = None,
    ):
        """
        Initialize quality validator with baseline model.

        :param baseline_model: HuggingFace model name or path
            (e.g., "sentence-transformers/all-MiniLM-L6-v2")
        :param tolerance: Cosine similarity tolerance threshold
            (1e-2 for standard, 5e-4 for MTEB-level validation)
        :param device: Device for model inference ("cpu", "cuda", "mps", or
            None for auto)
        :raises ImportError: If sentence-transformers is not installed
        """
        # Import lazily so the module can be imported without the optional
        # "embeddings" extra installed.
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError as e:
            raise ImportError(
                "sentence-transformers is required for quality validation. "
                "Install with: pip install sentence-transformers"
            ) from e

        self.baseline_model_name = baseline_model
        self.tolerance = tolerance
        self.device = device

        # Load baseline model
        self.baseline_model = SentenceTransformer(baseline_model, device=device)

    def encode_baseline(
        self,
        texts: str | list[str],
        normalize: bool = True,
        batch_size: int = 32,
    ) -> NDArray[np.float32]:
        """
        Generate embeddings using the baseline model.

        :param texts: Single text or list of texts to encode
        :param normalize: Whether to normalize embeddings to unit length
        :param batch_size: Batch size for encoding
        :return: Embeddings as numpy array (shape: [n_texts, embedding_dim]
            for a list input, [embedding_dim] for a single string)
        """
        embeddings = self.baseline_model.encode(
            texts,
            normalize_embeddings=normalize,
            batch_size=batch_size,
            show_progress_bar=False,
        )

        # Coerce to a float32 ndarray regardless of the encoder's return type.
        return np.array(embeddings, dtype=np.float32)

    def validate_against_baseline(
        self,
        text: str,
        target_embedding: NDArray[np.float32] | list[float],
        normalize: bool = True,
    ) -> float:
        """
        Compare target embedding against baseline model output.

        :param text: Input text that was embedded
        :param target_embedding: Embedding from target model to validate
        :param normalize: Whether to normalize embeddings before comparison
        :return: Cosine similarity score (0.0 to 1.0)

        Example:
        ::
            text = "Example sentence"
            target_emb = model.encode(text)  # From target model
            similarity = validator.validate_against_baseline(text, target_emb)
            # High similarity (>0.95) indicates good quality
        """
        # Generate baseline embedding for the same text
        baseline_embedding = self.encode_baseline(text, normalize=normalize)

        target_array = np.array(target_embedding, dtype=np.float32)

        # Normalize target if requested (skip zero vectors)
        if normalize:
            norm = np.linalg.norm(target_array)
            if norm > 0:
                target_array = target_array / norm

        return compute_cosine_similarity(baseline_embedding, target_array)

    def validate_batch(
        self,
        texts: list[str],
        target_embeddings: NDArray[np.float32] | list[list[float]],
        normalize: bool = True,
    ) -> list[float]:
        """
        Validate multiple embeddings against baseline model.

        :param texts: List of input texts
        :param target_embeddings: Embeddings from target model (shape: [n, dim])
        :param normalize: Whether to normalize embeddings before comparison
        :return: List of cosine similarity scores

        Example:
        ::
            texts = ["Text 1", "Text 2", "Text 3"]
            target_embs = model.encode(texts)
            similarities = validator.validate_batch(texts, target_embs)
            mean_similarity = np.mean(similarities)
        """
        # Generate baseline embeddings for all texts
        baseline_embeddings = self.encode_baseline(texts, normalize=normalize)

        target_array = np.array(target_embeddings, dtype=np.float32)

        # Normalize targets row-wise if requested (zero rows left untouched)
        if normalize:
            norms = np.linalg.norm(target_array, axis=1, keepdims=True)
            target_array = np.where(norms > 0, target_array / norms, target_array)

        # NOTE(review): strict=False silently truncates if texts and target
        # embeddings differ in length — confirm whether a mismatch should raise.
        similarities = []
        for baseline_emb, target_emb in zip(
            baseline_embeddings, target_array, strict=False
        ):
            similarities.append(compute_cosine_similarity(baseline_emb, target_emb))

        return similarities

    def check_tolerance(self, similarity: float) -> bool:
        """
        Check if similarity meets tolerance threshold.

        :param similarity: Cosine similarity score to validate
        :return: True if similarity is within tolerance (similarity >= 1.0 - tolerance)

        Example:
        ::
            # With tolerance=1e-2 (0.01)
            validator.check_tolerance(0.99)   # True (within 1% of perfect)
            validator.check_tolerance(0.985)  # False (outside tolerance)
        """
        return similarity >= (1.0 - self.tolerance)

    def check_self_consistency(
        self,
        _text: str,
        embeddings: list[NDArray[np.float32] | list[float]],
        tolerance: float | None = None,
    ) -> tuple[float, bool]:
        """
        Verify that same input produces consistent embeddings.

        Self-consistency check ensures the model produces identical (or nearly
        identical) embeddings for the same input text across multiple inferences.

        :param _text: Input text the embeddings came from (unused; kept for
            API symmetry with the other validation methods)
        :param embeddings: List of embeddings from repeated encodings of the same text
        :param tolerance: Optional tolerance override (uses instance tolerance if None)
        :return: Tuple of (mean_similarity, is_consistent)

        Example:
        ::
            text = "Consistency test"
            embeddings = [model.encode(text) for _ in range(5)]
            mean_sim, is_consistent = validator.check_self_consistency(text, embeddings)
            # Should be near 1.0 for deterministic models
        """
        if len(embeddings) < 2:  # noqa: PLR2004
            # Need at least 2 embeddings to compare
            return 1.0, True

        tolerance_threshold = tolerance if tolerance is not None else self.tolerance

        # Compute pairwise similarities over all unordered pairs
        similarities = []
        for i in range(len(embeddings)):
            for j in range(i + 1, len(embeddings)):
                similarities.append(
                    compute_cosine_similarity(embeddings[i], embeddings[j])
                )

        mean_similarity = float(np.mean(similarities))

        # Consistency is judged on the MEAN pairwise similarity, not on every
        # individual pair meeting the tolerance.
        is_consistent = mean_similarity >= (1.0 - tolerance_threshold)

        return mean_similarity, is_consistent

    def get_embedding_stats(
        self, embeddings: NDArray[np.float32] | list[list[float]]
    ) -> dict[str, float]:
        """
        Compute statistical properties of embeddings.

        :param embeddings: Embeddings array (shape: [n, dim])
        :return: Dictionary with statistics (mean_norm, std_norm, mean_value,
            std_value, min_value, max_value)

        Example:
        ::
            embeddings = model.encode(texts)
            stats = validator.get_embedding_stats(embeddings)
            print(f"Mean norm: {stats['mean_norm']:.4f}")
        """
        emb_array = np.array(embeddings, dtype=np.float32)

        # Per-row L2 norms
        norms = np.linalg.norm(emb_array, axis=1)

        return {
            "mean_norm": float(np.mean(norms)),
            "std_norm": float(np.std(norms)),
            "mean_value": float(np.mean(emb_array)),
            "std_value": float(np.std(emb_array)),
            "min_value": float(np.min(emb_array)),
            "max_value": float(np.max(emb_array)),
        }
a/src/guidellm/benchmark/schemas/base.py +++ b/src/guidellm/benchmark/schemas/base.py @@ -273,7 +273,7 @@ class BenchmarkConfig(StandardBaseDict): description="Constraint definitions applied to scheduler strategy execution", ) sample_requests: int | None = Field( - default=20, + default=None, description="Request count for statistical sampling in final metrics", ) warmup: TransientPhaseConfig = Field( diff --git a/src/guidellm/benchmark/schemas/embeddings/__init__.py b/src/guidellm/benchmark/schemas/embeddings/__init__.py new file mode 100644 index 000000000..6f62128df --- /dev/null +++ b/src/guidellm/benchmark/schemas/embeddings/__init__.py @@ -0,0 +1,47 @@ +""" +Embeddings benchmark schemas for performance measurement and analysis. + +This module provides the complete schema ecosystem for executing, tracking, and +analyzing embeddings benchmarks. It encompasses configuration entrypoints for +benchmark setup, real-time metric accumulators for execution monitoring, +comprehensive result containers with statistical summaries, multi-benchmark +reporting capabilities, and optional quality validation metrics including cosine +similarity and MTEB benchmarks. 
+""" + +from __future__ import annotations + +from .accumulator import ( + EmbeddingsBenchmarkAccumulator, + EmbeddingsBenchmarkTimings, + EmbeddingsMetricsAccumulator, + EmbeddingsQualityMetricsAccumulator, + EmbeddingsRequestsAccumulator, + RunningMetricStats, + SchedulerMetricsAccumulator, +) +from .benchmark import EmbeddingsBenchmark +from .entrypoints import BenchmarkEmbeddingsArgs +from .metrics import ( + EmbeddingsMetrics, + EmbeddingsQualityMetrics, + SchedulerMetrics, +) +from .report import EmbeddingsBenchmarkMetadata, EmbeddingsBenchmarksReport + +__all__ = [ + "BenchmarkEmbeddingsArgs", + "EmbeddingsBenchmark", + "EmbeddingsBenchmarkAccumulator", + "EmbeddingsBenchmarkMetadata", + "EmbeddingsBenchmarkTimings", + "EmbeddingsBenchmarksReport", + "EmbeddingsMetrics", + "EmbeddingsMetricsAccumulator", + "EmbeddingsQualityMetrics", + "EmbeddingsQualityMetricsAccumulator", + "EmbeddingsRequestsAccumulator", + "RunningMetricStats", + "SchedulerMetrics", + "SchedulerMetricsAccumulator", +] diff --git a/src/guidellm/benchmark/schemas/embeddings/accumulator.py b/src/guidellm/benchmark/schemas/embeddings/accumulator.py new file mode 100644 index 000000000..74eeb4ba5 --- /dev/null +++ b/src/guidellm/benchmark/schemas/embeddings/accumulator.py @@ -0,0 +1,680 @@ +""" +Real-time metric accumulation for embeddings benchmark execution. + +Captures and computes performance metrics during embeddings benchmark runs, tracking +timing phases, request statistics, input token throughput, and latency distributions. +Unlike generative workloads, embeddings do not have output tokens or streaming behavior, +so this accumulator focuses on input processing metrics and optional quality validation +metrics like cosine similarity. 
+""" + +from __future__ import annotations + +import random +from typing import Literal + +from pydantic import Field + +from guidellm.benchmark.schemas.base import BenchmarkAccumulator, BenchmarkConfig +from guidellm.scheduler import MultiTurnRequestT, SchedulerState +from guidellm.schemas import ( + EmbeddingsRequestStats, + GenerationRequest, + GenerationResponse, + RequestInfo, + StandardBaseModel, + StatusBreakdown, + StatusDistributionSummary, +) + +__all__ = [ + "EmbeddingsBenchmarkAccumulator", + "EmbeddingsBenchmarkTimings", + "EmbeddingsMetricsAccumulator", + "EmbeddingsQualityMetricsAccumulator", + "EmbeddingsRequestsAccumulator", + "RunningMetricStats", + "SchedulerMetricsAccumulator", +] + + +class EmbeddingsBenchmarkTimings(StandardBaseModel): + """ + Tracks timing phases and transitions during embeddings benchmark execution. + + Monitors timestamps throughout benchmark execution including request submission, + measurement period boundaries (warmup/active/cooldown), and completion events. 
+ """ + + request_start: float | None = Field( + description="Timestamp when the first request was sent", default=None + ) + measure_start: float | None = Field( + description="Timestamp when measurement period started", default=None + ) + measure_end: float | None = Field( + description="Timestamp when measurement period ended", default=None + ) + request_end: float | None = Field( + description="Timestamp when the last request was completed", default=None + ) + current_update: float | None = Field( + description="Most recent timestamp observed during execution", default=None + ) + current_request: float | None = Field( + description="Most recent request completion timestamp observed", default=None + ) + last_update: float | None = Field( + description="Previous timestamp observed before the current one", default=None + ) + last_request: float | None = Field( + description="Previous request completion timestamp before the current one", + default=None, + ) + + @property + def status(self) -> Literal["pending", "warmup", "active", "cooldown"]: + """ + :return: Current execution phase based on timing thresholds + """ + if self.request_start is None or self.current_update is None: + return "pending" + + if self.measure_start is None or self.current_update <= self.measure_start: + return "warmup" + + if self.measure_end is not None and self.current_update >= self.measure_end: + return "cooldown" + + return "active" + + @property + def duration(self) -> float: + """ + :return: Elapsed time since measurement or request start in seconds + """ + if self.request_start is None or self.current_update is None: + return 0.0 + + return self.current_update - self.request_start + + @property + def elapsed_time_last_update(self) -> float: + """ + :return: Time elapsed since last update + """ + if self.current_update is None or self.last_update is None: + return 0.0 + + return self.current_update - self.last_update + + @property + def finalized_request_start(self) -> float: + """ + 
:return: Finalized timestamp for when requests started + """ + return self.request_start or -1.0 + + @property + def finalized_measure_start(self) -> float: + """ + :return: Finalized timestamp for when measurement started + """ + return self.measure_start or self.finalized_request_start + + @property + def finalized_measure_end(self) -> float: + """ + :return: Finalized timestamp for when measurement ended + """ + return self.measure_end or self.finalized_request_end + + @property + def finalized_request_end(self) -> float: + """ + :return: Finalized timestamp for when requests ended + """ + return self.request_end or self.current_request or -1.0 + + def update_estimate( + self, + info: RequestInfo, + scheduler_state: SchedulerState, + config: BenchmarkConfig, + ): + """ + Update timing estimates based on request info and scheduler state. + + :param info: Request information containing timing data + :param scheduler_state: Current scheduler state with progress metrics + :param config: Benchmark configuration with warmup/cooldown settings + """ + # Update non-terminal timestamps + self.request_start = scheduler_state.start_requests_time + self.last_update = self.current_update + if (current_time := info.timings.last_reported) is not None: + self.current_update = ( + current_time + if self.current_update is None + else max(self.current_update, current_time) + ) + + # Update measurement period timestamps + warmup_active, measure_start = config.warmup.compute_transition_time( + info=info, state=scheduler_state, period="start" + ) + if not warmup_active: + self.measure_start = self.request_start + elif measure_start is not None: + self.measure_start = measure_start + + cooldown_active, measure_end = config.cooldown.compute_transition_time( + info=info, state=scheduler_state, period="end" + ) + if cooldown_active and measure_end is not None: + self.measure_end = measure_end + + # Update terminal timestamps for completed requests + if info.status in {"completed", 
"errored", "cancelled"}: + self.last_request = self.current_request + if info.completed_at is not None and ( + self.current_request is None or info.completed_at > self.current_request + ): + self.current_request = info.completed_at + + # Update request stop timestamps + if scheduler_state.end_processing_time is not None and self.request_end is None: + self.request_end = ( + scheduler_state.progress.stop_time + or self.current_request + or scheduler_state.end_processing_time + ) + if self.measure_end is None: + self.measure_end = self.request_end + + +class RunningMetricStats(StandardBaseModel): + """ + Maintains running statistics for a metric stream without storing all samples. + + Accumulates count, sum, time-weighted sum, and duration for efficient + real-time metric tracking during long-running benchmarks. + """ + + count: int = Field(description="Number of samples accumulated", default=0) + value_sum: float = Field(description="Total sum of accumulated values", default=0.0) + time_weighted_sum: float = Field( + description="Time-weighted sum of accumulated values", default=0.0 + ) + duration: float = Field( + description="Total duration over which values were accumulated", default=0.0 + ) + last_value: float | None = Field( + description="Most recent value added to the accumulator", default=None + ) + + @property + def mean(self) -> float | None: + """ + :return: Arithmetic mean of accumulated values, or None if no samples + """ + if self.count <= 0: + return None + return self.value_sum / self.count + + @property + def time_weighted_mean(self) -> float | None: + """ + :return: Time-weighted mean considering duration between samples, or None + """ + if self.duration <= 0.0: + return None + return self.time_weighted_sum / self.duration + + @property + def rate_per_item(self) -> float | None: + """ + :return: Average value per accumulated item, or None if no samples + """ + if self.count <= 0: + return None + return self.value_sum / self.count + + @property + 
def rate_per_second(self) -> float | None: + """ + :return: Average value per second of duration, or None if no duration + """ + if self.duration <= 0.0: + return None + return self.value_sum / self.duration + + def update_estimate( + self, + value: float | None, + count: int = 1, + duration: float | None = None, + elapsed: float | None = None, + ): + """ + Incorporate a new metric value into running statistics. + + Updates count, sum, and time-weighted statistics using the new value and timing + information. Time-weighted calculations use the previous value over the elapsed + interval to capture sustained metric behavior. + + :param value: New metric value to accumulate + :param count: Number of occurrences this value represents + :param duration: Total duration to set, overriding incremental elapsed updates + :param elapsed: Time elapsed since last update for time-weighted calculations + """ + self.count += count + self.value_sum += (value or 0.0) * count + + if elapsed is not None: + self.time_weighted_sum += (self.last_value or 0.0) * elapsed + + self.duration = ( + duration if duration is not None else (self.duration + (elapsed or 0.0)) + ) + self.last_value = value + + +class SchedulerMetricsAccumulator(StandardBaseModel): + """ + Tracks scheduler-level timing and overhead metrics during execution. 
+ """ + + start_time: float = Field(description="Scheduler start timestamp", default=0.0) + request_start_time: float = Field( + description="First request timestamp", default=0.0 + ) + measure_start_time: float = Field( + description="Measurement start timestamp", default=0.0 + ) + measure_end_time: float = Field( + description="Measurement end timestamp", default=0.0 + ) + request_end_time: float = Field(description="Last request timestamp", default=0.0) + end_time: float = Field(description="Scheduler end timestamp", default=0.0) + + requests_made: StatusBreakdown[int, int, int, int] = Field( + description="Request counts by status", + default_factory=lambda: StatusBreakdown[int, int, int, int]( + successful=0, errored=0, incomplete=0, total=0 + ), + ) + + # Running metrics for progress tracking (compatible with generative) + queued_time: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Running stats for time requests spent in the queue", + ) + resolve_start_delay: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Running stats for delay before worker starts resolving", + ) + resolve_targeted_start_delay: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Running stats for delay to targeted resolve start", + ) + request_start_delay: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Running stats for delay from resolve to request start", + ) + request_targeted_start_delay: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Running stats for delay to targeted request start", + ) + resolve_end_delay: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Running stats for delay after request end till worker resolves", + ) + + # Sum fields for final compilation + queued_time_sum: float = Field( + description="Total time requests spent in queue", default=0.0 + ) + resolve_start_delay_sum: 
float = Field( + description="Total delay before worker starts resolving", default=0.0 + ) + resolve_targeted_start_delay_sum: float = Field( + description="Total delay to targeted resolve start", default=0.0 + ) + request_start_delay_sum: float = Field( + description="Total delay from resolve to request start", default=0.0 + ) + resolve_time_sum: float = Field( + description="Total resolution time", default=0.0 + ) + + def update_estimate( + self, scheduler_state: SchedulerState, stats: EmbeddingsRequestStats + ): + """ + Update scheduler metrics with completed request timing data. + + :param scheduler_state: Current scheduler state + :param stats: Completed request statistics + """ + # Update request counts + self.requests_made.successful = scheduler_state.successful_requests + self.requests_made.errored = scheduler_state.errored_requests + self.requests_made.incomplete = scheduler_state.cancelled_requests + self.requests_made.total = ( + scheduler_state.successful_requests + + scheduler_state.errored_requests + + scheduler_state.cancelled_requests + ) + + # Update timing sums and running stats + timings = stats.info.timings + if timings.queued is not None and timings.dequeued is not None: + queued_time_val = timings.dequeued - timings.queued + self.queued_time_sum += queued_time_val + self.queued_time.update_estimate(value=queued_time_val) + + if timings.dequeued is not None and timings.resolve_start is not None: + resolve_start_delay_val = timings.resolve_start - timings.dequeued + self.resolve_start_delay_sum += resolve_start_delay_val + self.resolve_start_delay.update_estimate(value=resolve_start_delay_val) + + if timings.targeted_start is not None and timings.resolve_start is not None: + resolve_targeted_delay_val = timings.resolve_start - timings.targeted_start + self.resolve_targeted_start_delay_sum += resolve_targeted_delay_val + self.resolve_targeted_start_delay.update_estimate( + value=resolve_targeted_delay_val + ) + + if timings.resolve_start is not 
None and timings.request_start is not None: + request_start_delay_val = timings.request_start - timings.resolve_start + self.request_start_delay_sum += request_start_delay_val + self.request_start_delay.update_estimate(value=request_start_delay_val) + + if timings.targeted_start is not None and timings.request_start is not None: + request_targeted_delay_val = ( + timings.request_start - timings.targeted_start + ) + self.request_targeted_start_delay.update_estimate( + value=request_targeted_delay_val + ) + + if timings.request_end is not None and timings.resolve_end is not None: + resolve_end_delay_val = timings.resolve_end - timings.request_end + self.resolve_end_delay.update_estimate(value=resolve_end_delay_val) + + if timings.resolve_start is not None and timings.resolve_end is not None: + resolve_time_val = timings.resolve_end - timings.resolve_start + self.resolve_time_sum += resolve_time_val + + +class EmbeddingsQualityMetricsAccumulator(StandardBaseModel): + """ + Accumulates quality validation metrics for embeddings. + + Tracks cosine similarity scores and MTEB benchmark results when quality + validation is enabled. + """ + + cosine_similarities: list[float] = Field( + default_factory=list, + description="Cosine similarity scores against baseline", + ) + baseline_cosine_similarity: StatusDistributionSummary | None = Field( + default=None, + description="Compiled cosine similarity distribution", + ) + self_consistency_score: StatusDistributionSummary | None = Field( + default=None, + description="Compiled self-consistency scores", + ) + mteb_main_score: float | None = Field( + default=None, + description="MTEB main score (if evaluated)", + ) + mteb_task_scores: dict[str, float] | None = Field( + default=None, + description="Individual MTEB task scores", + ) + + +class EmbeddingsCompletedMetricsAccumulator(StandardBaseModel): + """ + Tracks real-time metrics for completed embeddings requests. + + Used for progress tracking during benchmark execution. 
+ """ + + requests: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Requests completion metrics", + ) + request_latency: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Request latency running stats", + ) + prompt_tokens: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Input tokens running stats", + ) + total_tokens: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Total tokens (same as prompt for embeddings)", + ) + + +class EmbeddingsMetricsAccumulator(StandardBaseModel): + """ + Accumulates performance metrics during embeddings benchmark execution. + + Tracks request latency, throughput, and input token metrics. Does not track + output tokens or streaming metrics (no TTFT/ITL for embeddings). + """ + + requests_per_second: StatusDistributionSummary = Field( + default_factory=StatusDistributionSummary, + description="Requests per second distribution", + ) + request_concurrency: StatusDistributionSummary = Field( + default_factory=StatusDistributionSummary, + description="Request concurrency distribution", + ) + request_latency: StatusDistributionSummary = Field( + default_factory=StatusDistributionSummary, + description="Request latency distribution", + ) + input_tokens_per_second: StatusDistributionSummary = Field( + default_factory=StatusDistributionSummary, + description="Input tokens per second distribution", + ) + + +class EmbeddingsRequestsAccumulator(StandardBaseModel): + """ + Accumulates embeddings request statistics during benchmark execution. + + Uses reservoir sampling to maintain a representative sample of requests + across different status categories. 
+ """ + + successful: list[EmbeddingsRequestStats] = Field( + default_factory=list, + description="Sample of successful embeddings requests", + ) + incomplete: list[EmbeddingsRequestStats] = Field( + default_factory=list, + description="Sample of incomplete embeddings requests", + ) + errored: list[EmbeddingsRequestStats] = Field( + default_factory=list, + description="Sample of errored embeddings requests", + ) + + +class EmbeddingsBenchmarkAccumulator( + BenchmarkAccumulator[GenerationRequest, GenerationResponse] +): + """ + Accumulates metrics during embeddings benchmark execution. + + Extends BenchmarkAccumulator with embeddings-specific metric tracking including + input token processing, request latency, and optional quality validation metrics. + Does not track output tokens or streaming behavior. + """ + + type_: Literal["embeddings_benchmark_accumulator"] = ( + "embeddings_benchmark_accumulator" + ) + + # Core accumulators + timings: EmbeddingsBenchmarkTimings = Field( + default_factory=EmbeddingsBenchmarkTimings, + description="Timing phase tracking", + ) + scheduler_metrics: SchedulerMetricsAccumulator = Field( + default_factory=SchedulerMetricsAccumulator, + description="Scheduler metrics accumulation", + ) + concurrency_metric: RunningMetricStats = Field( + default_factory=RunningMetricStats, + description="Time-weighted concurrency statistics", + ) + completed_metrics: EmbeddingsCompletedMetricsAccumulator = Field( + default_factory=EmbeddingsCompletedMetricsAccumulator, + description="Real-time metrics for completed requests", + ) + metrics: EmbeddingsMetricsAccumulator = Field( + default_factory=EmbeddingsMetricsAccumulator, + description="Performance metrics accumulation", + ) + requests: EmbeddingsRequestsAccumulator = Field( + default_factory=EmbeddingsRequestsAccumulator, + description="Request statistics accumulation", + ) + + # Quality validation (optional) + quality_enabled: bool = Field( + default=False, + description="Whether quality 
validation is enabled", + ) + quality: EmbeddingsQualityMetricsAccumulator | None = Field( + default=None, + description="Quality metrics accumulation (when enabled)", + ) + + # Encoding format tracking + encoding_format_breakdown: dict[str, int] = Field( + default_factory=dict, + description="Request count by encoding format", + ) + + # Reservoir sampling parameters + _sampling_counts: dict[str, int] = {} + _max_samples: int = 1000 + + def update_estimate( # noqa: C901, PLR0912 + self, + response: GenerationResponse | None, + request: GenerationRequest | MultiTurnRequestT[GenerationRequest], + info: RequestInfo, + scheduler_state: SchedulerState, + ): + """ + Update accumulated metrics with a new request completion. + + :param response: Response from the backend (if successful) + :param request: Original generation request + :param info: Request metadata and timing information + :param scheduler_state: Current scheduler state + """ + # Update timing state + self.timings.update_estimate(info, scheduler_state, self.config) + duration = self.timings.duration + self.concurrency_metric.update_estimate( + value=scheduler_state.processing_requests, + duration=duration, + ) + + # Determine request status and target accumulator + if info.status == "completed": + status_key = "completed" + status_list = self.requests.successful + elif info.status == "errored": + status_key = "errored" + status_list = self.requests.errored + elif info.status == "cancelled" and info.timings.resolve_start is not None: + status_key = "incomplete" + status_list = self.requests.incomplete + else: + # Not a terminal status or cancelled before starting + # Do not include in requests or metrics + return + + # Build request stats + # Use response metrics if available (has actual token counts from server), + # otherwise fall back to request metrics (word/char counts only) + if isinstance(request, GenerationRequest): + request_input_metrics = request.input_metrics + else: + # For multi-turn requests, 
extract the first request + first_req = request[0] if isinstance(request, list | tuple) else None + if isinstance(first_req, tuple): + request_input_metrics = first_req[0].input_metrics + elif isinstance(first_req, GenerationRequest): + request_input_metrics = first_req.input_metrics + else: + request_input_metrics = None + + input_metrics = ( + response.input_metrics if response is not None else request_input_metrics + ) + stats = EmbeddingsRequestStats( + request_id=info.request_id, + info=info, + input_metrics=input_metrics, + ) + + # Track encoding format if available + if isinstance(request, GenerationRequest) and hasattr( + request, "encoding_format" + ): + format_key = request.encoding_format or "float" + self.encoding_format_breakdown[format_key] = ( + self.encoding_format_breakdown.get(format_key, 0) + 1 + ) + + # Update scheduler metrics + self.scheduler_metrics.update_estimate(scheduler_state, stats) + + # Update completed metrics for progress tracking (only for completed requests) + if status_key == "completed": + self.completed_metrics.requests.update_estimate( + value=1.0, + count=1, + duration=self.timings.duration, + ) + if stats.request_latency is not None: + self.completed_metrics.request_latency.update_estimate( + value=stats.request_latency, + count=1, + ) + if stats.prompt_tokens is not None: + self.completed_metrics.prompt_tokens.update_estimate( + value=float(stats.prompt_tokens), + count=1, + ) + self.completed_metrics.total_tokens.update_estimate( + value=float(stats.prompt_tokens), + count=1, + ) + + # Reservoir sampling + sample_count = self._sampling_counts.get(status_key, 0) + if len(status_list) < self._max_samples: + status_list.append(stats) + else: + # Replace with decreasing probability + j = random.randint(0, sample_count) + if j < self._max_samples: + status_list[j] = stats + self._sampling_counts[status_key] = sample_count + 1 diff --git a/src/guidellm/benchmark/schemas/embeddings/benchmark.py 
b/src/guidellm/benchmark/schemas/embeddings/benchmark.py new file mode 100644 index 000000000..7991ea56b --- /dev/null +++ b/src/guidellm/benchmark/schemas/embeddings/benchmark.py @@ -0,0 +1,160 @@ +""" +Benchmark data models and metrics for embeddings performance measurement. + +Provides comprehensive data structures for capturing, storing, and analyzing +benchmark results from scheduler-driven embeddings workload executions. Core +abstractions include embeddings-specific metrics without output tokens or streaming +behavior, request-level statistics tracking, and multi-benchmark reporting capabilities. +""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field, computed_field + +from guidellm.benchmark.schemas.base import Benchmark, BenchmarkConfig +from guidellm.benchmark.schemas.embeddings.accumulator import ( + EmbeddingsBenchmarkAccumulator, +) +from guidellm.benchmark.schemas.embeddings.metrics import ( + EmbeddingsMetrics, + SchedulerMetrics, +) +from guidellm.scheduler import SchedulerState +from guidellm.schemas import ( + EmbeddingsRequestStats, + StatusBreakdown, + StatusDistributionSummary, +) + +__all__ = ["EmbeddingsBenchmark"] + + +class EmbeddingsBenchmark(Benchmark[EmbeddingsBenchmarkAccumulator]): + """ + Complete embeddings benchmark results with specialized metrics. + + Encapsulates comprehensive performance data from scheduler-driven embeddings + workload executions including request-level statistics, input token metrics, + latency distributions, and optional quality validation metrics. Unlike generative + benchmarks, does not track output tokens or streaming behavior. 
+ """ + + type_: Literal["embeddings_benchmark"] = "embeddings_benchmark" # type: ignore[assignment] + + config: BenchmarkConfig = Field( + description="Configuration parameters for this benchmark execution", + ) + scheduler_state: SchedulerState = Field( + description="Final state of the scheduler after benchmark completion", + ) + scheduler_metrics: SchedulerMetrics = Field( + description="Scheduler timing and performance statistics", + ) + metrics: EmbeddingsMetrics = Field( + description="Performance metrics and statistical distributions", + ) + requests: StatusBreakdown[ + list[EmbeddingsRequestStats], + list[EmbeddingsRequestStats], + list[EmbeddingsRequestStats], + None, + ] = Field( + description=( + "Request details grouped by status: successful, incomplete, errored" + ), + ) + + @computed_field # type: ignore[prop-decorator] + @property + def start_time(self) -> float: + """ + :return: Benchmark start time in seconds since epoch + """ + return self.scheduler_metrics.measure_start_time + + @computed_field # type: ignore[prop-decorator] + @property + def end_time(self) -> float: + """ + :return: Benchmark end time in seconds since epoch + """ + return self.scheduler_metrics.measure_end_time + + @computed_field # type: ignore[prop-decorator] + @property + def duration(self) -> float: + """ + :return: Total benchmark execution duration in seconds + """ + return self.end_time - self.start_time + + @computed_field # type: ignore[prop-decorator] + @property + def warmup_duration(self) -> float: + """ + :return: Warmup phase duration in seconds + """ + return ( + self.scheduler_metrics.measure_start_time + - self.scheduler_metrics.request_start_time + ) + + @computed_field # type: ignore[prop-decorator] + @property + def cooldown_duration(self) -> float: + """ + :return: Cooldown phase duration in seconds + """ + return ( + self.scheduler_metrics.request_end_time + - self.scheduler_metrics.measure_end_time + ) + + @property + def request_latency(self) -> 
StatusDistributionSummary: + """ + :return: Statistical distribution of request latencies across all requests + """ + return self.metrics.request_latency + + @property + def request_throughput(self) -> StatusDistributionSummary: + """ + :return: Statistical distribution of throughput measured in requests per second + """ + return self.metrics.requests_per_second + + @property + def request_concurrency(self) -> StatusDistributionSummary: + """ + :return: Statistical distribution of concurrent requests throughout execution + """ + return self.metrics.request_concurrency + + @classmethod + def compile( + cls, + accumulator: EmbeddingsBenchmarkAccumulator, + scheduler_state: SchedulerState, + ) -> EmbeddingsBenchmark: + """ + Compile final benchmark results from accumulated execution state. + + :param accumulator: Accumulated benchmark state with request statistics + :param scheduler_state: Final scheduler state after execution completion + :return: Compiled embeddings benchmark instance with complete metrics + """ + return EmbeddingsBenchmark( + config=accumulator.config, + scheduler_state=scheduler_state, + scheduler_metrics=SchedulerMetrics.compile(accumulator, scheduler_state), + metrics=EmbeddingsMetrics.compile(accumulator, scheduler_state), + requests=StatusBreakdown( + successful=accumulator.requests.successful, + incomplete=accumulator.requests.incomplete, + errored=accumulator.requests.errored, + total=None, + ), + ) diff --git a/src/guidellm/benchmark/schemas/embeddings/entrypoints.py b/src/guidellm/benchmark/schemas/embeddings/entrypoints.py new file mode 100644 index 000000000..f205e09eb --- /dev/null +++ b/src/guidellm/benchmark/schemas/embeddings/entrypoints.py @@ -0,0 +1,311 @@ +""" +Configuration entrypoints for embeddings benchmark execution. + +Defines parameter schemas for creating embeddings benchmark runs from scenario files +or runtime arguments. 
class BenchmarkEmbeddingsArgs(StandardBaseModel):
    """
    Configuration arguments for embeddings benchmark execution.

    Defines all parameters for embeddings benchmark setup including target endpoint,
    data sources, backend configuration, processing pipeline, output formatting,
    execution constraints, and embeddings-specific quality validation options.

    Example::

        # Basic embeddings benchmark
        args = BenchmarkEmbeddingsArgs(
            target="http://localhost:8000/v1",
            data=["path/to/texts.json"],
            profile="sweep",
        )

        # With quality validation
        args = BenchmarkEmbeddingsArgs(
            target="http://localhost:8000/v1",
            data=["path/to/texts.json"],
            enable_quality_validation=True,
            baseline_model="sentence-transformers/all-MiniLM-L6-v2",
            quality_tolerance=1e-2,
        )

        # With MTEB benchmarking
        args = BenchmarkEmbeddingsArgs(
            target="http://localhost:8000/v1",
            data=["path/to/texts.json"],
            enable_mteb=True,
            mteb_tasks=["STS12", "STS13"],
        )
    """

    @classmethod
    def create(
        cls, scenario: Path | str | None, **kwargs: dict[str, Any]
    ) -> BenchmarkEmbeddingsArgs:
        """
        Create benchmark args from scenario file and keyword arguments.

        :param scenario: Path to scenario file, built-in scenario name, or None
        :param kwargs: Keyword arguments to override scenario values
        :return: Configured benchmark args instance
        :raises ValueError: If scenario is not found or file format is unsupported
        """
        constructor_kwargs = {}

        if scenario is not None:
            if isinstance(scenario, str) and scenario in (
                builtin_scenarios := get_builtin_scenarios()
            ):
                scenario_path = builtin_scenarios[scenario]
            elif Path(scenario).exists() and Path(scenario).is_file():
                scenario_path = Path(scenario)
            else:
                raise ValueError(f"Scenario '{scenario}' not found.")

            with scenario_path.open() as file:
                if scenario_path.suffix == ".json":
                    scenario_data = json.load(file)
                elif scenario_path.suffix in {".yaml", ".yml"}:
                    scenario_data = yaml.safe_load(file)
                else:
                    raise ValueError(
                        f"Unsupported scenario file format: {scenario_path.suffix}"
                    )
                # Scenario files may nest their values under a top-level "args" key
                if "args" in scenario_data:
                    scenario_data = scenario_data["args"]
                constructor_kwargs.update(scenario_data)

        # Explicit keyword arguments always override scenario-file values
        constructor_kwargs.update(kwargs)
        return cls.model_validate(constructor_kwargs)

    @classmethod
    def get_default(cls: type[BenchmarkEmbeddingsArgs], field: str) -> Any:
        """
        Retrieve default value for a model field.

        :param field: Field name to retrieve default value for
        :return: Default value for the field
        :raises ValueError: If field does not exist
        """
        if field not in cls.model_fields:
            raise ValueError(f"Field '{field}' not found in {cls.__name__}")

        field_info = cls.model_fields[field]
        factory = field_info.default_factory

        if factory is None:
            return field_info.default

        # Some default factories accept the validated-data dict, some take no args
        if len(inspect.signature(factory).parameters) == 0:
            return factory()  # type: ignore[call-arg]
        else:
            return factory({})  # type: ignore[call-arg]

    model_config = ConfigDict(
        extra="ignore",
        use_enum_values=True,
        from_attributes=True,
        arbitrary_types_allowed=True,
        validate_by_alias=True,
        validate_by_name=True,
        # Accept both snake_case and kebab-case field names on input
        alias_generator=AliasGenerator(
            validation_alias=lambda field_name: AliasChoices(
                field_name, field_name.replace("_", "-")
            ),
        ),
    )

    # Required
    target: str = Field(description="Target endpoint URL for benchmark execution")
    data: list[Any] = Field(
        description="List of dataset sources or data files",
        default_factory=list,
        min_length=1,
    )

    # Benchmark configuration
    profile: StrategyType | ProfileType | Profile = Field(
        default="sweep", description="Benchmark profile or scheduling strategy type"
    )
    rate: list[float] | None = Field(
        default=None, description="Request rate(s) for rate-based scheduling"
    )

    # Backend configuration
    backend: BackendType | Backend = Field(
        default="openai_http", description="Backend type or instance for execution"
    )
    backend_kwargs: dict[str, Any] | None = Field(
        default=None, description="Additional backend configuration arguments"
    )
    request_format: str | None = Field(
        default=None,
        description="Query format for backend operations",
    )
    model: str | None = Field(default=None, description="Model identifier for backend")

    # Data configuration
    processor: str | Path | PreTrainedTokenizerBase | None = Field(
        default=None, description="Tokenizer path, name, or instance for processing"
    )
    processor_args: dict[str, Any] | None = Field(
        default=None, description="Additional tokenizer configuration arguments"
    )
    data_args: list[dict[str, Any]] | None = Field(
        default_factory=list,  # type: ignore[arg-type]
        description="Per-dataset configuration arguments",
    )
    data_samples: int = Field(
        default=-1, description="Number of samples to use from datasets (-1 for all)"
    )
    data_column_mapper: (
        DatasetPreprocessor
        | dict[str, str | list[str]]
        | Literal["embeddings_column_mapper"]
    ) = Field(
        default="embeddings_column_mapper",
        description="Column mapping preprocessor for dataset fields",
    )
    data_preprocessors: list[DatasetPreprocessor | dict[str, str | list[str]] | str] = (
        Field(
            default_factory=list,  # type: ignore[arg-type]
            description="List of dataset preprocessors to apply in order",
        )
    )
    data_preprocessors_kwargs: dict[str, Any] = Field(
        default_factory=dict,
        description="Global arguments for data preprocessors",
    )
    data_finalizer: DatasetFinalizer | str | dict[str, Any] = Field(
        default="embeddings",
        description="Finalizer for preparing data samples into requests",
    )
    data_collator: Callable | Literal["embeddings"] | None = Field(
        default="embeddings", description="Data collator for batch processing"
    )
    data_sampler: Sampler[int] | Literal["shuffle"] | None = Field(
        default=None, description="Data sampler for request ordering"
    )
    data_num_workers: int | None = Field(
        default=0, description="Number of workers for data loading"
    )
    dataloader_kwargs: dict[str, Any] | None = Field(
        default=None, description="Additional dataloader configuration arguments"
    )
    random_seed: int = Field(default=42, description="Random seed for reproducibility")

    # Output configuration
    # NOTE: tuple[str, ...] (not tuple[str]) so that tuples of any length validate;
    # tuple[str] would only admit a one-element tuple.
    outputs: list[str] | tuple[str, ...] = Field(
        default_factory=lambda: ["json", "csv", "html"],
        description="Output types to create (json, csv, html)",
    )
    output_dir: str | Path = Field(
        default_factory=Path.cwd,
        description="Directory for saving output files",
    )
    output_kwargs: dict[str, Any] | None = Field(
        default=None, description="Additional output formatter arguments"
    )

    # Constraint configuration
    max_requests: int | None = Field(
        default=None, description="Maximum number of requests to execute"
    )
    max_errors: int | None = Field(
        default=None, description="Maximum allowed errors before stopping"
    )
    max_duration: float | None = Field(
        default=None, description="Maximum duration in seconds"
    )
    warmup: TransientPhaseConfig | float | int | dict | None = Field(
        default=None, description="Warmup phase configuration"
    )
    cooldown: TransientPhaseConfig | float | int | dict | None = Field(
        default=None, description="Cooldown phase configuration"
    )

    # EMBEDDINGS-SPECIFIC: Quality validation options
    enable_quality_validation: bool = Field(
        default=False,
        description="Enable quality validation against baseline model",
    )
    baseline_model: str | None = Field(
        default=None,
        description=(
            "HuggingFace model for baseline comparison "
            "(e.g., 'sentence-transformers/all-MiniLM-L6-v2')"
        ),
    )
    quality_tolerance: float = Field(
        default=1e-2,
        description=(
            "Cosine similarity tolerance threshold (1e-2 standard, 5e-4 MTEB-level)"
        ),
    )

    # EMBEDDINGS-SPECIFIC: MTEB benchmark options
    enable_mteb: bool = Field(
        default=False,
        description="Enable MTEB benchmark evaluation",
    )
    mteb_tasks: list[str] | None = Field(
        default=None,
        description=(
            "MTEB tasks to evaluate (default: ['STS12', 'STS13', 'STSBenchmark'])"
        ),
    )

    # EMBEDDINGS-SPECIFIC: Encoding format
    encoding_format: Literal["float", "base64"] = Field(
        default="float",
        description="Embedding encoding format (float or base64)",
    )

    @field_serializer("output_dir")
    def serialize_output_dir(self, value: Path) -> str:
        """Serialize Path to string for JSON/YAML."""
        return str(value)

    @field_serializer("processor")
    def serialize_processor(self, value: Any) -> str | None:
        """Serialize processor to string representation."""
        if value is None:
            return None
        if isinstance(value, str | Path):
            return str(value)
        # For PreTrainedTokenizer instances, return name_or_path
        return getattr(value, "name_or_path", str(value))
+""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field + +from guidellm.benchmark.schemas.embeddings.accumulator import ( + EmbeddingsBenchmarkAccumulator, +) +from guidellm.scheduler import SchedulerState +from guidellm.schemas import ( + StandardBaseDict, + StatusBreakdown, + StatusDistributionSummary, +) + +__all__ = [ + "EmbeddingsMetrics", + "EmbeddingsQualityMetrics", + "SchedulerMetrics", + "StatusTypes", + "TimedMetricTypeAlias", +] + + +TimedMetricTypeAlias = ( + tuple[float, float, int | float | None, int | float | None] | None +) +"""Timed metric tuple containing start_time, end_time, input_value, and output_value.""" + +StatusTypes = Literal["successful", "incomplete", "errored"] +"""Request status category for metric compilation.""" + +# Constants for tuple indexing +_TIMED_METRIC_START_TIME_INDEX = 0 +_TIMED_METRIC_END_TIME_INDEX = 1 +_TIMED_METRIC_INPUT_VALUE_INDEX = 2 +_TIMED_METRIC_OUTPUT_VALUE_INDEX = 3 + + +class SchedulerMetrics(StandardBaseDict): + """ + Scheduler timing and performance statistics. + + Tracks overall benchmark timing, request counts by status, and detailed internal + scheduler performance metrics including queue times, processing delays, and + request execution statistics. Used to analyze scheduler efficiency and identify + bottlenecks in request processing pipelines. 
+ """ + + # Overall timings for the scheduler + start_time: float = Field( + description="Unix timestamp when the benchmark run started" + ) + request_start_time: float = Field( + description="Unix timestamp when first request was made" + ) + measure_start_time: float = Field( + description="Unix timestamp when measurement period started" + ) + measure_end_time: float = Field( + description="Unix timestamp when measurement period ended" + ) + request_end_time: float = Field( + description="Unix timestamp when last request completed" + ) + end_time: float = Field(description="Unix timestamp when the benchmark run ended") + + # Request details tracked by the scheduler + requests_made: StatusBreakdown[int, int, int, int] = Field( + description="Request counts by status: successful, incomplete, errored, total" + ) + + # Scheduler internal performance timings + queued_time_avg: float = Field( + description="Avg time requests spent in the queue (seconds)" + ) + resolve_start_delay_avg: float = Field( + description="Avg delay before worker begins resolving req after dequeue (sec)" + ) + resolve_targeted_start_delay_avg: float = Field( + description="Avg delay to targeted resolve start time (seconds)" + ) + request_start_delay_avg: float = Field( + description="Avg delay from resolve start to actual request start (seconds)" + ) + resolve_time_avg: float = Field( + description="Avg total resolution time per request (seconds)" + ) + + @classmethod + def compile( + cls, + accumulator: EmbeddingsBenchmarkAccumulator, + scheduler_state: SchedulerState, + ) -> SchedulerMetrics: + """ + Compile scheduler metrics from accumulator and scheduler state. 
+ + :param accumulator: Accumulator containing scheduler timing and request data + :param scheduler_state: Scheduler state with execution timing information + :return: Compiled SchedulerMetrics instance with timing statistics + """ + num_requests = accumulator.scheduler_metrics.requests_made.total + + # Avoid division by zero - use -1.0 to indicate no requests processed + if num_requests is None or num_requests == 0: + queued_time_avg = -1.0 + resolve_start_delay_avg = -1.0 + resolve_targeted_start_delay_avg = -1.0 + request_start_delay_avg = -1.0 + resolve_time_avg = -1.0 + else: + queued_time_avg = ( + accumulator.scheduler_metrics.queued_time_sum / num_requests + ) + resolve_start_delay_avg = ( + accumulator.scheduler_metrics.resolve_start_delay_sum + / num_requests + ) + resolve_targeted_start_delay_avg = ( + accumulator.scheduler_metrics + .resolve_targeted_start_delay_sum + / num_requests + ) + request_start_delay_avg = ( + accumulator.scheduler_metrics.request_start_delay_sum + / num_requests + ) + resolve_time_avg = ( + accumulator.scheduler_metrics.resolve_time_sum / num_requests + ) + + return SchedulerMetrics( + start_time=scheduler_state.start_time, + request_start_time=accumulator.timings.finalized_request_start, + measure_start_time=accumulator.timings.finalized_measure_start, + measure_end_time=accumulator.timings.finalized_measure_end, + request_end_time=accumulator.timings.finalized_request_end, + end_time=scheduler_state.end_time or -1.0, + requests_made=accumulator.scheduler_metrics.requests_made, + queued_time_avg=queued_time_avg, + resolve_start_delay_avg=resolve_start_delay_avg, + resolve_targeted_start_delay_avg=resolve_targeted_start_delay_avg, + request_start_delay_avg=request_start_delay_avg, + resolve_time_avg=resolve_time_avg, + ) + + +class EmbeddingsQualityMetrics(StandardBaseDict): + """ + Quality validation metrics for embeddings. + + Tracks cosine similarity scores against baseline models and MTEB benchmark + performance. 
These metrics provide insights into embedding quality beyond + raw performance measurements. + """ + + baseline_cosine_similarity: StatusDistributionSummary | None = Field( + default=None, + description="Cosine similarity distribution against baseline model (0.0-1.0)", + ) + self_consistency_score: StatusDistributionSummary | None = Field( + default=None, + description="Self-consistency scores (same input → same embedding)", + ) + mteb_main_score: float | None = Field( + default=None, + description="MTEB benchmark main score (average across tasks)", + ) + mteb_task_scores: dict[str, float] | None = Field( + default=None, + description="Individual MTEB task scores (e.g., STS12, STS13)", + ) + + +class EmbeddingsMetrics(StandardBaseDict): + """ + Performance and quality metrics for embeddings benchmarks. + + Encapsulates comprehensive performance data from embeddings workload executions + including request-level statistics, input token metrics, and optional quality + validation metrics. Unlike generative metrics, embeddings metrics do not track + output tokens or streaming behavior (TTFT, ITL). 
+ """ + + # Request statistics + request_totals: StatusBreakdown[int, int, int, int] = Field( + description="Total requests by status: successful, incomplete, errored, total" + ) + requests_per_second: StatusDistributionSummary = Field( + description=( + "Requests per second distribution across measurement period" + ) + ) + request_concurrency: StatusDistributionSummary = Field( + description=( + "Concurrent requests distribution throughout execution" + ) + ) + request_latency: StatusDistributionSummary = Field( + description="Request latency distribution (seconds)" + ) + + # Input token metrics (no output tokens for embeddings) + input_tokens_count: StatusBreakdown[int, int, int, int] = Field( + description=( + "Total input tokens by status: successful, incomplete, " + "errored, total" + ) + ) + input_tokens_per_second: StatusDistributionSummary = Field( + description="Input tokens per second distribution" + ) + + # Dummy output token fields for progress tracker compatibility (always zero) + output_token_count: StatusBreakdown[int, int, int, int] = Field( + default_factory=lambda: StatusBreakdown[int, int, int, int]( + successful=0, incomplete=0, errored=0, total=0 + ), + description="Output tokens (always 0 for embeddings)", + ) + output_tokens_per_second: StatusDistributionSummary = Field( + default_factory=StatusDistributionSummary, + description="Output tokens per second (always 0 for embeddings)", + ) + prompt_token_count: StatusBreakdown[int, int, int, int] | None = Field( + default=None, + description="Same as input_tokens_count (for compatibility)", + ) + tokens_per_second: StatusDistributionSummary | None = Field( + default=None, + description="Same as input_tokens_per_second (for compatibility)", + ) + + # Quality validation metrics (optional) + quality: EmbeddingsQualityMetrics | None = Field( + default=None, + description="Quality validation metrics (when enabled)", + ) + + # Encoding format breakdown + encoding_format_breakdown: dict[str, int] = 
Field( + default_factory=dict, + description=( + "Request count by encoding format (e.g., " + "{'float': 50, 'base64': 0})" + ), + ) + + @classmethod + def compile( + cls, + accumulator: EmbeddingsBenchmarkAccumulator, + _scheduler_state: SchedulerState, + ) -> EmbeddingsMetrics: + """ + Compile final embeddings metrics from accumulated execution state. + + :param accumulator: Accumulated benchmark state with request statistics + :param scheduler_state: Final scheduler state after execution completion + :return: Compiled embeddings metrics instance with complete statistics + """ + # Compile request counts + request_totals = StatusBreakdown[int, int, int, int]( + successful=len(accumulator.requests.successful), + incomplete=len(accumulator.requests.incomplete), + errored=len(accumulator.requests.errored), + total=( + len(accumulator.requests.successful) + + len(accumulator.requests.incomplete) + + len(accumulator.requests.errored) + ), + ) + + # Compile input token counts + input_tokens_count = StatusBreakdown[int, int, int, int]( + successful=sum( + req.input_metrics.total_tokens or 0 + for req in accumulator.requests.successful + ), + incomplete=sum( + req.input_metrics.total_tokens or 0 + for req in accumulator.requests.incomplete + ), + errored=sum( + req.input_metrics.total_tokens or 0 + for req in accumulator.requests.errored + ), + total=0, # Will be computed + ) + input_tokens_count.total = ( + (input_tokens_count.successful or 0) + + (input_tokens_count.incomplete or 0) + + (input_tokens_count.errored or 0) + ) + + # Compile distribution metrics from request statistics + start_time = accumulator.timings.finalized_measure_start + end_time = accumulator.timings.finalized_measure_end + + # Filter requests within measurement period + # If no valid measurement window (both -1.0), use all requests + if start_time == -1.0 or end_time == -1.0: + successful = accumulator.requests.successful + incomplete = accumulator.requests.incomplete + errored = 
accumulator.requests.errored + else: + successful = [ + req for req in accumulator.requests.successful + if start_time <= req.request_end_time <= end_time + ] + incomplete = [ + req for req in accumulator.requests.incomplete + if start_time <= req.request_end_time <= end_time + ] + errored = [ + req for req in accumulator.requests.errored + if start_time <= req.request_end_time <= end_time + ] + + # Compile distribution summaries + requests_per_second = ( + StatusDistributionSummary + .rate_distribution_from_timings_function( + function=lambda req: req.request_end_time, + successful=successful, + incomplete=incomplete, + errored=errored, + start_time=start_time, + end_time=end_time, + ) + ) + + request_concurrency = ( + StatusDistributionSummary + .concurrency_distribution_from_timings_function( + function=lambda req: ( + (req.request_start_time, req.request_end_time) + if req.request_start_time is not None + and req.request_end_time is not None + else None + ), + successful=successful, + incomplete=incomplete, + errored=errored, + start_time=start_time, + end_time=end_time, + ) + ) + + request_latency = StatusDistributionSummary.from_values( + successful=[ + req.request_latency + for req in successful + if req.request_latency is not None + ], + incomplete=[ + req.request_latency + for req in incomplete + if req.request_latency is not None + ], + errored=[ + req.request_latency + for req in errored + if req.request_latency is not None + ], + ) + + input_tokens_per_second = ( + StatusDistributionSummary + .rate_distribution_from_timings_function( + function=lambda req: req.input_tokens_timing, + successful=successful, + incomplete=incomplete, + errored=errored, + ) + ) + + # Compile quality metrics if available + quality_metrics = None + if accumulator.quality_enabled and accumulator.quality is not None: + quality_metrics = EmbeddingsQualityMetrics( + baseline_cosine_similarity=accumulator.quality.baseline_cosine_similarity, + 
self_consistency_score=accumulator.quality.self_consistency_score, + mteb_main_score=accumulator.quality.mteb_main_score, + mteb_task_scores=accumulator.quality.mteb_task_scores, + ) + + return EmbeddingsMetrics( + request_totals=request_totals, + requests_per_second=requests_per_second, + request_concurrency=request_concurrency, + request_latency=request_latency, + input_tokens_count=input_tokens_count, + input_tokens_per_second=input_tokens_per_second, + prompt_token_count=input_tokens_count, # Alias for compatibility + tokens_per_second=input_tokens_per_second, # Alias for compatibility + quality=quality_metrics, + encoding_format_breakdown=accumulator.encoding_format_breakdown, + ) diff --git a/src/guidellm/benchmark/schemas/embeddings/report.py b/src/guidellm/benchmark/schemas/embeddings/report.py new file mode 100644 index 000000000..14a4c47ac --- /dev/null +++ b/src/guidellm/benchmark/schemas/embeddings/report.py @@ -0,0 +1,194 @@ +""" +Report container for multiple embeddings benchmark results with persistence. + +Provides data structures for aggregating multiple embeddings benchmark executions +into a single report with file I/O capabilities. Supports loading and saving benchmark +collections in JSON and YAML formats, enabling result persistence, sharing, and analysis +across different execution sessions. +""" + +from __future__ import annotations + +import json +import platform +from importlib.metadata import version +from pathlib import Path +from typing import ClassVar, Literal + +import yaml +from pydantic import Field + +from guidellm.benchmark.schemas.embeddings.benchmark import EmbeddingsBenchmark +from guidellm.benchmark.schemas.embeddings.entrypoints import ( + BenchmarkEmbeddingsArgs, +) +from guidellm.schemas import StandardBaseModel + +__all__ = ["EmbeddingsBenchmarkMetadata", "EmbeddingsBenchmarksReport"] + + +class EmbeddingsBenchmarkMetadata(StandardBaseModel): + """ + Versioning and environment metadata for embeddings benchmark reports. 
+ """ + + version: Literal[1] = Field( + description=( + "Version of the benchmark report schema, increments " + "whenever there is a breaking change to the output format" + ), + default=1, + ) + guidellm_version: str = Field( + description="Version of the guidellm package used for the benchmark", + default_factory=lambda: version("guidellm"), + ) + python_version: str = Field( + description="Version of Python interpreter used during the benchmark", + default_factory=lambda: platform.python_version(), + ) + platform: str = Field( + description="Operating system platform where the benchmark was executed", + default_factory=lambda: platform.platform(), + ) + + +class EmbeddingsBenchmarksReport(StandardBaseModel): + """ + Container for multiple embeddings benchmark results with load/save functionality. + + Aggregates multiple embeddings benchmark executions into a single report, + providing persistence through JSON and YAML file formats. Enables result + collection, storage, and retrieval across different execution sessions. + + :cvar DEFAULT_FILE: Default filename used when saving to or loading from a directory + """ + + DEFAULT_FILE: ClassVar[str] = "embeddings_benchmarks.json" + + type_: Literal["embeddings_benchmarks_report"] = Field( + description="Type identifier for embeddings benchmarks report", + default="embeddings_benchmarks_report", + ) + metadata: EmbeddingsBenchmarkMetadata = Field( + description="Metadata about the benchmark report and execution environment", + default_factory=EmbeddingsBenchmarkMetadata, + ) + args: BenchmarkEmbeddingsArgs = Field( + description="Benchmark arguments used for all benchmarks in the report" + ) + benchmarks: list[EmbeddingsBenchmark] = Field( + description="List of completed embeddings benchmarks in the report", + default_factory=list, + ) + + def save_file( + self, + path: str | Path | None = None, + type_: Literal["json", "yaml"] | None = None, + ) -> Path: + """ + Save report to file in JSON or YAML format. 
+ + :param path: File path or directory for saving, defaults to current + directory + :param type_: File format override ('json' or 'yaml'), auto-detected + from extension + :return: Resolved path to the saved file + :raises ValueError: If file type is unsupported or cannot be determined + """ + file_path = EmbeddingsBenchmarksReport._resolve_path( + path if path is not None else Path.cwd() + ) + + if type_ is None: + type_ = EmbeddingsBenchmarksReport._detect_type(file_path) + + if type_ == "json": + file_path.write_text( + json.dumps( + self.model_dump(mode="json"), + indent=2, + ensure_ascii=False, + ) + ) + elif type_ == "yaml": + file_path.write_text( + yaml.dump( + self.model_dump(mode="json"), + default_flow_style=False, + sort_keys=False, + ) + ) + else: + raise ValueError(f"Unsupported file type: {type_}") + + return file_path + + @classmethod + def load_file( + cls, path: str | Path, type_: Literal["json", "yaml"] | None = None + ) -> EmbeddingsBenchmarksReport: + """ + Load report from file in JSON or YAML format. + + :param path: File path to load from + :param type_: File format override, auto-detected from extension if None + :return: Loaded embeddings benchmarks report instance + :raises ValueError: If file type is unsupported or cannot be determined + :raises FileNotFoundError: If specified file does not exist + """ + file_path = EmbeddingsBenchmarksReport._resolve_path(path) + + if not file_path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + if type_ is None: + type_ = EmbeddingsBenchmarksReport._detect_type(file_path) + + content = file_path.read_text() + + if type_ == "json": + data = json.loads(content) + elif type_ == "yaml": + data = yaml.safe_load(content) + else: + raise ValueError(f"Unsupported file type: {type_}") + + return cls.model_validate(data) + + @staticmethod + def _resolve_path(path: str | Path) -> Path: + """ + Resolve file path, using DEFAULT_FILE if path is a directory. 
+ + :param path: Input path as string or Path object + :return: Resolved absolute Path to file + """ + file_path = Path(path) if isinstance(path, str) else path + + if file_path.is_dir(): + file_path = file_path / EmbeddingsBenchmarksReport.DEFAULT_FILE + + return file_path.resolve() + + @staticmethod + def _detect_type(path: Path) -> Literal["json", "yaml"]: + """ + Detect file type from path extension. + + :param path: File path to analyze + :return: Detected file type ('json' or 'yaml') + :raises ValueError: If extension is not recognized + """ + suffix = path.suffix.lower() + + if suffix in {".json"}: + return "json" + elif suffix in {".yaml", ".yml"}: + return "yaml" + else: + raise ValueError( + f"Cannot detect file type from extension: {suffix}. " + "Use type_ parameter to specify 'json' or 'yaml'" + ) diff --git a/src/guidellm/benchmark/schemas/generative/accumulator.py b/src/guidellm/benchmark/schemas/generative/accumulator.py index 5a64b7a19..a7d7ee199 100644 --- a/src/guidellm/benchmark/schemas/generative/accumulator.py +++ b/src/guidellm/benchmark/schemas/generative/accumulator.py @@ -788,6 +788,21 @@ class GenerativeBenchmarkAccumulator( description="Running metrics for incomplete requests", ) + def model_post_init(self, __context): + """ + Initialize child accumulators with config values after model construction. + + Propagates sample_requests from config to child request accumulators to ensure + consistent sampling behavior across completed, errored, and incomplete request + collections. This ensures the --sample-requests option functions correctly. 
+ """ + super().model_post_init(__context) + + # Propagate sample_requests from config to child accumulators + self.completed.sample_requests = self.config.sample_requests + self.errored.sample_requests = self.config.sample_requests + self.incomplete.sample_requests = self.config.sample_requests + def update_estimate( self, response: GenerationResponse | None, diff --git a/src/guidellm/benchmark/schemas/generative/entrypoints.py b/src/guidellm/benchmark/schemas/generative/entrypoints.py index 45d9a4b27..e85a5ba58 100644 --- a/src/guidellm/benchmark/schemas/generative/entrypoints.py +++ b/src/guidellm/benchmark/schemas/generative/entrypoints.py @@ -252,7 +252,7 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any: ) # Benchmarker configuration sample_requests: int | None = Field( - default=10, + default=None, description="Number of requests to sample for detailed metrics (None for all)", ) warmup: int | float | dict | TransientPhaseConfig | None = Field( diff --git a/src/guidellm/data/__init__.py b/src/guidellm/data/__init__.py index 8ff8609b9..22d54c97e 100644 --- a/src/guidellm/data/__init__.py +++ b/src/guidellm/data/__init__.py @@ -1,5 +1,5 @@ from .builders import ShortPromptStrategy -from .collators import GenerativeRequestCollator +from .collators import EmbeddingsRequestCollator, GenerativeRequestCollator from .deserializers import ( DataNotSupportedError, DatasetDeserializer, @@ -25,6 +25,7 @@ "DatasetFinalizer", "DatasetPreprocessor", "DatasetsIterator", + "EmbeddingsRequestCollator", "FinalizerRegistry", "GenerativeDatasetColumnType", "GenerativeRequestCollator", diff --git a/src/guidellm/data/collators.py b/src/guidellm/data/collators.py index f9e1ade4f..55e994a2a 100644 --- a/src/guidellm/data/collators.py +++ b/src/guidellm/data/collators.py @@ -2,10 +2,16 @@ from guidellm.schemas import GenerationRequest -__all__ = ["GenerativeRequestCollator"] +__all__ = ["EmbeddingsRequestCollator", "GenerativeRequestCollator"] class 
class EmbeddingsRequestCollator:
    """
    Collator for embeddings requests.

    Simple pass-through that enforces batch size of 1. Embeddings requests
    are already properly formatted by the EmbeddingsRequestFinalizer.
    """

    def __call__(self, batch: list) -> GenerationRequest:
        """
        Collate batch of embeddings requests.

        :param batch: List of GenerationRequest objects (should be length 1)
        :return: Single GenerationRequest
        :raises NotImplementedError: If batch size > 1
        """
        batch_size = len(batch)
        if batch_size != 1:
            raise NotImplementedError(
                f"Batch size greater than 1 is not currently supported. "
                f"Got batch size: {batch_size}"
            )

        # Pass the lone request through unchanged
        return batch[0]
@FinalizerRegistry.register("embeddings")
class EmbeddingsRequestFinalizer(DatasetFinalizer[GenerationRequest]):
    """
    Finalizer that converts dataset rows into embeddings GenerationRequest objects.

    Much simpler than GenerativeRequestFinalizer since embeddings only need
    a text input field. Collects text from 'text_column' and creates a request
    with basic token/word counting. The texts themselves are carried through
    on the request's columns; no request body is constructed here.

    Example:
        ::
            finalizer = EmbeddingsRequestFinalizer()
            row = {"text_column": ["This is a test sentence"]}
            request = finalizer(row)
            # request.columns["text_column"] == ["This is a test sentence"]
            # request.input_metrics reflects the collected text
    """

    def __call__(self, columns: dict[str, Any]) -> GenerationRequest:
        """
        Convert dataset row to embeddings request.

        :param columns: Dict with 'text_column' containing text strings
        :return: GenerationRequest configured for embeddings
        :raises ValueError: If the row contains no non-empty text
        """
        input_metrics = UsageMetrics()
        values = columns.get("text_column", [])

        # A bare string would otherwise be iterated character by character;
        # treat it as a single text input.
        if isinstance(values, str):
            values = [values]

        # Collect all non-empty text inputs and account for them in the metrics
        texts = []
        for text in values:
            if not text:
                continue

            texts.append(text)
            input_metrics.add_text_metrics(text)

        # For embeddings, input is a single text or list of texts
        if not texts:
            raise ValueError("No text found in dataset row for embeddings")

        # Create GenerationRequest with columns and metrics
        return GenerationRequest(
            columns=columns,
            input_metrics=input_metrics,
            output_metrics=UsageMetrics(),  # Embeddings have no output
        )
b/src/guidellm/data/preprocessors/embeddings_mapper.py @@ -0,0 +1,191 @@ +""" +Column mapper for embeddings datasets. + +Maps common text column names to the standard 'text_column' field expected by +the embeddings finalizer. Much simpler than the generative mapper since embeddings +only need a single text input field. +""" + +from __future__ import annotations + +from collections import defaultdict +from typing import Any, ClassVar, cast + +from datasets import Dataset, IterableDataset + +from guidellm.data.preprocessors.preprocessor import ( + DataDependentPreprocessor, + PreprocessorRegistry, +) + +__all__ = ["EmbeddingsColumnMapper"] + + +@PreprocessorRegistry.register("embeddings_column_mapper") +class EmbeddingsColumnMapper(DataDependentPreprocessor): + """ + Maps dataset columns to embeddings text field. + + Searches for common text column names and maps them to 'text_column' + for the embeddings finalizer to consume. + + Example: + :: + # Dataset with "text" column + mapper = EmbeddingsColumnMapper() + dataset = Dataset.from_dict({"text": ["Hello", "World"]}) + result = mapper.map(dataset) + # result["text_column"] will contain the text values + """ + + defaults: ClassVar[dict[str, list[str]]] = { + "text_column": [ + "text", + "input", + "content", + "prompt", + "sentence", + "document", + "passage", + "query", + "body", + "message", + ], + } + + def __init__( + self, + column_mappings: dict[str, str | list[str]] | None = None, + **_: Any, # Ignore global kwargs + ): + self.input_mappings = column_mappings + self.datasets_column_mappings: dict[str, list[tuple[int, str]]] | None = None + + @classmethod + def datasets_default_mappings( + cls, datasets: list[Dataset | IterableDataset] + ) -> dict[str, list[tuple[int, str]]]: + """ + Auto-detect text columns from datasets. 
+ + :param datasets: List of datasets to analyze + :return: Mapping of column types to (dataset_index, column_name) tuples + """ + mappings: dict[str, list[tuple[int, str]]] = defaultdict(list) + + for index, dataset in enumerate(datasets): + dataset_columns = dataset.column_names or list(next(iter(dataset)).keys()) + + # Try to find text column + if "text_column" not in mappings or not mappings["text_column"]: + for name_base in cls.defaults.get("text_column", []): + # Try various case variations + for variant in [ + name_base, + name_base.lower(), + name_base.upper(), + name_base.capitalize(), + ]: + if variant in dataset_columns: + mappings["text_column"].append((index, variant)) + break + if mappings["text_column"]: + break + + return mappings + + @classmethod + def datasets_mappings( + cls, + datasets: list[Dataset | IterableDataset], + input_mappings: dict[str, str | list[str]], + ) -> dict[str, list[tuple[int, str]]]: + """ + Create mappings from user-specified column names. + + :param datasets: List of datasets to map + :param input_mappings: User-specified mappings + :return: Validated mappings of column types to (dataset_index, + column_name) tuples + """ + mappings: dict[str, list[tuple[int, str]]] = defaultdict(list) + + datasets_named_indices = { + ( + dataset.info.dataset_name + if dataset.info and dataset.info.dataset_name + else index + ): index + for index, dataset in enumerate(datasets) + } + datasets_columns = { + index: dataset.column_names or list(next(iter(dataset)).keys()) + for index, dataset in enumerate(datasets) + } + + # Parse user mappings + for column_type, names in input_mappings.items(): + mappings[column_type] = [] + for name in names if isinstance(names, list) else [names]: + if "." 
in name: + dataset, column_name = name.split(".", 1) + dataset_index = ( + int(dataset) + if dataset.isdigit() + else datasets_named_indices.get(dataset) + ) + else: + dataset_index = 0 + column_name = name + + if dataset_index is None or dataset_index >= len(datasets): + raise ValueError( + f"Dataset '{name}' not found in datasets: " + f"{datasets_named_indices}." + ) + if column_name not in datasets_columns[dataset_index]: + raise ValueError( + f"Column '{column_name}' not found in dataset {dataset_index}. " + f"Available columns: {datasets_columns[dataset_index]}" + ) + + mappings[column_type].append((dataset_index, column_name)) + + return mappings + + def __call__(self, row: dict[str, Any]) -> dict[str, list[Any]]: + """ + Transform a row by extracting text columns based on established mappings. + + :param row: Dictionary containing 'items' key with dataset rows + :return: Mapped dictionary with 'text_column' key + """ + if self.datasets_column_mappings is None: + raise ValueError("EmbeddingsColumnMapper not setup with data.") + + items = cast("dict[int, dict[str, Any]]", row.pop("items")) + mapped: dict[str, Any] = defaultdict(list) + + for column_type, column_mappings in self.datasets_column_mappings.items(): + for dataset_index, dataset_column in column_mappings: + mapped[column_type].append(items[dataset_index][dataset_column]) + + return dict(mapped) + + def setup_data( + self, + datasets: list[Dataset | IterableDataset], + data_args: list[dict[str, Any]], + ): + """ + Initialize column mappings from datasets. 
+ + :param datasets: List of datasets to process + :param data_args: Arguments for each dataset (unused for this mapper) + """ + _ = data_args # Unused for this mapper + self.datasets_column_mappings = ( + self.datasets_default_mappings(datasets) + if self.input_mappings is None + else self.datasets_mappings(datasets, self.input_mappings) + ) diff --git a/src/guidellm/data/schemas.py b/src/guidellm/data/schemas.py index 16af56dff..5ac978530 100644 --- a/src/guidellm/data/schemas.py +++ b/src/guidellm/data/schemas.py @@ -125,6 +125,7 @@ class SyntheticTextDatasetConfig(DataConfig): output_tokens: int = Field( description="The average number of text tokens generated for outputs.", gt=0, + default=1, ) output_tokens_stdev: int | None = Field( description="The standard deviation of the tokens generated for outputs.", diff --git a/src/guidellm/mock_server/handlers/__init__.py b/src/guidellm/mock_server/handlers/__init__.py index 7dbc209ff..f4a34f75e 100644 --- a/src/guidellm/mock_server/handlers/__init__.py +++ b/src/guidellm/mock_server/handlers/__init__.py @@ -12,6 +12,12 @@ from .chat_completions import ChatCompletionsHandler from .completions import CompletionsHandler +from .embeddings import EmbeddingsHandler from .tokenizer import TokenizerHandler -__all__ = ["ChatCompletionsHandler", "CompletionsHandler", "TokenizerHandler"] +__all__ = [ + "ChatCompletionsHandler", + "CompletionsHandler", + "EmbeddingsHandler", + "TokenizerHandler", +] diff --git a/src/guidellm/mock_server/handlers/embeddings.py b/src/guidellm/mock_server/handlers/embeddings.py new file mode 100644 index 000000000..c24eaa539 --- /dev/null +++ b/src/guidellm/mock_server/handlers/embeddings.py @@ -0,0 +1,253 @@ +""" +Mock server handler for OpenAI-compatible /v1/embeddings endpoint. + +Generates synthetic normalized embedding vectors with configurable dimensions and +encoding formats. 
Simulates realistic embedding API behavior including timing delays, +token counting, and batch processing while providing deterministic outputs for testing. +""" + +from __future__ import annotations + +import asyncio +import base64 +import json +import math +import random +import struct +from typing import TYPE_CHECKING + +from pydantic import ValidationError +from sanic import response +from sanic.request import Request +from sanic.response import HTTPResponse + +from guidellm.mock_server.models import ( + EmbeddingObject, + EmbeddingsRequest, + EmbeddingsResponse, + ErrorDetail, + ErrorResponse, + Usage, +) +from guidellm.mock_server.utils import MockTokenizer + +if TYPE_CHECKING: + from guidellm.mock_server.config import MockServerConfig + +__all__ = ["EmbeddingsHandler"] + + +class EmbeddingsHandler: + """ + Handler for /v1/embeddings endpoint in mock server. + + Processes embeddings requests and generates synthetic normalized embedding + vectors with realistic timing simulation. Supports both float and base64 + encoding formats, batch processing, and optional dimension reduction. + + Example: + :: + handler = EmbeddingsHandler(config) + response = await handler.handle(request) + """ + + def __init__(self, config: MockServerConfig): + """ + Initialize embeddings handler with server configuration. + + :param config: Mock server configuration with timing and model parameters + """ + self.config = config + self.tokenizer = MockTokenizer() + + async def handle(self, request: Request) -> HTTPResponse: + """ + Process embeddings request and return response. 
+ + :param request: HTTP request containing embeddings parameters + :return: HTTP response with embeddings data or error + """ + try: + # Parse request body + req = EmbeddingsRequest(**request.json) + except ValidationError as exc: + return response.json( + ErrorResponse( + error=ErrorDetail( + message=f"Invalid request: {str(exc)}", + type="invalid_request_error", + code="invalid_request", + ) + ).model_dump(), + status=400, + ) + except (json.JSONDecodeError, TypeError): + return response.json( + ErrorResponse( + error=ErrorDetail( + message="Invalid JSON in request body", + type="invalid_request_error", + code="invalid_json", + ) + ).model_dump(), + status=400, + ) + + # Handle input as list + inputs = [req.input] if isinstance(req.input, str) else req.input + + # Determine embedding dimensions + dimensions = ( + req.dimensions if req.dimensions is not None else 384 + ) # Default dim + + # Validate encoding format + encoding_format = req.encoding_format or "float" + if encoding_format not in {"float", "base64"}: + return response.json( + ErrorResponse( + error=ErrorDetail( + message=( + f"Invalid encoding_format: {encoding_format}. 
" + "Must be 'float' or 'base64'" + ), + type="invalid_request_error", + code="invalid_encoding_format", + ) + ).model_dump(), + status=400, + ) + + # Count total tokens (for timing and usage) + total_tokens = 0 + for text in inputs: + tokens = len(self.tokenizer.tokenize(text)) + + # Apply truncation if requested + if req.truncate_prompt_tokens is not None: + tokens = min(tokens, req.truncate_prompt_tokens) + + total_tokens += tokens + + # Simulate time-to-first-token delay based on input tokens + # TTFT is proportional to input processing time + if self.config.ttft_ms > 0: + delay_ms = max( + 0, + random.gauss( + self.config.ttft_ms, + self.config.ttft_ms_std if self.config.ttft_ms_std > 0 else 0, + ), + ) + await asyncio.sleep(delay_ms / 1000.0) + + # Generate embeddings for each input + embeddings_data = [] + for index, _text in enumerate(inputs): + # Generate synthetic normalized embedding + embedding_vector = self._generate_embedding(dimensions) + + # Encode based on requested format + embedding_encoded: list[float] | str + if encoding_format == "base64": + embedding_encoded = self._encode_to_base64(embedding_vector) + else: + embedding_encoded = embedding_vector + + embeddings_data.append( + EmbeddingObject( + embedding=embedding_encoded, + index=index, + ) + ) + + # Build usage stats (embeddings have no completion_tokens) + usage = Usage( + prompt_tokens=total_tokens, + completion_tokens=0, # Embeddings don't generate tokens + ) + + # Build response + embeddings_response = EmbeddingsResponse( + data=embeddings_data, + model=req.model, + usage=usage, + ) + + return HTTPResponse( + body=embeddings_response.model_dump_json(), + status=200, + headers={"Content-Type": "application/json"}, + ) + + def _generate_embedding(self, dimensions: int) -> list[float]: + """ + Generate synthetic normalized embedding vector. + + Creates a random vector and normalizes it to unit length (L2 norm = 1), + which is standard for embedding models. 
+ + :param dimensions: Number of dimensions for the embedding + :return: Normalized embedding vector as list of floats + + Example: + :: + emb = handler._generate_embedding(384) + norm = math.sqrt(sum(x*x for x in emb)) # Should be ≈1.0 + """ + # Generate random vector from Gaussian distribution + embedding = [random.gauss(0, 1) for _ in range(dimensions)] + + # Normalize to unit length + norm = math.sqrt(sum(x * x for x in embedding)) + if norm > 0: + embedding = [x / norm for x in embedding] + + return embedding + + def _encode_to_base64(self, embedding: list[float]) -> str: + """ + Encode embedding vector as base64-encoded binary string. + + Converts float list to packed binary format (little-endian floats) + and encodes as base64 string for efficient transmission. + + :param embedding: Embedding vector as list of floats + :return: Base64-encoded binary representation + + Example: + :: + embedding = [0.1, 0.2, 0.3] + encoded = handler._encode_to_base64(embedding) + # Returns base64 string like "MzMzPz8/Pz8/Pz8=" + """ + # Pack floats as little-endian binary + # Format: 'f' = single-precision float (4 bytes each) + bytes_data = struct.pack(f"{len(embedding)}f", *embedding) + + # Encode as base64 + return base64.b64encode(bytes_data).decode("utf-8") + + @staticmethod + def decode_from_base64(encoded: str, dimensions: int) -> list[float]: + """ + Decode base64-encoded embedding back to float list. + + Utility method for testing and validation. Reverses the encoding + performed by _encode_to_base64. 
+ + :param encoded: Base64-encoded binary string + :param dimensions: Number of dimensions to decode + :return: Decoded embedding vector as list of floats + + Example: + :: + encoded = "MzMzPz8/Pz8/Pz8=" + decoded = EmbeddingsHandler.decode_from_base64(encoded, 3) + # Returns approximately [0.1, 0.2, 0.3] + """ + # Decode base64 to bytes + bytes_data = base64.b64decode(encoded) + + # Unpack floats + return list(struct.unpack(f"{dimensions}f", bytes_data)) diff --git a/src/guidellm/mock_server/models.py b/src/guidellm/mock_server/models.py index cd342f7a9..f9fcedfa5 100644 --- a/src/guidellm/mock_server/models.py +++ b/src/guidellm/mock_server/models.py @@ -26,6 +26,9 @@ "CompletionsResponse", "DetokenizeRequest", "DetokenizeResponse", + "EmbeddingObject", + "EmbeddingsRequest", + "EmbeddingsResponse", "ErrorDetail", "ErrorResponse", "StreamOptions", @@ -486,6 +489,82 @@ class DetokenizeResponse(BaseModel): text: str = Field(description="Reconstructed text from tokens") +class EmbeddingsRequest(BaseModel): + """Request parameters for embeddings API endpoints. + + OpenAI-compatible embeddings request supporting both single and batch + input processing with multiple encoding formats and optional parameters. + """ + + input: str | list[str] = Field( + description="Text(s) to generate embeddings for (single string or list)" + ) + model: str = Field(description="Model identifier to use for embeddings") + encoding_format: Literal["float", "base64"] | None = Field( + default="float", + description=( + "Format for embedding output (float array or " + "base64-encoded binary)" + ), + ) + dimensions: int | None = Field( + default=None, + description=( + "Number of dimensions for output embeddings. " + "Supports matryoshka embeddings for models that support it." 
+ ), + ) + truncate_prompt_tokens: int | None = Field( + default=None, + description=( + "Maximum number of tokens to use from input " + "(truncates if exceeded)" + ), + ) + user: str | None = Field( + default=None, + description="User identifier for tracking and abuse monitoring", + ) + + +class EmbeddingObject(BaseModel): + """A single embedding vector in the response. + + Represents one embedded text with its vector representation and + metadata for batch processing. + """ + + object: Literal["embedding"] = Field( + default="embedding", description="Object type identifier" + ) + embedding: list[float] | str = Field( + description=( + "Embedding vector as float list or base64-encoded binary string. " + "Format depends on encoding_format parameter in request." + ) + ) + index: int = Field( + description="Position of this embedding in the input batch (0-indexed)" + ) + + +class EmbeddingsResponse(BaseModel): + """Response containing generated embeddings for input text(s). + + Returns embedding vectors for each input text along with token + usage statistics and model metadata. + """ + + object: Literal["list"] = Field( + default="list", description="Object type identifier" + ) + data: list[EmbeddingObject] = Field( + description="List of embedding objects, one per input text" + ) + model: str = Field(description="Model identifier used for generation") + usage: Usage = Field(description="Token usage statistics for the request") + + class ErrorDetail(BaseModel): """Detailed error information for API failures. 
diff --git a/src/guidellm/mock_server/server.py b/src/guidellm/mock_server/server.py index e1d3b6860..743a1b6e2 100644 --- a/src/guidellm/mock_server/server.py +++ b/src/guidellm/mock_server/server.py @@ -23,6 +23,7 @@ from guidellm.mock_server.handlers import ( ChatCompletionsHandler, CompletionsHandler, + EmbeddingsHandler, TokenizerHandler, ) @@ -56,6 +57,7 @@ def __init__(self, config: MockServerConfig) -> None: self.app = Sanic("guidellm-mock-server") self.chat_handler = ChatCompletionsHandler(config) self.completions_handler = CompletionsHandler(config) + self.embeddings_handler = EmbeddingsHandler(config) self.tokenizer_handler = TokenizerHandler(config) self._setup_middleware() @@ -114,6 +116,12 @@ async def completions(request: Request): return response.text("", status=204) return await self.completions_handler.handle(request) + @self.app.route("/v1/embeddings", methods=["POST", "OPTIONS"]) + async def embeddings(request: Request): + if request.method == "OPTIONS": + return response.text("", status=204) + return await self.embeddings_handler.handle(request) + @self.app.route("/tokenize", methods=["POST", "OPTIONS"]) async def tokenize(request: Request): if request.method == "OPTIONS": diff --git a/src/guidellm/schemas/__init__.py b/src/guidellm/schemas/__init__.py index 4c78446fe..1ba2b2256 100644 --- a/src/guidellm/schemas/__init__.py +++ b/src/guidellm/schemas/__init__.py @@ -22,6 +22,7 @@ SuccessfulT, TotalT, ) +from .embeddings_request_stats import EmbeddingsRequestStats from .info import RequestInfo, RequestTimings from .request import ( GenerationRequest, @@ -40,6 +41,7 @@ __all__ = [ "BaseModelT", "DistributionSummary", + "EmbeddingsRequestStats", "ErroredT", "FunctionObjT", "GenerationRequest", diff --git a/src/guidellm/schemas/base.py b/src/guidellm/schemas/base.py index cd733b67c..c8f6b6706 100644 --- a/src/guidellm/schemas/base.py +++ b/src/guidellm/schemas/base.py @@ -223,21 +223,21 @@ class StatusBreakdown(BaseModel, Generic[SuccessfulT, 
ErroredT, IncompleteT, Tot ) """ - successful: SuccessfulT = Field( + successful: SuccessfulT | None = Field( description="Results or metrics for requests with successful completion status", - default=None, # type: ignore[assignment] + default=None, ) - errored: ErroredT = Field( + errored: ErroredT | None = Field( description="Results or metrics for requests with error completion status", - default=None, # type: ignore[assignment] + default=None, ) - incomplete: IncompleteT = Field( + incomplete: IncompleteT | None = Field( description="Results or metrics for requests with incomplete processing status", - default=None, # type: ignore[assignment] + default=None, ) - total: TotalT = Field( + total: TotalT | None = Field( description="Aggregated results or metrics combining all status categories", - default=None, # type: ignore[assignment] + default=None, ) diff --git a/src/guidellm/schemas/embeddings_request_stats.py b/src/guidellm/schemas/embeddings_request_stats.py new file mode 100644 index 000000000..770bea83e --- /dev/null +++ b/src/guidellm/schemas/embeddings_request_stats.py @@ -0,0 +1,136 @@ +""" +Request statistics for embeddings benchmark analysis. + +Provides data structures for capturing and analyzing performance metrics from +embeddings workloads. The module contains request-level statistics including +input token counts, latency measurements, and optional quality validation metrics +such as cosine similarity for evaluating embeddings benchmark performance. +""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import Field, computed_field + +from guidellm.schemas.base import StandardBaseDict +from guidellm.schemas.info import RequestInfo +from guidellm.schemas.request import UsageMetrics + +__all__ = ["EmbeddingsRequestStats"] + + +class EmbeddingsRequestStats(StandardBaseDict): + """ + Request statistics for embeddings workloads. 
+ + Captures comprehensive performance metrics for individual embeddings requests, + including input token counts, timing measurements, and optional quality validation + metrics. Unlike generative requests, embeddings do not produce output tokens + or have streaming behavior. + + Example: + :: + stats = EmbeddingsRequestStats( + request_id="req_123", + info=request_info, + input_metrics=input_usage + ) + latency = stats.request_latency + """ + + type_: Literal["embeddings_request_stats"] = "embeddings_request_stats" + request_id: str = Field(description="Unique identifier for the request") + response_id: str | None = Field( + default=None, description="Unique identifier matching API Response ID" + ) + request_args: str | None = Field( + default=None, description="Backend arguments used for this request" + ) + info: RequestInfo = Field(description="Request metadata and timing information") + input_metrics: UsageMetrics = Field( + description="Token usage statistics for the input text" + ) + + # Quality validation metrics (optional) + cosine_similarity: float | None = Field( + default=None, + description="Cosine similarity score against baseline model (0.0-1.0)", + ) + encoding_format: str | None = Field( + default="float", + description="Encoding format used for embeddings (float or base64)", + ) + + # Request timing stats + @computed_field # type: ignore[misc] + @property + def request_start_time(self) -> float | None: + """ + :return: Timestamp when the request started, or None if unavailable + """ + return ( + self.info.timings.request_start + if self.info.timings.request_start is not None + else self.info.timings.resolve_start + ) + + @computed_field # type: ignore[misc] + @property + def request_end_time(self) -> float: + """ + :return: Timestamp when the request ended (request_end, falling back to resolve_end); raises ValueError when resolve_end is unset + """ + if self.info.timings.resolve_end is None: + raise ValueError("resolve_end timings should be set but is None.") + + return ( + self.info.timings.request_end + if
self.info.timings.request_end is not None + else self.info.timings.resolve_end + ) + + @computed_field # type: ignore[misc] + @property + def request_latency(self) -> float | None: + """ + End-to-end request processing latency in seconds. + + :return: Duration from request start to completion, or None if unavailable + """ + start = self.info.timings.request_start + end = self.info.timings.request_end + if start is None or end is None: + return None + + return end - start + + # Input token stats (no output tokens for embeddings) + @computed_field # type: ignore[misc] + @property + def prompt_tokens(self) -> int | None: + """ + :return: Number of tokens in the input text, or None if unavailable + """ + return self.input_metrics.total_tokens + + @computed_field # type: ignore[misc] + @property + def total_tokens(self) -> int | None: + """ + :return: Same as prompt_tokens (embeddings have no output tokens) + """ + return self.prompt_tokens + + @computed_field # type: ignore[misc] + @property + def input_tokens_timing(self) -> tuple[float, float]: + """ + Timing tuple for input token processing. 
+ + :return: Tuple of (timestamp, token_count) for input processing + """ + return ( + self.request_end_time, + self.prompt_tokens or 0.0, + ) diff --git a/src/guidellm/schemas/statistics.py b/src/guidellm/schemas/statistics.py index 17f2f2ddf..74dfd5a50 100644 --- a/src/guidellm/schemas/statistics.py +++ b/src/guidellm/schemas/statistics.py @@ -655,14 +655,14 @@ def count(self) -> int: """ :return: Total count of samples across all status categories """ - return self.total.count + return self.total.count if self.total is not None else 0 @property def total_sum(self) -> float: """ :return: Total sum of values across all status categories """ - return self.total.total_sum + return self.total.total_sum if self.total is not None else 0.0 @classmethod def from_values( diff --git a/src/guidellm/settings.py b/src/guidellm/settings.py index 0e6e6c455..df14a6554 100644 --- a/src/guidellm/settings.py +++ b/src/guidellm/settings.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import sys from collections.abc import Sequence from enum import Enum from typing import Literal @@ -38,6 +39,24 @@ class Environment(str, Enum): } +def _get_default_mp_context_type() -> Literal["spawn", "fork", "forkserver"]: + """ + Get the default multiprocessing context type based on the platform. + + On macOS (darwin), 'fork' is unsafe and causes issues with asyncio and + multiprocessing queues. Use 'spawn' instead. On Linux, 'fork' is the + default and generally works well. 
+ + :return: The recommended multiprocessing context type for the platform + """ + if sys.platform == "darwin": + # macOS: fork is unsafe, use spawn + return "spawn" + else: + # Linux and others: fork is generally safe and faster + return "fork" + + class LoggingSettings(BaseModel): """ Logging settings for the application @@ -108,13 +127,10 @@ class Settings(BaseSettings): logging: LoggingSettings = LoggingSettings() default_sweep_number: int = 10 - # HTTP settings - request_follow_redirects: bool = True - request_timeout: int = 60 * 5 # 5 minutes - request_http2: bool = True - # Scheduler settings - mp_context_type: Literal["spawn", "fork", "forkserver"] | None = "fork" + mp_context_type: Literal["spawn", "fork", "forkserver"] | None = Field( + default_factory=_get_default_mp_context_type + ) mp_serialization: Literal["dict", "sequence"] | None = "dict" mp_encoding: ( Literal["msgpack", "msgspec"] @@ -135,14 +151,6 @@ class Settings(BaseSettings): # Data settings dataset: DatasetSettings = DatasetSettings() - # Request/stats settings - preferred_prompt_tokens_source: Literal["request", "response"] = "response" - preferred_output_tokens_source: Literal["request", "response"] = "response" - preferred_backend: Literal["openai"] = "openai" - preferred_route: Literal["text_completions", "chat_completions"] = ( - "chat_completions" - ) - # Report settings report_generation: ReportGenerationSettings = ReportGenerationSettings() diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py index 37f2e8d36..e13c34da6 100644 --- a/src/guidellm/utils/text.py +++ b/src/guidellm/utils/text.py @@ -20,7 +20,6 @@ import httpx from loguru import logger -from guidellm.settings import settings from guidellm.utils.console import Colors __all__ = [ @@ -232,7 +231,7 @@ def load_text(data: str | Path, encoding: str | None = None) -> str: # check URLs if isinstance(data, str) and data.strip().startswith(("http", "ftp")): - with httpx.Client(timeout=settings.request_timeout) as 
client: + with httpx.Client() as client: response = client.get(data.strip()) response.raise_for_status() return response.text diff --git a/tests/e2e/test_embeddings_benchmark.py b/tests/e2e/test_embeddings_benchmark.py new file mode 100644 index 000000000..8eed769e3 --- /dev/null +++ b/tests/e2e/test_embeddings_benchmark.py @@ -0,0 +1,590 @@ +# E2E tests for embeddings benchmark scenarios + +import json +import subprocess +import sys +import time +from pathlib import Path + +import pytest +import requests +from loguru import logger + + +class EmbeddingsMockServer: + """Mock server for embeddings E2E tests using guidellm mock-server.""" + + def __init__(self, port: int, model: str = "BAAI/bge-base-en-v1.5"): + self.port = port + self.model = model + self.server_url = f"http://127.0.0.1:{self.port}" + self.health_url = f"{self.server_url}/health" + self.process: subprocess.Popen | None = None + + def get_guidellm_executable(self) -> str: + """Get the path to the guidellm executable in the current environment.""" + python_bin_dir = Path(sys.executable).parent + guidellm_path = python_bin_dir / "guidellm" + if guidellm_path.exists(): + return str(guidellm_path) + return "guidellm" + + def start(self): + """Start the mock embeddings server.""" + guidellm_exe = self.get_guidellm_executable() + + logger.info(f"Starting embeddings mock server on {self.server_url}...") + command = [ + guidellm_exe, + "mock-server", + "--port", + str(self.port), + "--model", + self.model, + ] + logger.info(f"Server command: {' '.join(command)}") + + self.process = subprocess.Popen( # noqa: S603 + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + # Wait for server to become healthy + max_retries = 30 + retry_delay_sec = 0.5 + for i in range(max_retries): + try: + response = requests.get(self.health_url, timeout=1) + if response.status_code == 200: + logger.info(f"Embeddings mock server started at {self.server_url}") + return + except requests.RequestException: 
+ pass + + if i < max_retries - 1: + time.sleep(retry_delay_sec) + + # Server didn't start, terminate and raise + self.stop() + raise RuntimeError( + f"Embeddings mock server failed to start after {max_retries} retries" + ) + + def stop(self): + """Stop the mock server.""" + if self.process and self.process.poll() is None: + logger.info("Stopping embeddings mock server...") + self.process.terminate() + try: + self.process.wait(timeout=5) + except subprocess.TimeoutExpired: + logger.warning("Server did not terminate gracefully, killing it...") + self.process.kill() + self.process.wait() + logger.info("Embeddings mock server stopped.") + + def get_url(self) -> str: + """Get the server URL.""" + return self.server_url + + +class EmbeddingsClient: + """Wrapper for running guidellm embeddings benchmark commands.""" + + def __init__( + self, target: str, output_dir: Path, outputs: str = "embeddings_benchmarks.json" + ): + self.target = target + self.output_dir = output_dir + self.outputs = outputs + self.process: subprocess.Popen | None = None + self.stdout: str | None = None + self.stderr: str | None = None + + def get_guidellm_executable(self) -> str: + """Get the path to the guidellm executable.""" + python_bin_dir = Path(sys.executable).parent + guidellm_path = python_bin_dir / "guidellm" + if guidellm_path.exists(): + return str(guidellm_path) + return "guidellm" + + def start_benchmark( + self, + data: str = "Benchmark this text for embeddings quality", + profile: str = "constant", + rate: int = 10, + max_requests: int | None = None, + max_duration: int | None = None, + encoding_format: str = "float", + enable_quality_validation: bool = False, + baseline_model: str | None = None, + quality_tolerance: float | None = None, + processor: str | None = None, + additional_args: str = "", + ): + """Start embeddings benchmark command.""" + guidellm_exe = self.get_guidellm_executable() + + # Build command components + cmd_parts = [ + f"HF_HOME={self.output_dir / 
'huggingface_cache'}", + f"{guidellm_exe} benchmark embeddings", + f"--target {self.target}", + f"--data '{data}'", + f"--profile {profile}", + f"--rate {rate}", + f"--encoding-format {encoding_format}", + f"--output-dir {self.output_dir}", + f"--outputs {self.outputs}", + ] + + if max_requests is not None: + cmd_parts.append(f"--max-requests {max_requests}") + + if max_duration is not None: + cmd_parts.append(f"--max-duration {max_duration}") + + if enable_quality_validation: + cmd_parts.append("--enable-quality-validation") + + if baseline_model is not None: + cmd_parts.append(f"--baseline-model {baseline_model}") + + if quality_tolerance is not None: + cmd_parts.append(f"--quality-tolerance {quality_tolerance}") + + if processor is not None: + cmd_parts.append(f"--processor {processor}") + + if additional_args: + cmd_parts.append(additional_args) + + command = " \\\n ".join(cmd_parts) + logger.info(f"Embeddings benchmark command: {command}") + + self.process = subprocess.Popen( # noqa: S603 + ["/bin/sh", "-c", command], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + def wait_for_completion(self, timeout: int = 30): + """Wait for the benchmark to complete.""" + if self.process is None: + raise RuntimeError("No process started. 
Call start_benchmark() first.") + + try: + logger.info("Waiting for embeddings benchmark to complete...") + self.stdout, self.stderr = self.process.communicate(timeout=timeout) + logger.debug(f"Benchmark stdout:\n{self.stdout}") + logger.debug(f"Benchmark stderr:\n{self.stderr}") + except subprocess.TimeoutExpired: + logger.warning("Benchmark did not complete within timeout, terminating...") + self.process.terminate() + try: + self.stdout, self.stderr = self.process.communicate(timeout=5) + except subprocess.TimeoutExpired: + logger.warning("Benchmark did not terminate gracefully, killing it...") + self.process.kill() + self.stdout, self.stderr = self.process.communicate() + + +@pytest.fixture(scope="module") +def embeddings_server(): + """Pytest fixture to start and stop embeddings mock server.""" + server = EmbeddingsMockServer(port=8001, model="test-embedding-model") + try: + server.start() + yield server + finally: + server.stop() + + +def assert_no_python_exceptions(stderr: str | None) -> None: + """Assert that stderr does not contain Python exception indicators.""" + if stderr is None: + return + + python_exception_indicators = [ + "Traceback (most recent call last):", + "AttributeError:", + "ValueError:", + "TypeError:", + "KeyError:", + "IndexError:", + "NameError:", + "ImportError:", + "RuntimeError:", + ] + + for indicator in python_exception_indicators: + assert indicator not in stderr, f"Python exception detected: {indicator}" + + +def load_embeddings_report(report_path: Path) -> dict: + """Load and validate embeddings benchmark report.""" + assert report_path.exists(), f"Report file does not exist: {report_path}" + + with report_path.open("r") as f: + report = json.load(f) + + assert "type_" in report, "Report missing 'type_' field" + assert report["type_"] == "embeddings_benchmarks_report", ( + f"Expected embeddings_benchmarks_report, got {report['type_']}" + ) + assert "benchmarks" in report, "Report missing 'benchmarks' field" + assert 
len(report["benchmarks"]) > 0, "Report contains no benchmarks" + + return report + + +def assert_embeddings_request_fields(requests: list) -> None: + """Assert that embeddings requests contain expected fields.""" + assert len(requests) >= 1, "No requests found" + + for request in requests: + # Basic fields + assert "request_id" in request, "Missing 'request_id' field" + assert "request_latency" in request, "Missing 'request_latency' field" + assert request["request_latency"] > 0, "request_latency should be > 0" + + # Input token metrics (no output tokens for embeddings) + assert "prompt_tokens" in request, "Missing 'prompt_tokens' field" + assert request["prompt_tokens"] > 0, "prompt_tokens should be > 0" + + assert "total_tokens" in request, "Missing 'total_tokens' field" + assert request["total_tokens"] > 0, "total_tokens should be > 0" + + # Should NOT have output token fields + assert "output_tokens" not in request or request["output_tokens"] is None, ( + "Embeddings should not have output_tokens" + ) + + # Should NOT have streaming fields + assert "time_to_first_token_ms" not in request, ( + "Embeddings should not have time_to_first_token_ms" + ) + assert "inter_token_latency_ms" not in request, ( + "Embeddings should not have inter_token_latency_ms" + ) + + # Encoding format + assert "encoding_format" in request, "Missing 'encoding_format' field" + assert request["encoding_format"] in ["float", "base64"], ( + f"Invalid encoding_format: {request['encoding_format']}" + ) + + +@pytest.mark.timeout(30) +@pytest.mark.sanity +def test_basic_embeddings_benchmark( + embeddings_server: EmbeddingsMockServer, tmp_path: Path +): + """Test basic embeddings benchmark execution.""" + report_name = "basic_embeddings.json" + report_path = tmp_path / report_name + + client = EmbeddingsClient( + target=embeddings_server.get_url(), + output_dir=tmp_path, + outputs=report_name, + ) + + client.start_benchmark( + data=["Test embeddings benchmark"], + max_requests=10, + 
processor="gpt2", + ) + + client.wait_for_completion(timeout=30) + + # Assert no Python exceptions + assert_no_python_exceptions(client.stderr) + + # Load and validate report + report = load_embeddings_report(report_path) + benchmark = report["benchmarks"][0] + + # Validate requests + successful_requests = benchmark["requests"]["successful"] + assert len(successful_requests) == 10, ( + f"Expected 10 successful requests, got {len(successful_requests)}" + ) + assert_embeddings_request_fields(successful_requests) + + # Validate metrics structure + metrics = benchmark["metrics"] + assert "request_totals" in metrics + assert "input_tokens_count" in metrics + assert "encoding_format_breakdown" in metrics + + # Should NOT have output token metrics + assert "output_tokens_count" not in metrics, ( + "Embeddings metrics should not have output_tokens_count" + ) + + +@pytest.mark.timeout(30) +@pytest.mark.sanity +def test_embeddings_float_encoding( + embeddings_server: EmbeddingsMockServer, tmp_path: Path +): + """Test embeddings benchmark with float encoding format.""" + report_name = "float_encoding_embeddings.json" + report_path = tmp_path / report_name + + client = EmbeddingsClient( + target=embeddings_server.get_url(), + output_dir=tmp_path, + outputs=report_name, + ) + + client.start_benchmark( + data=["Test float encoding"], + max_requests=5, + encoding_format="float", + processor="gpt2", + ) + + client.wait_for_completion(timeout=30) + assert_no_python_exceptions(client.stderr) + + report = load_embeddings_report(report_path) + benchmark = report["benchmarks"][0] + + # Check encoding format + successful_requests = benchmark["requests"]["successful"] + for request in successful_requests: + assert request["encoding_format"] == "float" + + # Check encoding_format_breakdown in metrics + metrics = benchmark["metrics"] + assert "float" in metrics["encoding_format_breakdown"] + assert metrics["encoding_format_breakdown"]["float"] == 5 + + +@pytest.mark.timeout(30) 
+@pytest.mark.sanity +def test_embeddings_base64_encoding( + embeddings_server: EmbeddingsMockServer, tmp_path: Path +): + """Test embeddings benchmark with base64 encoding format.""" + report_name = "base64_encoding_embeddings.json" + report_path = tmp_path / report_name + + client = EmbeddingsClient( + target=embeddings_server.get_url(), + output_dir=tmp_path, + outputs=report_name, + ) + + client.start_benchmark( + data=["Test base64 encoding"], + max_requests=5, + encoding_format="base64", + processor="gpt2", + ) + + client.wait_for_completion(timeout=30) + assert_no_python_exceptions(client.stderr) + + report = load_embeddings_report(report_path) + benchmark = report["benchmarks"][0] + + # Check encoding format + successful_requests = benchmark["requests"]["successful"] + for request in successful_requests: + assert request["encoding_format"] == "base64" + + # Check encoding_format_breakdown in metrics + metrics = benchmark["metrics"] + assert "base64" in metrics["encoding_format_breakdown"] + assert metrics["encoding_format_breakdown"]["base64"] == 5 + + +@pytest.mark.timeout(60) +@pytest.mark.sanity +def test_embeddings_csv_output( + embeddings_server: EmbeddingsMockServer, tmp_path: Path +): + """Test embeddings benchmark CSV output generation.""" + client = EmbeddingsClient( + target=embeddings_server.get_url(), + output_dir=tmp_path, + outputs="json,csv", + ) + + client.start_benchmark( + data=["Test CSV output"], + max_requests=5, + processor="gpt2", + ) + + client.wait_for_completion(timeout=60) + assert_no_python_exceptions(client.stderr) + + # Check both JSON and CSV files exist + json_path = tmp_path / "embeddings_benchmarks.json" + csv_path = tmp_path / "embeddings_benchmarks.csv" + + assert json_path.exists(), "JSON output file not created" + assert csv_path.exists(), "CSV output file not created" + + # Validate CSV has content + csv_content = csv_path.read_text() + assert len(csv_content) > 0, "CSV file is empty" + assert "request_latency" in 
csv_content, "CSV missing request_latency column" + assert "prompt_tokens" in csv_content, "CSV missing prompt_tokens column" + + +@pytest.mark.timeout(60) +@pytest.mark.sanity +def test_embeddings_html_output( + embeddings_server: EmbeddingsMockServer, tmp_path: Path +): + """Test embeddings benchmark HTML output generation.""" + client = EmbeddingsClient( + target=embeddings_server.get_url(), + output_dir=tmp_path, + outputs="json,html", + ) + + client.start_benchmark( + data=["Test HTML output"], + max_requests=5, + processor="gpt2", + ) + + client.wait_for_completion(timeout=60) + assert_no_python_exceptions(client.stderr) + + # Check both JSON and HTML files exist + json_path = tmp_path / "embeddings_benchmarks.json" + html_path = tmp_path / "embeddings_benchmarks.html" + + assert json_path.exists(), "JSON output file not created" + assert html_path.exists(), "HTML output file not created" + + # Validate HTML has content + html_content = html_path.read_text() + assert len(html_content) > 0, "HTML file is empty" + assert " Percentiles: + """Helper to create Percentiles with all required fields.""" + return Percentiles( + p001=p50 * 0.5, + p01=p50 * 0.6, + p05=p50 * 0.7, + p10=p50 * 0.8, + p25=p50 * 0.9, + p50=p50, + p75=p50 * 1.05, + p90=p50 * 1.1, + p95=p50 * 1.15, + p99=p50 * 1.2, + p999=p50 * 1.25, + ) + + +def create_distribution_summary( + mean=0.5, + median=0.5, + mode=0.5, + variance=0.01, + std_dev=0.1, + min_val=0.1, + max_val=1.0, + count=100, + total_sum=50.0, +) -> DistributionSummary: + """Helper to create DistributionSummary with all required fields.""" + return DistributionSummary( + mean=mean, + median=median, + mode=mode, + variance=variance, + std_dev=std_dev, + min=min_val, + max=max_val, + count=count, + total_sum=total_sum, + percentiles=create_percentiles(median), + ) + + +@pytest.fixture +def sample_benchmark() -> EmbeddingsBenchmark: + """Create a sample embeddings benchmark for testing.""" + # Create basic scheduler state + 
scheduler_state = SchedulerState( + request_count=10, + successful_count=10, + incomplete_count=0, + errored_count=0, + ) + + scheduler_metrics = SchedulerMetrics( + start_time=0.0, + request_start_time=0.1, + measure_start_time=1.0, + measure_end_time=9.0, + request_end_time=9.9, + end_time=10.0, + requests_made=StatusBreakdown(successful=10, incomplete=0, errored=0, total=10), + queued_time_avg=0.01, + resolve_start_delay_avg=0.005, + resolve_targeted_start_delay_avg=0.002, + request_start_delay_avg=0.003, + resolve_time_avg=0.15, + ) + + # Create quality metrics + quality_metrics = EmbeddingsQualityMetrics( + baseline_cosine_similarity=StatusDistributionSummary( + successful=create_distribution_summary( + mean=0.98, median=0.985, count=10, total_sum=9.8 + ), + errored=None, + incomplete=None, + total=None, + ), + mteb_main_score=75.5, + mteb_task_scores={"STS12": 72.3, "STS13": 78.1}, + ) + + # Create metrics + latency_dist = create_distribution_summary( + mean=0.15, median=0.14, count=10, total_sum=1.5 + ) + metrics = EmbeddingsMetrics( + request_totals=StatusBreakdown( + successful=10, incomplete=0, errored=0, total=10 + ), + requests_per_second=StatusDistributionSummary( + successful=create_distribution_summary( + mean=20.0, count=10, total_sum=200.0 + ), + errored=None, + incomplete=None, + total=create_distribution_summary( + mean=20.0, count=10, total_sum=200.0 + ), + ), + request_concurrency=StatusDistributionSummary( + successful=create_distribution_summary(mean=2.0, count=10, total_sum=20.0), + errored=None, + incomplete=None, + total=create_distribution_summary(mean=2.0, count=10, total_sum=20.0), + ), + request_latency=StatusDistributionSummary( + successful=latency_dist, + errored=None, + incomplete=None, + total=latency_dist, + ), + input_tokens_count=StatusBreakdown( + successful=500, incomplete=0, errored=0, total=500 + ), + input_tokens_per_second=StatusDistributionSummary( + successful=create_distribution_summary( + mean=100.0, count=10, 
total_sum=1000.0 + ), + errored=None, + incomplete=None, + total=create_distribution_summary(mean=100.0, count=10, total_sum=1000.0), + ), + quality=quality_metrics, + encoding_format_breakdown={"float": 7, "base64": 3}, + ) + + # Create sample request stats + successful_requests = [] + for i in range(10): + info = RequestInfo(request_id=f"req-{i}", status="completed") + info.timings.request_start = float(i) + info.timings.request_end = float(i) + 0.15 + info.timings.resolve_end = float(i) + 0.15 + + stats = EmbeddingsRequestStats( + request_id=f"req-{i}", + info=info, + input_metrics=UsageMetrics(text_tokens=50), + encoding_format="float" if i < 7 else "base64", + cosine_similarity=0.98 if i % 2 == 0 else None, + ) + successful_requests.append(stats) + + requests = StatusBreakdown( + successful=successful_requests, + incomplete=[], + errored=[], + total=None, + ) + + # Create a minimal config (we won't use most fields for output testing) + from guidellm.scheduler import SynchronousStrategy + + config = BenchmarkConfig( + run_id="test-run-001", + run_index=0, + strategy=SynchronousStrategy(rate=10), + constraints={}, + profile=SynchronousProfile(rate=10), + requests={ + "type": "embeddings", + "model": "test-embedding-model", + }, + backend={ + "type": "openai_http", + "url": "http://localhost:8000", + }, + environment={ + "platform": "test", + "python_version": "3.11", + }, + ) + + return EmbeddingsBenchmark( + config=config, + scheduler_state=scheduler_state, + scheduler_metrics=scheduler_metrics, + metrics=metrics, + requests=requests, + start_time=0.0, + end_time=10.0, + duration=10.0, + warmup_duration=1.0, + cooldown_duration=1.0, + ) + + +@pytest.fixture +def sample_report(sample_benchmark: EmbeddingsBenchmark) -> EmbeddingsBenchmarksReport: + """Create a sample embeddings benchmark report for testing.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + model="test-embedding-model", + backend="openai_http", + 
enable_quality_validation=True, + baseline_model="sentence-transformers/all-MiniLM-L6-v2", + encoding_format="float", + ) + + return EmbeddingsBenchmarksReport( + benchmarks=[sample_benchmark], + args=args, + metadata=EmbeddingsBenchmarkMetadata(), + ) + + +class TestEmbeddingsBenchmarkerSerialized: + """Tests for EmbeddingsBenchmarkerSerialized (JSON/YAML output).""" + + @pytest.mark.smoke + def test_class_registration(self): + """Test that serialized formatter is properly registered.""" + from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput + + # Should be registered for both json and yaml + assert "json" in EmbeddingsBenchmarkerOutput.registry + assert "yaml" in EmbeddingsBenchmarkerOutput.registry + assert ( + EmbeddingsBenchmarkerOutput.registry["json"] + == EmbeddingsBenchmarkerSerialized + ) + + @pytest.mark.smoke + def test_validated_kwargs(self): + """Test validated_kwargs normalizes paths correctly.""" + # Test with string path + kwargs = EmbeddingsBenchmarkerSerialized.validated_kwargs( + output_path="/tmp/test.json" # noqa: S108 + ) + assert "output_path" in kwargs + assert isinstance(kwargs["output_path"], Path) + assert str(kwargs["output_path"]) == "/tmp/test.json" # noqa: S108 + + # Test with Path object + path_obj = Path("/tmp/test.json") # noqa: S108 + kwargs = EmbeddingsBenchmarkerSerialized.validated_kwargs( + output_path=path_obj + ) + assert kwargs["output_path"] == path_obj + + # Test with None + kwargs = EmbeddingsBenchmarkerSerialized.validated_kwargs(output_path=None) + assert "output_path" not in kwargs + + @pytest.mark.asyncio + @pytest.mark.sanity + async def test_finalize_json( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test finalizing report to JSON file.""" + output_file = tmp_path / "test_embeddings.json" + formatter = EmbeddingsBenchmarkerSerialized(output_path=output_file) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + assert 
result_path == output_file + assert result_path.suffix == ".json" + + # Validate JSON content + with result_path.open("r") as f: + data = json.load(f) + + assert data["type_"] == "embeddings_benchmarks_report" + assert len(data["benchmarks"]) == 1 + assert "metadata" in data + assert "args" in data + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_finalize_yaml( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test finalizing report to YAML file.""" + output_file = tmp_path / "test_embeddings.yaml" + formatter = EmbeddingsBenchmarkerSerialized(output_path=output_file) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + assert result_path == output_file + assert result_path.suffix in [".yaml", ".yml"] + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_finalize_directory( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test finalizing with directory path (should use default filename).""" + formatter = EmbeddingsBenchmarkerSerialized(output_path=tmp_path) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + assert result_path.parent == tmp_path + # Default behavior should create a file with some name + assert result_path.suffix in [".json", ".yaml", ".yml"] + + +class TestEmbeddingsBenchmarkerCSV: + """Tests for EmbeddingsBenchmarkerCSV output formatter.""" + + @pytest.mark.smoke + def test_class_registration(self): + """Test that CSV formatter is properly registered.""" + from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput + + assert "csv" in EmbeddingsBenchmarkerOutput.registry + assert ( + EmbeddingsBenchmarkerOutput.registry["csv"] == EmbeddingsBenchmarkerCSV + ) + + @pytest.mark.smoke + def test_default_filename(self): + """Test default CSV filename.""" + assert EmbeddingsBenchmarkerCSV.DEFAULT_FILE == "embeddings_benchmarks.csv" + + @pytest.mark.sanity + @pytest.mark.asyncio + async 
def test_csv_creates_file( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test that finalize creates a valid CSV file.""" + output_file = tmp_path / "test_embeddings.csv" + formatter = EmbeddingsBenchmarkerCSV(output_path=output_file) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + assert result_path == output_file + assert result_path.suffix == ".csv" + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_csv_structure( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test CSV has correct structure and headers.""" + output_file = tmp_path / "test_embeddings.csv" + formatter = EmbeddingsBenchmarkerCSV(output_path=output_file) + + await formatter.finalize(sample_report) + + # Read CSV and check structure + with output_file.open("r") as f: + reader = csv.reader(f) + rows = list(reader) + + # Should have at least header rows + data rows + assert len(rows) >= 4 # Multi-row header + at least 1 data row + + # Check for embeddings-specific headers (no output tokens or streaming) + csv_text = output_file.read_text() + assert "Request Latency" in csv_text + assert "Input Tokens" in csv_text + + # Should NOT have output token or streaming headers + assert "Output Tokens" not in csv_text + assert "Time to First Token" not in csv_text + assert "Inter Token Latency" not in csv_text + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_csv_quality_metrics( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test CSV includes quality validation metrics.""" + output_file = tmp_path / "test_embeddings.csv" + formatter = EmbeddingsBenchmarkerCSV(output_path=output_file) + + await formatter.finalize(sample_report) + + csv_text = output_file.read_text() + + # Check for quality metrics + assert "Cosine Similarity" in csv_text or "Quality" in csv_text + assert "MTEB" in csv_text + + @pytest.mark.sanity + @pytest.mark.asyncio + async def 
test_csv_encoding_formats( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test CSV includes encoding format breakdown.""" + output_file = tmp_path / "test_embeddings.csv" + formatter = EmbeddingsBenchmarkerCSV(output_path=output_file) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + csv_text = result_path.read_text() + + # Check that CSV contains benchmark data (encoding format breakdown + # is stored in metrics but not separately exported to CSV) + assert "test-embedding-model" in csv_text + assert len(csv_text) > 0 + + @pytest.mark.regression + @pytest.mark.asyncio + async def test_csv_directory_path( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test CSV creation with directory path.""" + formatter = EmbeddingsBenchmarkerCSV(output_path=tmp_path) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + assert result_path.parent == tmp_path + assert result_path.name == EmbeddingsBenchmarkerCSV.DEFAULT_FILE + + +class TestEmbeddingsBenchmarkerHTML: + """Tests for EmbeddingsBenchmarkerHTML output formatter.""" + + @pytest.mark.smoke + def test_class_registration(self): + """Test that HTML formatter is properly registered.""" + from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput + + assert "html" in EmbeddingsBenchmarkerOutput.registry + assert ( + EmbeddingsBenchmarkerOutput.registry["html"] == EmbeddingsBenchmarkerHTML + ) + + @pytest.mark.smoke + def test_default_filename(self): + """Test default HTML filename.""" + assert EmbeddingsBenchmarkerHTML.DEFAULT_FILE == "embeddings_benchmarks.html" + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_html_creates_file( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test that finalize creates a valid HTML file.""" + output_file = tmp_path / "test_embeddings.html" + formatter = 
EmbeddingsBenchmarkerHTML(output_path=output_file) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + assert result_path == output_file + assert result_path.suffix == ".html" + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_html_structure( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test HTML file has valid structure.""" + output_file = tmp_path / "test_embeddings.html" + formatter = EmbeddingsBenchmarkerHTML(output_path=output_file) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + html_content = result_path.read_text() + + # Check basic HTML structure + assert "" in html_content + assert "" in html_content + assert "" in html_content + assert "" in html_content + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_html_embeddings_data( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test HTML contains embeddings-specific data.""" + output_file = tmp_path / "test_embeddings.html" + formatter = EmbeddingsBenchmarkerHTML(output_path=output_file) + + result_path = await formatter.finalize(sample_report) + + html_content = result_path.read_text() + + # Check for embedded data and embeddings-specific content + assert "uiApiData" in html_content + assert ( + "embeddings" in html_content.lower() + or "embedding" in html_content.lower() + ) + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_html_no_streaming_metrics( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test HTML does not include streaming metrics.""" + output_file = tmp_path / "test_embeddings.html" + formatter = EmbeddingsBenchmarkerHTML(output_path=output_file) + + await formatter.finalize(sample_report) + + html_content = output_file.read_text() + + # Should NOT have streaming-related content + assert "Time to First Token" not in html_content + assert "TTFT" not in html_content + assert "Inter Token 
Latency" not in html_content + assert "ITL" not in html_content + + @pytest.mark.regression + @pytest.mark.asyncio + async def test_html_directory_path( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): + """Test HTML creation with directory path.""" + formatter = EmbeddingsBenchmarkerHTML(output_path=tmp_path) + + result_path = await formatter.finalize(sample_report) + + assert result_path.exists() + assert result_path.parent == tmp_path + assert result_path.name == EmbeddingsBenchmarkerHTML.DEFAULT_FILE + + +class TestEmbeddingsBenchmarkerConsole: + """Tests for EmbeddingsBenchmarkerConsole output formatter.""" + + @pytest.mark.smoke + def test_class_registration(self): + """Test that console formatter is properly registered.""" + from guidellm.benchmark.outputs.output import EmbeddingsBenchmarkerOutput + + assert "console" in EmbeddingsBenchmarkerOutput.registry + assert ( + EmbeddingsBenchmarkerOutput.registry["console"] + == EmbeddingsBenchmarkerConsole + ) + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_console_finalize( + self, sample_report: EmbeddingsBenchmarksReport + ): + """Test that console formatter finalize returns None (no file output).""" + formatter = EmbeddingsBenchmarkerConsole() + + result = await formatter.finalize(sample_report) + + # Console formatter doesn't write to file, should return None or empty Path + assert result is None or (isinstance(result, Path) and not result.exists()) + + @pytest.mark.regression + def test_console_instantiation(self): + """Test console formatter can be instantiated.""" + formatter = EmbeddingsBenchmarkerConsole() + assert formatter is not None + assert isinstance(formatter, EmbeddingsBenchmarkerConsole) + + +class TestOutputFormattersIntegration: + """Integration tests for output formatters working together.""" + + @pytest.mark.sanity + @pytest.mark.asyncio + async def test_integration_multiple_formats( + self, sample_report: EmbeddingsBenchmarksReport, tmp_path: Path + ): 
+ """Test that all formatters can process the same report.""" + # JSON + json_formatter = EmbeddingsBenchmarkerSerialized( + output_path=tmp_path / "test.json" + ) + json_path = await json_formatter.finalize(sample_report) + assert json_path.exists() + + # CSV + csv_formatter = EmbeddingsBenchmarkerCSV(output_path=tmp_path / "test.csv") + csv_path = await csv_formatter.finalize(sample_report) + assert csv_path.exists() + + # HTML + html_formatter = EmbeddingsBenchmarkerHTML(output_path=tmp_path / "test.html") + html_path = await html_formatter.finalize(sample_report) + assert html_path.exists() + + # Console + console_formatter = EmbeddingsBenchmarkerConsole() + console_result = await console_formatter.finalize(sample_report) + # Console doesn't write files, returns None + assert console_result is None + + @pytest.mark.regression + @pytest.mark.asyncio + async def test_empty_report_handling(self, tmp_path: Path): + """Test formatters handle reports with no benchmarks gracefully.""" + # Create report with no benchmarks + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + model="test-model", + ) + empty_report = EmbeddingsBenchmarksReport( + benchmarks=[], + args=args, + metadata=EmbeddingsBenchmarkMetadata(), + ) + + # JSON should still work + json_formatter = EmbeddingsBenchmarkerSerialized( + output_path=tmp_path / "empty.json" + ) + json_path = await json_formatter.finalize(empty_report) + assert json_path.exists() + + # Verify JSON content is valid + with json_path.open("r") as f: + data = json.load(f) + assert data["type_"] == "embeddings_benchmarks_report" + assert len(data["benchmarks"]) == 0 diff --git a/tests/unit/benchmark/quality/__init__.py b/tests/unit/benchmark/quality/__init__.py new file mode 100644 index 000000000..f1791286e --- /dev/null +++ b/tests/unit/benchmark/quality/__init__.py @@ -0,0 +1 @@ +"""Unit tests for embeddings quality validation.""" diff --git a/tests/unit/benchmark/quality/test_mteb_integration.py 
b/tests/unit/benchmark/quality/test_mteb_integration.py new file mode 100644 index 000000000..6546e586d --- /dev/null +++ b/tests/unit/benchmark/quality/test_mteb_integration.py @@ -0,0 +1,223 @@ +from __future__ import annotations + +import pytest + +# Skip all tests if sentence-transformers/mteb aren't available +pytest.importorskip("sentence_transformers", reason="sentence-transformers required") +pytest.importorskip("mteb", reason="mteb required") + +from guidellm.benchmark.quality.mteb_integration import ( + DEFAULT_MTEB_TASKS, + MTEBValidator, +) + + +class TestMTEBValidator: + """Tests for MTEB benchmark integration.""" + + @pytest.fixture + def validator(self): + """Create a validator with a test model and minimal tasks.""" + # Use a small, fast model and single task for faster tests + return MTEBValidator( + model_name="sentence-transformers/all-MiniLM-L6-v2", + task_names=["STS12"], # Single lightweight task + ) + + @pytest.mark.smoke + def test_initialization(self, validator): + """Test validator initialization.""" + assert validator is not None + assert validator.model is not None + assert validator.task_names == ["STS12"] + + @pytest.mark.smoke + def test_initialization_default_tasks(self): + """Test initialization with default MTEB tasks.""" + validator = MTEBValidator( + model_name="sentence-transformers/all-MiniLM-L6-v2" + ) + + assert validator.task_names == DEFAULT_MTEB_TASKS + + @pytest.mark.sanity + def test_initialization_multiple_tasks(self): + """Test initialization with multiple tasks.""" + tasks = ["STS12", "STS13", "STSBenchmark"] + validator = MTEBValidator( + model_name="sentence-transformers/all-MiniLM-L6-v2", + task_names=tasks, + ) + + assert validator.task_names == tasks + assert len(validator.task_names) == 3 + + @pytest.mark.sanity + @pytest.mark.slow + def test_run_evaluation_single_task(self, validator): + """Test running MTEB evaluation with single task.""" + results = validator.run_evaluation() + + assert isinstance(results, 
dict) + assert "mteb_main_score" in results + assert "mteb_task_scores" in results + + # Main score should be a float + assert isinstance(results["mteb_main_score"], float) + + # Task scores should be a dict + assert isinstance(results["mteb_task_scores"], dict) + assert "STS12" in results["mteb_task_scores"] + + @pytest.mark.sanity + @pytest.mark.slow + def test_run_evaluation_score_range(self, validator): + """Test that MTEB scores are in valid range.""" + results = validator.run_evaluation() + + # MTEB scores should be between 0 and 100 + assert 0.0 <= results["mteb_main_score"] <= 100.0 + + for _task_name, score in results["mteb_task_scores"].items(): + assert 0.0 <= score <= 100.0 + + @pytest.mark.regression + @pytest.mark.slow + def test_run_evaluation_multiple_tasks(self): + """Test running MTEB evaluation with multiple tasks.""" + tasks = ["STS12", "STS13"] + validator = MTEBValidator( + model_name="sentence-transformers/all-MiniLM-L6-v2", + task_names=tasks, + ) + + results = validator.run_evaluation() + + assert "mteb_main_score" in results + assert "mteb_task_scores" in results + + # Should have scores for both tasks + assert len(results["mteb_task_scores"]) == len(tasks) + for task in tasks: + assert task in results["mteb_task_scores"] + + @pytest.mark.regression + @pytest.mark.slow + def test_main_score_is_average(self): + """Test that main score is average of task scores.""" + tasks = ["STS12", "STS13"] + validator = MTEBValidator( + model_name="sentence-transformers/all-MiniLM-L6-v2", + task_names=tasks, + ) + + results = validator.run_evaluation() + + # Calculate expected average + task_scores = list(results["mteb_task_scores"].values()) + expected_avg = sum(task_scores) / len(task_scores) + + # Main score should be close to average + assert results["mteb_main_score"] == pytest.approx(expected_avg, abs=0.1) + + @pytest.mark.sanity + def test_default_mteb_tasks_constant(self): + """Test that DEFAULT_MTEB_TASKS contains expected tasks.""" + assert 
isinstance(DEFAULT_MTEB_TASKS, list) + assert len(DEFAULT_MTEB_TASKS) > 0 + + # Should contain STS tasks (standard for embeddings) + assert any("STS" in task for task in DEFAULT_MTEB_TASKS) + + @pytest.mark.smoke + def test_model_loaded(self, validator): + """Test that SentenceTransformer model is loaded.""" + assert validator.model is not None + + # Should be able to encode text + embedding = validator.model.encode("Test sentence.") + assert embedding is not None + assert len(embedding) > 0 + + @pytest.mark.regression + def test_task_names_stored(self, validator): + """Test that task names are stored correctly.""" + assert hasattr(validator, "task_names") + assert validator.task_names == ["STS12"] + + @pytest.mark.sanity + @pytest.mark.slow + def test_evaluation_reproducible(self, validator): + """Test that evaluation produces consistent results.""" + # Run evaluation twice + results1 = validator.run_evaluation() + results2 = validator.run_evaluation() + + # Results should be identical (or very close) + assert results1["mteb_main_score"] == pytest.approx( + results2["mteb_main_score"], abs=0.01 + ) + + for task in results1["mteb_task_scores"]: + assert results1["mteb_task_scores"][task] == pytest.approx( + results2["mteb_task_scores"][task], abs=0.01 + ) + + @pytest.mark.regression + @pytest.mark.slow + def test_different_models_different_scores(self): + """Test that different models produce different scores.""" + # This test verifies the evaluation is model-specific + validator1 = MTEBValidator( + model_name="sentence-transformers/all-MiniLM-L6-v2", + task_names=["STS12"], + ) + + # Note: This would require a different model to be installed + # Skipping if second model not available + try: + validator2 = MTEBValidator( + model_name="sentence-transformers/paraphrase-MiniLM-L3-v2", + task_names=["STS12"], + ) + + results1 = validator1.run_evaluation() + results2 = validator2.run_evaluation() + + # Different models should produce different scores + # (though they 
might be similar) + assert "mteb_main_score" in results1 + assert "mteb_main_score" in results2 + except Exception: # noqa: BLE001 + # Skip if second model is unavailable + pytest.skip("Second model not available for comparison") + + @pytest.mark.sanity + def test_initialization_with_none_tasks(self): + """Test initialization when tasks is None (should use default).""" + validator = MTEBValidator( + model_name="sentence-transformers/all-MiniLM-L6-v2", + task_names=None, + ) + + # Should use DEFAULT_MTEB_TASKS + assert validator.task_names == DEFAULT_MTEB_TASKS + + @pytest.mark.regression + @pytest.mark.slow + def test_evaluation_returns_dict_structure(self, validator): + """Test that evaluation returns expected dictionary structure.""" + results = validator.run_evaluation() + + # Check structure + assert isinstance(results, dict) + assert set(results.keys()) == {"mteb_main_score", "mteb_task_scores"} + + # Check types + assert isinstance(results["mteb_main_score"], float) + assert isinstance(results["mteb_task_scores"], dict) + + # Check task scores structure + for task_name, score in results["mteb_task_scores"].items(): + assert isinstance(task_name, str) + assert isinstance(score, int | float) diff --git a/tests/unit/benchmark/quality/test_validators.py b/tests/unit/benchmark/quality/test_validators.py new file mode 100644 index 000000000..08b286f4b --- /dev/null +++ b/tests/unit/benchmark/quality/test_validators.py @@ -0,0 +1,306 @@ +from __future__ import annotations + +import numpy as np +import pytest + +from guidellm.benchmark.quality.validators import compute_cosine_similarity + +# Check for sentence-transformers availability for quality validator tests +try: + import sentence_transformers # noqa: F401 + + EMBEDDINGS_VALIDATOR_AVAILABLE = True +except ImportError: + EMBEDDINGS_VALIDATOR_AVAILABLE = False + +if EMBEDDINGS_VALIDATOR_AVAILABLE: + from guidellm.benchmark.quality.validators import EmbeddingsQualityValidator + + +class 
TestComputeCosineSimilarity: + """Tests for cosine similarity computation function.""" + + @pytest.mark.smoke + def test_identical_vectors(self): + """Test cosine similarity of identical vectors is 1.0.""" + vec = np.array([1.0, 2.0, 3.0, 4.0]) + similarity = compute_cosine_similarity(vec, vec) + assert similarity == pytest.approx(1.0, abs=1e-6) + + @pytest.mark.smoke + def test_orthogonal_vectors(self): + """Test cosine similarity of orthogonal vectors is 0.0.""" + vec1 = np.array([1.0, 0.0, 0.0]) + vec2 = np.array([0.0, 1.0, 0.0]) + similarity = compute_cosine_similarity(vec1, vec2) + assert similarity == pytest.approx(0.0, abs=1e-6) + + @pytest.mark.smoke + def test_opposite_vectors(self): + """Test cosine similarity of opposite vectors is -1.0.""" + vec1 = np.array([1.0, 2.0, 3.0]) + vec2 = np.array([-1.0, -2.0, -3.0]) + similarity = compute_cosine_similarity(vec1, vec2) + assert similarity == pytest.approx(-1.0, abs=1e-6) + + @pytest.mark.sanity + def test_similar_vectors(self): + """Test cosine similarity of similar vectors is close to 1.0.""" + vec1 = np.array([1.0, 2.0, 3.0, 4.0]) + vec2 = np.array([1.1, 2.1, 2.9, 4.0]) + similarity = compute_cosine_similarity(vec1, vec2) + assert similarity > 0.99 + assert similarity <= 1.0 + + @pytest.mark.sanity + def test_dissimilar_vectors(self): + """Test cosine similarity of dissimilar vectors is low.""" + vec1 = np.array([1.0, 0.0, 0.0]) + vec2 = np.array([0.1, 1.0, 0.0]) + similarity = compute_cosine_similarity(vec1, vec2) + assert similarity < 0.2 + assert similarity >= 0.0 + + @pytest.mark.sanity + def test_normalized_vectors(self): + """Test with pre-normalized vectors (unit length).""" + # Pre-normalized to unit length + vec1 = np.array([1.0, 0.0, 0.0]) + vec2 = np.array([0.707107, 0.707107, 0.0]) # 45 degrees + similarity = compute_cosine_similarity(vec1, vec2) + assert similarity == pytest.approx(0.707107, abs=1e-5) + + @pytest.mark.regression + def test_high_dimensional_vectors(self): + """Test with 
high-dimensional vectors (typical embedding size).""" + rng = np.random.default_rng(42) + vec1 = rng.random(384) # Common embedding dimension + vec2 = rng.random(384) + + similarity = compute_cosine_similarity(vec1, vec2) + assert -1.0 <= similarity <= 1.0 + + @pytest.mark.regression + def test_zero_vector_handling(self): + """Test behavior with zero vectors (edge case).""" + vec1 = np.array([1.0, 2.0, 3.0]) + vec2 = np.array([0.0, 0.0, 0.0]) + + # Zero vector should return 0.0 (implementation handles gracefully) + similarity = compute_cosine_similarity(vec1, vec2) + assert similarity == 0.0 + + @pytest.mark.regression + def test_single_dimension_vectors(self): + """Test with single-dimension vectors.""" + vec1 = np.array([5.0]) + vec2 = np.array([3.0]) + similarity = compute_cosine_similarity(vec1, vec2) + assert similarity == pytest.approx(1.0, abs=1e-6) + + vec3 = np.array([-5.0]) + similarity_neg = compute_cosine_similarity(vec1, vec3) + assert similarity_neg == pytest.approx(-1.0, abs=1e-6) + + @pytest.mark.sanity + def test_return_type(self): + """Test that return type is Python float.""" + vec1 = np.array([1.0, 2.0, 3.0]) + vec2 = np.array([4.0, 5.0, 6.0]) + similarity = compute_cosine_similarity(vec1, vec2) + assert isinstance(similarity, float) + + +@pytest.mark.skipif( + not EMBEDDINGS_VALIDATOR_AVAILABLE, + reason="EmbeddingsQualityValidator requires sentence-transformers", +) +class TestEmbeddingsQualityValidator: + """Tests for EmbeddingsQualityValidator class.""" + + @pytest.fixture + def validator(self): + """Create a validator with a test model.""" + # Use a small, fast model for testing + return EmbeddingsQualityValidator( + baseline_model="sentence-transformers/all-MiniLM-L6-v2" + ) + + @pytest.mark.smoke + def test_initialization(self, validator): + """Test validator initialization.""" + assert validator is not None + assert validator.baseline_model is not None + + @pytest.mark.sanity + def test_validate_against_baseline_same_model(self, 
validator): + """Test validation against baseline with same model.""" + text = "This is a test sentence for embeddings." + + # Get baseline embedding + baseline_embedding = validator.baseline_model.encode(text) + + # Validate against itself (should be very high similarity) + similarity = validator.validate_against_baseline(text, baseline_embedding) + + assert similarity == pytest.approx(1.0, abs=1e-6) + assert isinstance(similarity, float) + + @pytest.mark.sanity + def test_validate_against_baseline_different_embedding(self, validator): + """Test validation with a different (random) embedding.""" + text = "This is a test sentence." + + # Create a random embedding (different from baseline) + rng = np.random.default_rng(42) + random_embedding = rng.random(384) # MiniLM dimension + # Normalize to unit length + random_embedding = random_embedding / np.linalg.norm(random_embedding) + + similarity = validator.validate_against_baseline(text, random_embedding) + + # Random embedding should have low similarity + assert similarity < 0.5 + assert similarity >= -1.0 + + @pytest.mark.regression + def test_validate_multiple_texts(self, validator): + """Test validation with multiple different texts.""" + texts = [ + "Machine learning is a subset of artificial intelligence.", + "The weather today is sunny and warm.", + "Python is a popular programming language.", + ] + + for text in texts: + baseline_embedding = validator.baseline_model.encode(text) + similarity = validator.validate_against_baseline(text, baseline_embedding) + # Same model should have perfect similarity + assert similarity == pytest.approx(1.0, abs=1e-6) + + @pytest.mark.sanity + def test_check_self_consistency_identical_embeddings(self, validator): + """Test self-consistency with identical embeddings.""" + text = "Test sentence for consistency check." 
+ + # Generate same embedding twice + emb1 = validator.baseline_model.encode(text) + emb2 = validator.baseline_model.encode(text) + + consistency = validator.check_self_consistency(text, [emb1, emb2]) + + # Should be perfectly consistent + assert consistency == pytest.approx(1.0, abs=1e-6) + + @pytest.mark.sanity + def test_check_self_consistency_single_embedding(self, validator): + """Test self-consistency with only one embedding.""" + text = "Single embedding test." + emb = validator.baseline_model.encode(text) + + consistency = validator.check_self_consistency(text, [emb]) + + # Single embedding should return 1.0 (perfectly consistent) + assert consistency == 1.0 + + @pytest.mark.sanity + def test_check_self_consistency_empty_list(self, validator): + """Test self-consistency with empty embedding list.""" + text = "Empty list test." + + consistency = validator.check_self_consistency(text, []) + + # Empty list should return 1.0 (no inconsistency) + assert consistency == 1.0 + + @pytest.mark.regression + def test_check_self_consistency_multiple_embeddings(self, validator): + """Test self-consistency with multiple embeddings.""" + text = "Test sentence for multiple embeddings." + + # Generate same embedding multiple times + embeddings = [validator.baseline_model.encode(text) for _ in range(5)] + + consistency = validator.check_self_consistency(text, embeddings) + + # Should be highly consistent (model is deterministic) + assert consistency == pytest.approx(1.0, abs=1e-6) + + @pytest.mark.regression + def test_check_self_consistency_different_embeddings(self, validator): + """Test self-consistency with intentionally different embeddings.""" + text = "Consistency test." 
+ rng = np.random.default_rng(42) + + # First embedding from model + emb1 = validator.baseline_model.encode(text) + + # Second embedding is random + emb2 = rng.random(384) + emb2 = emb2 / np.linalg.norm(emb2) + + consistency = validator.check_self_consistency(text, [emb1, emb2]) + + # Should have low consistency + assert consistency < 0.5 + + @pytest.mark.sanity + def test_embedding_dimensions(self, validator): + """Test that baseline model produces expected dimensions.""" + text = "Dimension test." + embedding = validator.baseline_model.encode(text) + + # MiniLM-L6-v2 produces 384-dimensional embeddings + assert embedding.shape == (384,) + + @pytest.mark.regression + def test_baseline_model_deterministic(self, validator): + """Test that baseline model produces deterministic results.""" + text = "Deterministic test." + + # Encode same text multiple times + emb1 = validator.baseline_model.encode(text) + emb2 = validator.baseline_model.encode(text) + emb3 = validator.baseline_model.encode(text) + + # All embeddings should be identical + assert np.allclose(emb1, emb2, atol=1e-6) + assert np.allclose(emb2, emb3, atol=1e-6) + + @pytest.mark.sanity + def test_similarity_range(self, validator): + """Test that similarity values are within valid range.""" + texts = [ + "First test sentence.", + "Second test sentence.", + "Completely different topic about weather.", + ] + + for text in texts: + baseline_emb = validator.baseline_model.encode(text) + similarity = validator.validate_against_baseline(text, baseline_emb) + + # Similarity should always be in [-1, 1] + assert -1.0 <= similarity <= 1.0 + + @pytest.mark.regression + def test_vllm_tolerance_standard(self, validator): + """Test that similarity meets vLLM standard tolerance (1e-2).""" + text = "vLLM tolerance test." 
+ + baseline_emb = validator.baseline_model.encode(text) + similarity = validator.validate_against_baseline(text, baseline_emb) + + # Same model should easily meet 1e-2 tolerance + assert abs(1.0 - similarity) < 1e-2 + + @pytest.mark.regression + def test_vllm_tolerance_mteb(self, validator): + """Test that similarity meets vLLM MTEB tolerance (5e-4).""" + text = "vLLM MTEB tolerance test." + + baseline_emb = validator.baseline_model.encode(text) + similarity = validator.validate_against_baseline(text, baseline_emb) + + # Same model should easily meet 5e-4 tolerance + assert abs(1.0 - similarity) < 5e-4 diff --git a/tests/unit/benchmark/schemas/embeddings/__init__.py b/tests/unit/benchmark/schemas/embeddings/__init__.py new file mode 100644 index 000000000..ea7cc06e7 --- /dev/null +++ b/tests/unit/benchmark/schemas/embeddings/__init__.py @@ -0,0 +1 @@ +"""Unit tests for embeddings benchmark schemas.""" diff --git a/tests/unit/benchmark/schemas/embeddings/test_accumulator.py b/tests/unit/benchmark/schemas/embeddings/test_accumulator.py new file mode 100644 index 000000000..15c7c7677 --- /dev/null +++ b/tests/unit/benchmark/schemas/embeddings/test_accumulator.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +import pytest + +from guidellm.benchmark.schemas.embeddings.accumulator import ( + EmbeddingsBenchmarkAccumulator, + EmbeddingsQualityMetricsAccumulator, +) + + +class TestEmbeddingsQualityMetricsAccumulator: + """Tests for EmbeddingsQualityMetricsAccumulator.""" + + @pytest.mark.smoke + def test_initialization(self): + """Test accumulator initialization.""" + accumulator = EmbeddingsQualityMetricsAccumulator() + assert accumulator.cosine_similarities == [] + + @pytest.mark.sanity + def test_add_cosine_similarity(self): + """Test adding cosine similarity values.""" + accumulator = EmbeddingsQualityMetricsAccumulator() + + # Add some cosine similarity values + accumulator.cosine_similarities.append(0.98) + accumulator.cosine_similarities.append(0.97) + 
accumulator.cosine_similarities.append(0.99) + + assert len(accumulator.cosine_similarities) == 3 + assert accumulator.cosine_similarities[0] == 0.98 + assert accumulator.cosine_similarities[1] == 0.97 + assert accumulator.cosine_similarities[2] == 0.99 + + @pytest.mark.sanity + def test_multiple_instances_independent(self): + """Test that multiple accumulator instances are independent.""" + acc1 = EmbeddingsQualityMetricsAccumulator() + acc2 = EmbeddingsQualityMetricsAccumulator() + + acc1.cosine_similarities.append(0.95) + acc2.cosine_similarities.append(0.99) + + assert len(acc1.cosine_similarities) == 1 + assert len(acc2.cosine_similarities) == 1 + assert acc1.cosine_similarities[0] != acc2.cosine_similarities[0] + + +class TestEmbeddingsBenchmarkAccumulator: + """Tests for EmbeddingsBenchmarkAccumulator.""" + + @pytest.mark.smoke + def test_class_signatures(self): + """Validate public surface and key properties.""" + # Check that class has expected attributes (will be set during init + # with config) + assert hasattr(EmbeddingsBenchmarkAccumulator, "model_fields") + assert "quality" in EmbeddingsBenchmarkAccumulator.model_fields + assert ( + "encoding_format_breakdown" + in EmbeddingsBenchmarkAccumulator.model_fields + ) + + @pytest.mark.smoke + def test_initialization(self): + """Test accumulator has proper default fields.""" + # EmbeddingsBenchmarkAccumulator requires a BenchmarkConfig for full + # instantiation but we can test that the class has expected fields + fields = EmbeddingsBenchmarkAccumulator.model_fields + + assert "quality_enabled" in fields + assert "quality" in fields + assert "encoding_format_breakdown" in fields + assert "timings" in fields + assert "scheduler_metrics" in fields + assert "metrics" in fields + assert "requests" in fields + + @pytest.mark.sanity + def test_encoding_format_breakdown_field(self): + """Test that encoding_format_breakdown field exists and is a dict.""" + # Test that the field schema is correct + fields = 
EmbeddingsBenchmarkAccumulator.model_fields + assert "encoding_format_breakdown" in fields + + # Field should be a dict type + field_info = fields["encoding_format_breakdown"] + assert field_info.annotation == dict[str, int] + + @pytest.mark.sanity + def test_quality_metrics_accumulator_field(self): + """Test that quality field exists and has correct type.""" + fields = EmbeddingsBenchmarkAccumulator.model_fields + assert "quality" in fields + assert "quality_enabled" in fields + + # Field should be optional EmbeddingsQualityMetricsAccumulator + field_info = fields["quality"] + # Check field is optional (can be None) + assert field_info.is_required() is False + + @pytest.mark.regression + def test_accumulator_field_defaults(self): + """Test that accumulator fields have proper default factories.""" + fields = EmbeddingsBenchmarkAccumulator.model_fields + + # Check fields with default factories + assert "timings" in fields + assert "scheduler_metrics" in fields + assert "metrics" in fields + assert "requests" in fields + + # Check that encoding_format_breakdown has dict factory + assert fields["encoding_format_breakdown"].default_factory is not None + + @pytest.mark.regression + def test_type_literal(self): + """Test that type_ field is correctly set.""" + fields = EmbeddingsBenchmarkAccumulator.model_fields + assert "type_" in fields + + # Check the default value + assert fields["type_"].default == "embeddings_benchmark_accumulator" diff --git a/tests/unit/benchmark/schemas/embeddings/test_entrypoints.py b/tests/unit/benchmark/schemas/embeddings/test_entrypoints.py new file mode 100644 index 000000000..bc97ad51c --- /dev/null +++ b/tests/unit/benchmark/schemas/embeddings/test_entrypoints.py @@ -0,0 +1,275 @@ +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from guidellm.benchmark.schemas.embeddings.entrypoints import BenchmarkEmbeddingsArgs + + +class TestBenchmarkEmbeddingsArgs: + """Tests for BenchmarkEmbeddingsArgs 
schema.""" + + @pytest.mark.smoke + def test_class_signatures(self): + """Validate public surface and key properties.""" + fields = BenchmarkEmbeddingsArgs.model_fields + + # Standard benchmark args + for field_name in ( + "target", + "model", + "backend", + "profile", + "data", + "outputs", + ): + assert field_name in fields + + # Embeddings-specific args + for field_name in ( + "enable_quality_validation", + "baseline_model", + "quality_tolerance", + "enable_mteb", + "mteb_tasks", + "encoding_format", + ): + assert field_name in fields + + @pytest.mark.smoke + def test_initialization_minimal(self): + """Test initialization with minimal required fields.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + ) + + assert args.target == "http://localhost:8000" + assert args.enable_quality_validation is False + assert args.baseline_model is None + assert args.quality_tolerance == 1e-2 + assert args.enable_mteb is False + assert args.mteb_tasks is None + assert args.encoding_format == "float" # Default is "float" + + @pytest.mark.sanity + def test_initialization_with_quality_validation(self): + """Test initialization with quality validation enabled.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + model="test-model", + enable_quality_validation=True, + baseline_model="sentence-transformers/all-MiniLM-L6-v2", + quality_tolerance=5e-4, + ) + + assert args.enable_quality_validation is True + assert args.baseline_model == "sentence-transformers/all-MiniLM-L6-v2" + assert args.quality_tolerance == 5e-4 + + @pytest.mark.sanity + def test_initialization_with_mteb(self): + """Test initialization with MTEB enabled.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + enable_mteb=True, + mteb_tasks=["STS12", "STS13", "STSBenchmark"], + ) + + assert args.enable_mteb is True + assert args.mteb_tasks == ["STS12", "STS13", "STSBenchmark"] + + @pytest.mark.sanity + def test_initialization_with_encoding_format(self): + 
"""Test initialization with encoding format.""" + # Float encoding + args_float = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + encoding_format="float", + ) + assert args_float.encoding_format == "float" + + # Base64 encoding + args_base64 = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + encoding_format="base64", + ) + assert args_base64.encoding_format == "base64" + + @pytest.mark.sanity + def test_initialization_all_fields(self): + """Test initialization with all embeddings-specific fields.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + model="test-embedding-model", + backend="openai_http", + profile="sweep", + data=["embeddings_data.json"], + outputs=["json", "csv", "html"], + enable_quality_validation=True, + baseline_model="sentence-transformers/all-MiniLM-L6-v2", + quality_tolerance=1e-3, + enable_mteb=True, + mteb_tasks=["STS12", "STS13"], + encoding_format="float", + ) + + # Standard fields + assert args.target == "http://localhost:8000" + assert args.model == "test-embedding-model" + assert args.backend == "openai_http" + assert args.profile == "sweep" + assert args.data == ["embeddings_data.json"] + assert args.outputs == ["json", "csv", "html"] + + # Embeddings-specific fields + assert args.enable_quality_validation is True + assert args.baseline_model == "sentence-transformers/all-MiniLM-L6-v2" + assert args.quality_tolerance == 1e-3 + assert args.enable_mteb is True + assert args.mteb_tasks == ["STS12", "STS13"] + assert args.encoding_format == "float" + + @pytest.mark.sanity + def test_invalid_initialization_missing_target(self): + """Missing target should fail validation.""" + with pytest.raises(ValidationError): + BenchmarkEmbeddingsArgs() # type: ignore[call-arg] + + @pytest.mark.sanity + @pytest.mark.parametrize( + ("field_name", "bad_value"), + [ + ("target", None), + ("target", 123), + ("model", 123), + ("enable_quality_validation", "not_a_bool"), + ("quality_tolerance", "not_a_float"), + 
("enable_mteb", "not_a_bool"), + ("mteb_tasks", "not_a_list"), + ("encoding_format", 123), + ], + ) + def test_invalid_initialization_values(self, field_name: str, bad_value): + """Type mismatches should raise.""" + base = {"target": "http://localhost:8000"} + base[field_name] = bad_value + with pytest.raises(ValidationError): + BenchmarkEmbeddingsArgs(**base) # type: ignore[arg-type] + + @pytest.mark.smoke + def test_marshalling(self): + """Test model_dump / model_validate round-trip.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + model="test-model", + data=["test_data.json"], # Need at least one data item + enable_quality_validation=True, + baseline_model="sentence-transformers/all-MiniLM-L6-v2", + quality_tolerance=1e-3, + ) + + dumped = args.model_dump() + rebuilt = BenchmarkEmbeddingsArgs.model_validate(dumped) + + assert rebuilt.target == args.target + assert rebuilt.model == args.model + assert rebuilt.enable_quality_validation == args.enable_quality_validation + assert rebuilt.baseline_model == args.baseline_model + assert rebuilt.quality_tolerance == args.quality_tolerance + + @pytest.mark.regression + def test_quality_tolerance_default_value(self): + """Test default quality tolerance matches vLLM pattern (1e-2).""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + ) + assert args.quality_tolerance == 1e-2 + + @pytest.mark.regression + def test_mteb_tasks_default_none(self): + """Test MTEB tasks default to None (will use DEFAULT_MTEB_TASKS in + validator).""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + enable_mteb=True, + ) + # mteb_tasks should be None by default + # The validator will set DEFAULT_MTEB_TASKS if None + assert args.mteb_tasks is None or isinstance(args.mteb_tasks, list) + + @pytest.mark.sanity + def test_optional_fields(self): + """Test that embeddings-specific fields are optional.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + ) + + # All 
embeddings-specific fields should have defaults + assert args.enable_quality_validation is False + assert args.baseline_model is None + assert args.quality_tolerance == 1e-2 + assert args.enable_mteb is False + assert args.mteb_tasks is None + assert args.encoding_format == "float" # Default is "float", not None + + @pytest.mark.regression + def test_quality_validation_without_baseline_model(self): + """Test quality validation can be enabled without explicit baseline model.""" + # Should be valid - baseline model can be determined later or use default + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + enable_quality_validation=True, + ) + + assert args.enable_quality_validation is True + assert args.baseline_model is None + + @pytest.mark.regression + def test_mteb_tasks_as_list(self): + """Test MTEB tasks can be specified as a list.""" + tasks = ["STS12", "STS13", "STS14", "STS15", "STSBenchmark"] + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + enable_mteb=True, + mteb_tasks=tasks, + ) + + assert args.mteb_tasks == tasks + assert len(args.mteb_tasks) == 5 + + @pytest.mark.sanity + def test_encoding_format_optional(self): + """Test encoding format has default value.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + ) + # Default is "float" + assert args.encoding_format == "float" + + @pytest.mark.regression + def test_standard_benchmark_args_inherited(self): + """Test that standard BenchmarkArgs fields are inherited.""" + args = BenchmarkEmbeddingsArgs( + target="http://localhost:8000", + model="test-model", + backend="openai_http", + profile="sweep", + data=["data.json"], + outputs=["json", "csv"], + ) + + # These are inherited from BenchmarkArgs + assert hasattr(args, "target") + assert hasattr(args, "model") + assert hasattr(args, "backend") + assert hasattr(args, "profile") + assert hasattr(args, "data") + assert hasattr(args, "outputs") + + # Verify values + assert args.target == 
"http://localhost:8000" + assert args.model == "test-model" + assert args.backend == "openai_http" + assert args.profile == "sweep" + assert args.data == ["data.json"] + assert args.outputs == ["json", "csv"] diff --git a/tests/unit/benchmark/schemas/embeddings/test_metrics.py b/tests/unit/benchmark/schemas/embeddings/test_metrics.py new file mode 100644 index 000000000..a4c2624a2 --- /dev/null +++ b/tests/unit/benchmark/schemas/embeddings/test_metrics.py @@ -0,0 +1,364 @@ +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from guidellm.benchmark.schemas.embeddings.metrics import ( + EmbeddingsMetrics, + EmbeddingsQualityMetrics, +) +from guidellm.schemas import ( + DistributionSummary, + Percentiles, + StatusBreakdown, + StatusDistributionSummary, +) + + +def create_percentiles(p50=0.5) -> Percentiles: + """Helper to create Percentiles with all required fields.""" + return Percentiles( + p001=p50 * 0.5, + p01=p50 * 0.6, + p05=p50 * 0.7, + p10=p50 * 0.8, + p25=p50 * 0.9, + p50=p50, + p75=p50 * 1.05, + p90=p50 * 1.1, + p95=p50 * 1.15, + p99=p50 * 1.2, + p999=p50 * 1.25, + ) + + +def create_distribution_summary( + mean=0.5, median=0.5, mode=0.5, variance=0.01, std_dev=0.1, + min_val=0.1, max_val=1.0, count=100, total_sum=50.0 +) -> DistributionSummary: + """Helper to create DistributionSummary with all required fields.""" + return DistributionSummary( + mean=mean, + median=median, + mode=mode, + variance=variance, + std_dev=std_dev, + min=min_val, + max=max_val, + count=count, + total_sum=total_sum, + percentiles=create_percentiles(median), + ) + + +class TestEmbeddingsQualityMetrics: + """Tests for EmbeddingsQualityMetrics schema.""" + + @pytest.mark.smoke + def test_class_signatures(self): + """Validate public surface and key properties.""" + fields = EmbeddingsQualityMetrics.model_fields + for field_name in ( + "baseline_cosine_similarity", + "self_consistency_score", + "mteb_main_score", + "mteb_task_scores", + ): + 
assert field_name in fields + + @pytest.mark.smoke + def test_initialization_minimal(self): + """Test initialization with minimal required fields.""" + metrics = EmbeddingsQualityMetrics() + assert metrics.baseline_cosine_similarity is None + assert metrics.self_consistency_score is None + assert metrics.mteb_main_score is None + assert metrics.mteb_task_scores is None + + @pytest.mark.sanity + def test_initialization_with_cosine_similarity(self): + """Test initialization with baseline cosine similarity.""" + dist = create_distribution_summary( + mean=0.98, + median=0.985, + mode=0.985, + variance=0.0001, + std_dev=0.01, + min_val=0.95, + max_val=0.99, + count=100, + total_sum=98.0, + ) + status_dist = StatusDistributionSummary( + successful=dist, + errored=None, + incomplete=None, + total=None, + ) + + metrics = EmbeddingsQualityMetrics( + baseline_cosine_similarity=status_dist + ) + assert metrics.baseline_cosine_similarity is not None + assert metrics.baseline_cosine_similarity.successful.mean == 0.98 + + @pytest.mark.sanity + def test_initialization_with_mteb_scores(self): + """Test initialization with MTEB scores.""" + metrics = EmbeddingsQualityMetrics( + mteb_main_score=75.5, + mteb_task_scores={ + "STS12": 72.3, + "STS13": 78.1, + "STSBenchmark": 80.9, + }, + ) + assert metrics.mteb_main_score == 75.5 + assert metrics.mteb_task_scores is not None + assert len(metrics.mteb_task_scores) == 3 + assert metrics.mteb_task_scores["STS12"] == 72.3 + + @pytest.mark.sanity + def test_initialization_all_fields(self): + """Test initialization with all fields populated.""" + cos_dist = create_distribution_summary( + mean=0.98, + median=0.985, + mode=0.985, + variance=0.0001, + std_dev=0.01, + min_val=0.95, + max_val=0.99, + count=100, + total_sum=98.0, + ) + cons_dist = create_distribution_summary( + mean=0.995, + median=0.997, + mode=0.997, + variance=0.000025, + std_dev=0.005, + min_val=0.98, + max_val=0.999, + count=100, + total_sum=99.5, + ) + + metrics = 
EmbeddingsQualityMetrics( + baseline_cosine_similarity=StatusDistributionSummary( + successful=cos_dist, errored=None, incomplete=None, total=None + ), + self_consistency_score=StatusDistributionSummary( + successful=cons_dist, errored=None, incomplete=None, total=None + ), + mteb_main_score=75.5, + mteb_task_scores={"STS12": 72.3, "STS13": 78.1}, + ) + + assert metrics.baseline_cosine_similarity.successful.mean == 0.98 + assert metrics.self_consistency_score.successful.mean == 0.995 + assert metrics.mteb_main_score == 75.5 + assert len(metrics.mteb_task_scores) == 2 + + @pytest.mark.smoke + def test_marshalling(self): + """Test model_dump / model_validate round-trip.""" + metrics = EmbeddingsQualityMetrics( + mteb_main_score=75.5, + mteb_task_scores={"STS12": 72.3}, + ) + dumped = metrics.model_dump() + rebuilt = EmbeddingsQualityMetrics.model_validate(dumped) + assert rebuilt.mteb_main_score == metrics.mteb_main_score + assert rebuilt.mteb_task_scores == metrics.mteb_task_scores + + +class TestEmbeddingsMetrics: + """Tests for EmbeddingsMetrics schema.""" + + @pytest.mark.smoke + def test_class_signatures(self): + """Validate public surface and key properties.""" + fields = EmbeddingsMetrics.model_fields + for field_name in ( + "request_totals", + "requests_per_second", + "request_concurrency", + "request_latency", + "input_tokens_count", + "input_tokens_per_second", + "quality", + "encoding_format_breakdown", + ): + assert field_name in fields + + @pytest.mark.smoke + def test_initialization_minimal(self): + """Test initialization with required fields.""" + metrics = EmbeddingsMetrics( + request_totals=StatusBreakdown( + successful=10, incomplete=0, errored=0, total=10 + ), + requests_per_second=StatusDistributionSummary(), + request_concurrency=StatusDistributionSummary(), + request_latency=StatusDistributionSummary(), + input_tokens_count=StatusBreakdown( + successful=500, incomplete=0, errored=0, total=500 + ), + 
input_tokens_per_second=StatusDistributionSummary(), + ) + + assert metrics.request_totals.successful == 10 + assert metrics.input_tokens_count.successful == 500 + assert metrics.quality is None + assert metrics.encoding_format_breakdown == {} + + @pytest.mark.sanity + def test_initialization_with_quality_metrics(self): + """Test initialization with quality validation metrics.""" + quality = EmbeddingsQualityMetrics( + mteb_main_score=75.5, + mteb_task_scores={"STS12": 72.3}, + ) + + metrics = EmbeddingsMetrics( + request_totals=StatusBreakdown( + successful=10, incomplete=0, errored=0, total=10 + ), + requests_per_second=StatusDistributionSummary(), + request_concurrency=StatusDistributionSummary(), + request_latency=StatusDistributionSummary(), + input_tokens_count=StatusBreakdown( + successful=500, incomplete=0, errored=0, total=500 + ), + input_tokens_per_second=StatusDistributionSummary(), + quality=quality, + ) + + assert metrics.quality is not None + assert metrics.quality.mteb_main_score == 75.5 + + @pytest.mark.sanity + def test_initialization_with_encoding_breakdown(self): + """Test initialization with encoding format breakdown.""" + metrics = EmbeddingsMetrics( + request_totals=StatusBreakdown( + successful=15, incomplete=0, errored=0, total=15 + ), + requests_per_second=StatusDistributionSummary(), + request_concurrency=StatusDistributionSummary(), + request_latency=StatusDistributionSummary(), + input_tokens_count=StatusBreakdown( + successful=750, incomplete=0, errored=0, total=750 + ), + input_tokens_per_second=StatusDistributionSummary(), + encoding_format_breakdown={"float": 10, "base64": 5}, + ) + + assert metrics.encoding_format_breakdown == {"float": 10, "base64": 5} + assert sum(metrics.encoding_format_breakdown.values()) == 15 + + @pytest.mark.sanity + def test_initialization_all_fields(self): + """Test initialization with all fields populated.""" + quality = EmbeddingsQualityMetrics( + mteb_main_score=75.5, + mteb_task_scores={"STS12": 72.3, 
"STS13": 78.1}, + ) + + dist = create_distribution_summary( + mean=0.15, + median=0.14, + mode=0.14, + variance=0.0004, + std_dev=0.02, + min_val=0.10, + max_val=0.20, + count=100, + total_sum=15.0, + ) + + metrics = EmbeddingsMetrics( + request_totals=StatusBreakdown( + successful=100, incomplete=5, errored=2, total=107 + ), + requests_per_second=StatusDistributionSummary( + successful=dist, errored=None, incomplete=None, total=None + ), + request_concurrency=StatusDistributionSummary( + successful=dist, errored=None, incomplete=None, total=None + ), + request_latency=StatusDistributionSummary( + successful=dist, errored=None, incomplete=None, total=None + ), + input_tokens_count=StatusBreakdown( + successful=5000, incomplete=200, errored=100, total=5300 + ), + input_tokens_per_second=StatusDistributionSummary( + successful=dist, errored=None, incomplete=None, total=None + ), + quality=quality, + encoding_format_breakdown={"float": 80, "base64": 20}, + ) + + assert metrics.request_totals.successful == 100 + assert metrics.request_totals.total == 107 + assert metrics.input_tokens_count.successful == 5000 + assert metrics.quality.mteb_main_score == 75.5 + assert metrics.encoding_format_breakdown["float"] == 80 + + @pytest.mark.sanity + def test_invalid_initialization_missing(self): + """Missing required fields should fail validation.""" + with pytest.raises(ValidationError): + EmbeddingsMetrics() # type: ignore[call-arg] + + @pytest.mark.smoke + def test_marshalling(self): + """Test model_dump / model_validate round-trip.""" + metrics = EmbeddingsMetrics( + request_totals=StatusBreakdown( + successful=10, incomplete=0, errored=0, total=10 + ), + requests_per_second=StatusDistributionSummary(), + request_concurrency=StatusDistributionSummary(), + request_latency=StatusDistributionSummary(), + input_tokens_count=StatusBreakdown( + successful=500, incomplete=0, errored=0, total=500 + ), + input_tokens_per_second=StatusDistributionSummary(), + 
encoding_format_breakdown={"float": 10}, + ) + + dumped = metrics.model_dump() + rebuilt = EmbeddingsMetrics.model_validate(dumped) + assert ( + rebuilt.request_totals.successful + == metrics.request_totals.successful + ) + assert ( + rebuilt.input_tokens_count.successful + == metrics.input_tokens_count.successful + ) + assert ( + rebuilt.encoding_format_breakdown + == metrics.encoding_format_breakdown + ) + + @pytest.mark.regression + def test_no_output_tokens(self): + """Verify embeddings have dummy output token fields for compatibility.""" + fields = EmbeddingsMetrics.model_fields + # Embeddings have dummy output token fields for progress tracker compatibility + # They exist but are always zero + assert "output_token_count" in fields + assert "output_tokens_per_second" in fields + + @pytest.mark.regression + def test_no_streaming_metrics(self): + """Verify embeddings metrics do not have streaming-related fields.""" + fields = EmbeddingsMetrics.model_fields + # Embeddings should NOT have streaming metrics + assert "time_to_first_token" not in fields + assert "inter_token_latency" not in fields + assert "time_per_output_token" not in fields diff --git a/tests/unit/extras/test_audio.py b/tests/unit/extras/test_audio.py index b7f783693..70235aab4 100644 --- a/tests/unit/extras/test_audio.py +++ b/tests/unit/extras/test_audio.py @@ -7,7 +7,14 @@ import pytest import torch -from guidellm.extras.audio import encode_audio +# Skip all tests if torchcodec/audio dependencies aren't available +try: + from guidellm.extras.audio import encode_audio +except (ImportError, RuntimeError) as e: + pytest.skip( + f"Audio dependencies not available: {e}", + allow_module_level=True, + ) @pytest.fixture diff --git a/tests/unit/mock_server/handlers/__init__.py b/tests/unit/mock_server/handlers/__init__.py new file mode 100644 index 000000000..d069e344b --- /dev/null +++ b/tests/unit/mock_server/handlers/__init__.py @@ -0,0 +1 @@ +"""Unit tests for mock server handlers.""" diff --git 
a/tests/unit/mock_server/handlers/test_embeddings.py b/tests/unit/mock_server/handlers/test_embeddings.py new file mode 100644 index 000000000..4d40259e1 --- /dev/null +++ b/tests/unit/mock_server/handlers/test_embeddings.py @@ -0,0 +1,368 @@ +from __future__ import annotations + +import base64 +import struct + +import pytest + +from guidellm.mock_server.config import MockServerConfig +from guidellm.mock_server.handlers.embeddings import EmbeddingsHandler +from guidellm.mock_server.models import ( + EmbeddingsRequest, + EmbeddingsResponse, +) + + +class TestEmbeddingsHandler: + """Tests for embeddings mock server handler.""" + + @pytest.fixture + def handler(self): + """Create embeddings handler with default config.""" + config = MockServerConfig() + return EmbeddingsHandler(config) + + @pytest.fixture + def handler_with_ttft(self): + """Create embeddings handler with TTFT delay.""" + config = MockServerConfig(ttft_ms=100.0) + return EmbeddingsHandler(config) + + @pytest.mark.smoke + def test_initialization(self, handler): + """Test handler initialization.""" + assert handler is not None + assert handler.config is not None + + @pytest.mark.sanity + async def test_handle_basic_request(self, handler): + """Test handling a basic embeddings request.""" + request = EmbeddingsRequest( + input="Test sentence for embedding.", + model="test-embedding-model", + ) + + response = await handler.handle(request) + + assert isinstance(response, EmbeddingsResponse) + assert response.object == "list" + assert len(response.data) == 1 + assert response.model == "test-embedding-model" + + @pytest.mark.sanity + async def test_handle_single_string_input(self, handler): + """Test handling request with single string input.""" + request = EmbeddingsRequest( + input="Single string input.", + model="test-model", + ) + + response = await handler.handle(request) + + assert len(response.data) == 1 + assert response.data[0].index == 0 + assert response.data[0].object == "embedding" + + 
@pytest.mark.sanity + async def test_handle_list_input(self, handler): + """Test handling request with list of strings.""" + inputs = [ + "First sentence.", + "Second sentence.", + "Third sentence.", + ] + + request = EmbeddingsRequest( + input=inputs, + model="test-model", + ) + + response = await handler.handle(request) + + assert len(response.data) == 3 + for i, emb_obj in enumerate(response.data): + assert emb_obj.index == i + assert emb_obj.object == "embedding" + + @pytest.mark.sanity + async def test_float_encoding(self, handler): + """Test float encoding format (default).""" + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + encoding_format="float", + ) + + response = await handler.handle(request) + + # Embedding should be a list of floats + embedding = response.data[0].embedding + assert isinstance(embedding, list) + assert all(isinstance(x, float) for x in embedding) + + @pytest.mark.sanity + async def test_base64_encoding(self, handler): + """Test base64 encoding format.""" + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + encoding_format="base64", + ) + + response = await handler.handle(request) + + # Embedding should be a base64-encoded string + embedding = response.data[0].embedding + assert isinstance(embedding, str) + + # Verify it's valid base64 + try: + decoded_bytes = base64.b64decode(embedding) + assert len(decoded_bytes) > 0 + except Exception: # noqa: BLE001 + pytest.fail("Invalid base64 encoding") + + @pytest.mark.regression + async def test_base64_encoding_decodes_to_floats(self, handler): + """Test that base64 encoding can be decoded back to floats.""" + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + encoding_format="base64", + ) + + response = await handler.handle(request) + + # Decode base64 to float array + embedding_b64 = response.data[0].embedding + decoded_bytes = base64.b64decode(embedding_b64) + + # Unpack as floats + num_floats = 
len(decoded_bytes) // 4 # 4 bytes per float + floats = struct.unpack(f"{num_floats}f", decoded_bytes) + + # Should be a valid array of floats + assert len(floats) > 0 + assert all(isinstance(x, float) for x in floats) + + @pytest.mark.sanity + async def test_usage_metrics(self, handler): + """Test that usage metrics are populated.""" + request = EmbeddingsRequest( + input="Test sentence with some tokens.", + model="test-model", + ) + + response = await handler.handle(request) + + assert response.usage is not None + assert response.usage.prompt_tokens > 0 + assert response.usage.total_tokens > 0 + # Embeddings don't have completion tokens + assert response.usage.completion_tokens == 0 + + @pytest.mark.regression + async def test_usage_metrics_batch(self, handler): + """Test usage metrics with batch input.""" + inputs = [ + "First sentence.", + "Second sentence.", + "Third sentence.", + ] + + request = EmbeddingsRequest( + input=inputs, + model="test-model", + ) + + response = await handler.handle(request) + + # Total tokens should sum across all inputs + assert response.usage.prompt_tokens > 0 + assert response.usage.total_tokens == response.usage.prompt_tokens + + @pytest.mark.sanity + async def test_dimensions_parameter(self, handler): + """Test dimensions parameter (Matryoshka embeddings).""" + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + dimensions=128, + encoding_format="float", + ) + + response = await handler.handle(request) + + # Embedding should have specified dimensions + embedding = response.data[0].embedding + assert len(embedding) == 128 + + @pytest.mark.regression + async def test_dimensions_default(self, handler): + """Test default dimensions when not specified.""" + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + encoding_format="float", + ) + + response = await handler.handle(request) + + # Default dimensions should be used (typically 384 or similar) + embedding = 
response.data[0].embedding + assert len(embedding) > 0 + # Common default dimension sizes + assert len(embedding) in [384, 512, 768, 1024, 1536] + + @pytest.mark.sanity + async def test_truncate_prompt_tokens(self, handler): + """Test truncate_prompt_tokens parameter.""" + request = EmbeddingsRequest( + input="A very long sentence with many tokens that should be truncated.", + model="test-model", + truncate_prompt_tokens=10, + ) + + response = await handler.handle(request) + + # Usage should reflect truncation + assert response.usage.prompt_tokens <= 10 + + @pytest.mark.regression + async def test_embedding_normalized(self, handler): + """Test that embeddings are normalized (unit length).""" + import math + + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + encoding_format="float", + ) + + response = await handler.handle(request) + + embedding = response.data[0].embedding + + # Calculate norm (should be 1.0 for normalized vector) + norm = math.sqrt(sum(x * x for x in embedding)) + assert norm == pytest.approx(1.0, abs=1e-6) + + @pytest.mark.regression + async def test_multiple_embeddings_different(self, handler): + """Test that different inputs produce different embeddings.""" + request = EmbeddingsRequest( + input=["First sentence.", "Second sentence."], + model="test-model", + encoding_format="float", + ) + + response = await handler.handle(request) + + emb1 = response.data[0].embedding + emb2 = response.data[1].embedding + + # Embeddings should be different (random generation) + assert emb1 != emb2 + + @pytest.mark.sanity + async def test_ttft_delay(self, handler_with_ttft): + """Test that TTFT delay is applied.""" + import time + + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + ) + + start = time.time() + await handler_with_ttft.handle(request) + elapsed = time.time() - start + + # Should have some delay (at least 50ms for 100ms TTFT config) + assert elapsed >= 0.05 # Reduced threshold for test 
reliability + + @pytest.mark.regression + async def test_empty_input(self, handler): + """Test handling empty input string.""" + request = EmbeddingsRequest( + input="", + model="test-model", + ) + + response = await handler.handle(request) + + # Should still produce an embedding (possibly all zeros or minimal) + assert len(response.data) == 1 + assert response.usage.prompt_tokens >= 0 + + @pytest.mark.regression + async def test_response_model_matches_request(self, handler): + """Test that response model matches request model.""" + model_name = "custom-embedding-model-v2" + request = EmbeddingsRequest( + input="Test sentence.", + model=model_name, + ) + + response = await handler.handle(request) + + assert response.model == model_name + + @pytest.mark.sanity + async def test_embedding_object_fields(self, handler): + """Test that embedding objects have correct fields.""" + request = EmbeddingsRequest( + input=["First.", "Second."], + model="test-model", + ) + + response = await handler.handle(request) + + for emb_obj in response.data: + assert hasattr(emb_obj, "object") + assert hasattr(emb_obj, "embedding") + assert hasattr(emb_obj, "index") + assert emb_obj.object == "embedding" + + @pytest.mark.regression + async def test_large_batch_input(self, handler): + """Test handling large batch of inputs.""" + inputs = [f"Sentence number {i}." 
for i in range(100)] + + request = EmbeddingsRequest( + input=inputs, + model="test-model", + ) + + response = await handler.handle(request) + + assert len(response.data) == 100 + for i, emb_obj in enumerate(response.data): + assert emb_obj.index == i + + @pytest.mark.regression + async def test_user_parameter(self, handler): + """Test user parameter (should be accepted but not affect output).""" + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + user="test-user-123", + ) + + response = await handler.handle(request) + + # Should complete successfully + assert isinstance(response, EmbeddingsResponse) + assert len(response.data) == 1 + + @pytest.mark.sanity + async def test_response_object_field(self, handler): + """Test that response object field is 'list'.""" + request = EmbeddingsRequest( + input="Test sentence.", + model="test-model", + ) + + response = await handler.handle(request) + + assert response.object == "list" diff --git a/tests/unit/schemas/test_embeddings_request_stats.py b/tests/unit/schemas/test_embeddings_request_stats.py new file mode 100644 index 000000000..77e82f843 --- /dev/null +++ b/tests/unit/schemas/test_embeddings_request_stats.py @@ -0,0 +1,355 @@ +from __future__ import annotations + +import asyncio +from typing import Any + +import numpy as np +import pytest +from pydantic import ValidationError + +from guidellm.schemas import ( + EmbeddingsRequestStats, + RequestInfo, + StandardBaseDict, + UsageMetrics, +) +from tests.unit.testing_utils import async_timeout + + +class TestEmbeddingsRequestStats: + """High-coverage, concise tests for EmbeddingsRequestStats.""" + + @pytest.fixture( + params=[ + "short_embedding", + "long_embedding", + "batch_embedding", + "float_encoding", + "base64_encoding", + "with_cosine_similarity", + ], + ) + def valid_instances( + self, request: pytest.FixtureRequest + ) -> tuple[EmbeddingsRequestStats, dict[str, Any]]: + """ + Generate realistic test instances for embeddings requests. 
+ + Returns tuple of (EmbeddingsRequestStats instance, expected values dict). + """ + case_id = request.param + rng = np.random.default_rng(hash(case_id) % (2**32)) + + # Define realistic scenarios based on common embeddings patterns + if case_id == "short_embedding": + # Quick embedding with few tokens + prompt_tokens = 10 + request_start = 0.0 + # Embeddings are faster than generative (no output tokens) + request_end = request_start + rng.uniform(0.05, 0.15) + resolve_end = request_end + encoding_format = "float" + cosine_similarity = None + + elif case_id == "long_embedding": + # Longer text embedding + prompt_tokens = 512 + request_start = 5.0 + # Proportional to input size + request_end = request_start + rng.uniform(0.3, 0.6) + resolve_end = request_end + encoding_format = "float" + cosine_similarity = None + + elif case_id == "batch_embedding": + # Batch processing + prompt_tokens = 150 + request_start = 10.0 + request_end = request_start + rng.uniform(0.2, 0.4) + resolve_end = request_end + encoding_format = "float" + cosine_similarity = None + + elif case_id == "float_encoding": + # Float encoding (default) + prompt_tokens = 50 + request_start = 0.0 + request_end = request_start + rng.uniform(0.1, 0.2) + resolve_end = request_end + encoding_format = "float" + cosine_similarity = None + + elif case_id == "base64_encoding": + # Base64 encoding + prompt_tokens = 50 + request_start = 0.0 + request_end = request_start + rng.uniform(0.1, 0.2) + resolve_end = request_end + encoding_format = "base64" + cosine_similarity = None + + else: # with_cosine_similarity + # With quality validation + prompt_tokens = 25 + request_start = 0.0 + request_end = request_start + rng.uniform(0.08, 0.18) + resolve_end = request_end + encoding_format = "float" + # Realistic cosine similarity (0.95-0.99 for good models) + cosine_similarity = rng.uniform(0.95, 0.99) + + # Build timings object via RequestInfo + info = RequestInfo(request_id=case_id, status="completed") + 
info.timings.request_start = request_start + info.timings.request_end = request_end + info.timings.resolve_end = resolve_end + + stats = EmbeddingsRequestStats( + request_id=case_id, + info=info, + input_metrics=UsageMetrics(text_tokens=prompt_tokens), + cosine_similarity=cosine_similarity, + encoding_format=encoding_format, + ) + + # Compute expected properties + expected_latency = ( + request_end - request_start + if request_start is not None + else None + ) + + expected: dict[str, Any] = { + "request_start_time": ( + request_start if request_start is not None else resolve_end + ), + "request_end_time": ( + request_end if request_end is not None else resolve_end + ), + "request_latency": expected_latency, + "prompt_tokens": prompt_tokens, + "cosine_similarity": cosine_similarity, + "encoding_format": encoding_format, + } + return stats, expected + + @pytest.mark.smoke + def test_class_signatures(self): + """Validate public surface, inheritance, and key properties.""" + assert issubclass(EmbeddingsRequestStats, StandardBaseDict) + assert hasattr(EmbeddingsRequestStats, "model_dump") + assert hasattr(EmbeddingsRequestStats, "model_validate") + + # fields exposed + fields = EmbeddingsRequestStats.model_fields + for field_name in ( + "type_", + "request_id", + "request_args", + "response_id", + "info", + "input_metrics", + "cosine_similarity", + "encoding_format", + ): + assert field_name in fields + + # computed properties + for prop_name in ( + "request_start_time", + "request_end_time", + "request_latency", + "prompt_tokens", + ): + assert hasattr(EmbeddingsRequestStats, prop_name) + + @pytest.mark.smoke + def test_initialization(self, valid_instances): + """Initialization from realistic inputs.""" + instance, expected = valid_instances + assert isinstance(instance, EmbeddingsRequestStats) + assert instance.type_ == "embeddings_request_stats" + assert instance.request_id + + # Basic fields echo + assert instance.prompt_tokens == expected["prompt_tokens"] + assert 
instance.encoding_format == expected["encoding_format"] + if expected["cosine_similarity"] is not None: + assert instance.cosine_similarity == pytest.approx( + expected["cosine_similarity"], rel=1e-6, abs=1e-6 + ) + + @pytest.mark.sanity + def test_invalid_initialization_missing(self): + """Missing required fields should fail validation.""" + with pytest.raises(ValidationError): + EmbeddingsRequestStats() # type: ignore[call-arg] + + @pytest.mark.sanity + @pytest.mark.parametrize( + ("field_name", "bad_value"), + [ + ("request_id", None), + ("request_id", 123), + ("info", None), + ("info", "not_request_info"), + ("input_metrics", None), + ("input_metrics", "not_usage_metrics"), + ("cosine_similarity", "not_a_float"), + ("encoding_format", 123), + ], + ) + def test_invalid_initialization_values(self, field_name: str, bad_value: Any): + """Type/None mismatches should raise.""" + info = RequestInfo(request_id="bad-1", status="completed") + info.timings.resolve_end = 1.0 + base = { + "request_id": "ok", + "info": info, + "input_metrics": UsageMetrics(text_tokens=1), + } + base[field_name] = bad_value + with pytest.raises(ValidationError): + EmbeddingsRequestStats(**base) # type: ignore[arg-type] + + @pytest.mark.regression + def test_computed_properties_match_expected(self, valid_instances): + """All computed properties should match precomputed expectations.""" + instance, expected = valid_instances + + # direct scalar comparisons + for key in ( + "request_start_time", + "request_end_time", + "request_latency", + "prompt_tokens", + ): + got = getattr(instance, key) + exp = expected[key] + if isinstance(exp, float): + # tolerant float compare + assert (got is None and exp is None) or pytest.approx( + exp, rel=1e-6, abs=1e-6 + ) == got + else: + assert got == exp + + @pytest.mark.sanity + def test_none_paths_for_latency(self): + """Ensure None is returned when required timing parts are missing.""" + info = RequestInfo(request_id="none-lat", status="completed") + 
info.timings.resolve_end = 1.0 # minimal to avoid property error + instance = EmbeddingsRequestStats( + request_id="none-lat", + info=info, + input_metrics=UsageMetrics(text_tokens=10), + ) + assert instance.request_latency is None + + @pytest.mark.smoke + def test_marshalling(self, valid_instances): + """model_dump / model_validate round-trip.""" + instance, _ = valid_instances + dumped = instance.model_dump() + assert dumped["type_"] == "embeddings_request_stats" + rebuilt = EmbeddingsRequestStats.model_validate(dumped) + assert rebuilt.request_id == instance.request_id + assert rebuilt.prompt_tokens == instance.prompt_tokens + assert rebuilt.encoding_format == instance.encoding_format + + @pytest.mark.sanity + def test_optional_fields(self): + """Test optional fields request_args, cosine_similarity.""" + info = RequestInfo(request_id="opt-test", status="completed") + info.timings.resolve_end = 10.0 + + # Without optional fields + instance = EmbeddingsRequestStats( + request_id="opt-test", + info=info, + input_metrics=UsageMetrics(text_tokens=5), + ) + assert instance.request_args is None + assert instance.cosine_similarity is None + assert instance.encoding_format == "float" # default + + # With optional fields + instance_with_opts = EmbeddingsRequestStats( + request_id="opt-test-2", + info=info, + input_metrics=UsageMetrics(text_tokens=5), + request_args="dimensions=384", + cosine_similarity=0.987, + encoding_format="base64", + ) + assert instance_with_opts.request_args == "dimensions=384" + assert instance_with_opts.cosine_similarity == 0.987 + assert instance_with_opts.encoding_format == "base64" + + @pytest.mark.sanity + def test_encoding_format_values(self): + """Test valid encoding format values.""" + info = RequestInfo(request_id="enc-test", status="completed") + info.timings.resolve_end = 10.0 + + # Float encoding + instance_float = EmbeddingsRequestStats( + request_id="enc-float", + info=info, + input_metrics=UsageMetrics(text_tokens=5), + 
encoding_format="float", + ) + assert instance_float.encoding_format == "float" + + # Base64 encoding + instance_base64 = EmbeddingsRequestStats( + request_id="enc-base64", + info=info, + input_metrics=UsageMetrics(text_tokens=5), + encoding_format="base64", + ) + assert instance_base64.encoding_format == "base64" + + @pytest.mark.sanity + def test_cosine_similarity_range(self): + """Test cosine similarity values within expected range.""" + info = RequestInfo(request_id="cos-test", status="completed") + info.timings.resolve_end = 10.0 + + # Valid cosine similarity values (-1 to 1) + for cos_val in [-1.0, -0.5, 0.0, 0.5, 0.99, 1.0]: + instance = EmbeddingsRequestStats( + request_id=f"cos-{cos_val}", + info=info, + input_metrics=UsageMetrics(text_tokens=5), + cosine_similarity=cos_val, + ) + assert instance.cosine_similarity == pytest.approx(cos_val, abs=1e-6) + + @pytest.mark.regression + def test_zero_division_edge_cases(self): + """Test edge cases that could cause zero division errors.""" + info = RequestInfo(request_id="zero-div", status="completed") + info.timings.resolve_end = 10.0 + info.timings.request_start = 10.0 # Same as end + info.timings.request_end = 10.0 + + stats = EmbeddingsRequestStats( + request_id="zero-div", + info=info, + input_metrics=UsageMetrics(text_tokens=5), + ) + + # Zero latency should be returned as 0.0 (not None, no division error) + assert stats.request_latency == 0.0 + + @pytest.mark.sanity + @pytest.mark.asyncio + @async_timeout(0.2) + async def test_async_context_usage(self, valid_instances): + """Light async smoke to satisfy async-timeout policy.""" + instance, expected = valid_instances + await asyncio.sleep(0) # yield + assert instance.request_id + assert instance.prompt_tokens == expected["prompt_tokens"] + assert instance.encoding_format == expected["encoding_format"] diff --git a/tox.ini b/tox.ini index b6ae685e6..ce4a84196 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,9 @@ env_list = py3{10,11,12,13} [testenv:tests] 
description = Run all tests dependency_groups = dev +deps = + pytest-httpx~=0.35.0 + respx~=0.22.0 commands = python -m pytest {posargs:tests/} diff --git a/uv.lock b/uv.lock index a068c69b2..8ca6a83bb 100644 --- a/uv.lock +++ b/uv.lock @@ -801,6 +801,7 @@ dependencies = [ { name = "ftfy" }, { name = "httpx", extra = ["http2"] }, { name = "loguru" }, + { name = "more-itertools" }, { name = "msgpack" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -824,8 +825,10 @@ all = [ { name = "mistral-common" }, { name = "msgpack" }, { name = "msgspec" }, + { name = "mteb" }, { name = "orjson" }, { name = "pillow" }, + { name = "sentence-transformers" }, { name = "tiktoken" }, { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, @@ -851,6 +854,7 @@ dev = [ { name = "mkdocs-linkcheck" }, { name = "msgpack" }, { name = "msgspec" }, + { name = "mteb" }, { name = "mypy" }, { name = "orjson" }, { name = "pandas-stubs" }, @@ -867,6 +871,7 @@ dev = [ { name = "ruff" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "sentence-transformers" }, { name = "setuptools" }, { name = "setuptools-git-versioning" }, { name = "sphinx" }, @@ -880,6 +885,10 @@ dev = [ { name = "types-toml" }, { name = "uvloop" }, ] +embeddings = [ + { name = "mteb" }, + { name = "sentence-transformers" }, +] perf = [ { name = "msgpack" }, { name 
= "msgspec" }, @@ -923,7 +932,7 @@ requires-dist = [ { name = "faker" }, { name = "ftfy", specifier = ">=6.0.0" }, { name = "guidellm", extras = ["all"], marker = "extra == 'dev'" }, - { name = "guidellm", extras = ["audio", "perf", "tokenizers", "vision"], marker = "extra == 'all'" }, + { name = "guidellm", extras = ["audio", "embeddings", "perf", "tokenizers", "vision"], marker = "extra == 'all'" }, { name = "guidellm", extras = ["perf", "tokenizers"], marker = "extra == 'recommended'" }, { name = "httpx", extras = ["http2"], specifier = "<1.0.0" }, { name = "loguru" }, @@ -934,9 +943,11 @@ requires-dist = [ { name = "mdformat-gfm", marker = "extra == 'dev'", specifier = "~=1.0.0" }, { name = "mistral-common", marker = "extra == 'tokenizers'" }, { name = "mkdocs-linkcheck", marker = "extra == 'dev'", specifier = "~=1.0.6" }, + { name = "more-itertools", specifier = ">=10.8.0" }, { name = "msgpack" }, { name = "msgpack", marker = "extra == 'perf'" }, { name = "msgspec", marker = "extra == 'perf'" }, + { name = "mteb", marker = "extra == 'embeddings'", specifier = ">=1.0.0" }, { name = "mypy", marker = "extra == 'dev'", specifier = "~=1.15.0" }, { name = "numpy", specifier = ">=2.0.0" }, { name = "orjson", marker = "extra == 'perf'" }, @@ -959,6 +970,7 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'", specifier = "~=0.11.7" }, { name = "sanic" }, { name = "scipy", marker = "extra == 'dev'", specifier = "~=1.10" }, + { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" }, { name = "setuptools", marker = "extra == 'dev'", specifier = ">=61.0" }, { name = "setuptools-git-versioning", marker = "extra == 'dev'", specifier = ">=2.0,<3" }, { name = "sphinx", marker = "extra == 'dev'", specifier = "~=7.1.2" }, @@ -975,7 +987,7 @@ requires-dist = [ { name = "uvloop", specifier = ">=0.18" }, { name = "uvloop", marker = "extra == 'perf'" }, ] -provides-extras = ["all", "recommended", "perf", "tokenizers", "audio", "vision", 
"dev"] +provides-extras = ["all", "recommended", "perf", "tokenizers", "audio", "vision", "embeddings", "dev"] [package.metadata.requires-dev] dev = [{ name = "guidellm", extras = ["dev"] }] @@ -1213,6 +1225,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "joblib" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -1591,6 +1612,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/74/87/240a21533662ba227ec683adcc187ec3a64e927ccf0c35f0d3b1b2fd331c/mkdocs_linkcheck-1.0.6-py3-none-any.whl", hash = "sha256:70dceae090101778002d949dc7b55f56eeb0c294bd9053fb6b197c26591665b1", size = 19759, upload-time = "2021-08-20T20:38:18.87Z" }, ] +[[package]] +name = "more-itertools" +version = "10.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload-time = "2025-09-02T15:23:11.018Z" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -1717,6 +1747,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/3e/c5187de84bb2c2ca334ab163fcacf19a23ebb1d876c837f81a1b324a15bf/msgspec-0.20.0-cp314-cp314t-win_arm64.whl", hash = "sha256:93f23528edc51d9f686808a361728e903d6f2be55c901d6f5c92e44c6d546bfc", size = 183011, upload-time = "2025-11-24T03:56:16.442Z" }, ] +[[package]] +name = "mteb" +version = "2.7.30" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "datasets" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "polars" }, + { name = "pydantic" }, + { name = "pytrec-eval-terrier" }, + { name = "requests" }, + { name = "rich" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "sentence-transformers" }, + { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = 
"2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/fd/9498edc7037ebe1e0cd4f34b2e02b91cb27e97748985f12ced5770b62e18/mteb-2.7.30.tar.gz", hash = "sha256:a01a7ab0e2d4153c16c20d180b2380cd3e92b5bccae666a263460876755419f5", size = 3125915, upload-time = "2026-02-12T16:15:38.239Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/e0/32c942437499fb41a74ea55e41fee7e28ba9db31e1794dd435f6e13c8b4f/mteb-2.7.30-py3-none-any.whl", hash = "sha256:c2ee3da7ba4429e98d5d85d5280c1e44430b653d5983c6b5de83e19383bd678b", size = 4778663, upload-time = "2026-02-12T16:15:36.242Z" }, +] + [[package]] name = "multidict" version = "6.7.0" @@ -2283,100 +2341,100 @@ wheels = [ [[package]] name = "pillow" -version = "12.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/08/26e68b6b5da219c2a2cb7b563af008b53bb8e6b6fcb3fa40715fcdb2523a/pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b", size = 5289809, upload-time = "2025-10-15T18:21:27.791Z" }, - { url = "https://files.pythonhosted.org/packages/cb/e9/4e58fb097fb74c7b4758a680aacd558810a417d1edaa7000142976ef9d2f/pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1", size = 4650606, upload-time = "2025-10-15T18:21:29.823Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/e0/1fa492aa9f77b3bc6d471c468e62bfea1823056bf7e5e4f1914d7ab2565e/pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363", size = 6221023, upload-time = "2025-10-15T18:21:31.415Z" }, - { url = "https://files.pythonhosted.org/packages/c1/09/4de7cd03e33734ccd0c876f0251401f1314e819cbfd89a0fcb6e77927cc6/pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca", size = 8024937, upload-time = "2025-10-15T18:21:33.453Z" }, - { url = "https://files.pythonhosted.org/packages/2e/69/0688e7c1390666592876d9d474f5e135abb4acb39dcb583c4dc5490f1aff/pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e", size = 6334139, upload-time = "2025-10-15T18:21:35.395Z" }, - { url = "https://files.pythonhosted.org/packages/ed/1c/880921e98f525b9b44ce747ad1ea8f73fd7e992bafe3ca5e5644bf433dea/pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782", size = 7026074, upload-time = "2025-10-15T18:21:37.219Z" }, - { url = "https://files.pythonhosted.org/packages/28/03/96f718331b19b355610ef4ebdbbde3557c726513030665071fd025745671/pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10", size = 6448852, upload-time = "2025-10-15T18:21:39.168Z" }, - { url = "https://files.pythonhosted.org/packages/3a/a0/6a193b3f0cc9437b122978d2c5cbce59510ccf9a5b48825096ed7472da2f/pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa", size = 7117058, upload-time = "2025-10-15T18:21:40.997Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/c4/043192375eaa4463254e8e61f0e2ec9a846b983929a8d0a7122e0a6d6fff/pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275", size = 6295431, upload-time = "2025-10-15T18:21:42.518Z" }, - { url = "https://files.pythonhosted.org/packages/92/c6/c2f2fc7e56301c21827e689bb8b0b465f1b52878b57471a070678c0c33cd/pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d", size = 7000412, upload-time = "2025-10-15T18:21:44.404Z" }, - { url = "https://files.pythonhosted.org/packages/b2/d2/5f675067ba82da7a1c238a73b32e3fd78d67f9d9f80fbadd33a40b9c0481/pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7", size = 2435903, upload-time = "2025-10-15T18:21:46.29Z" }, - { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" }, - { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" }, - { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" }, - { url = 
"https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" }, - { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" }, - { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" }, - { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" }, - { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" }, - { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" }, - { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" }, - { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" }, - { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" }, - { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" }, - { url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" }, - { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" }, - { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" }, - { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" }, - { url = "https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" }, - { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" }, - { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" }, - { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" }, - { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" }, - { url = "https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" }, - { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" }, - { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" }, - { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" }, - { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" }, - { url = "https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" }, - { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" }, - { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" }, - { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" }, - { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" }, - { url = "https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" }, - { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" }, - { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" }, - { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" }, - { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" }, - { url = "https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" }, - { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" }, - { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" }, - { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" }, - { url = "https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" }, - { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" }, - { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" }, - { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" }, - { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" }, - { url = "https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" }, - { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" }, - { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" }, - { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" }, - { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" }, - { url = "https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" }, - { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" }, - { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" }, - { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" }, - { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" }, - { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, - { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" }, - { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" }, - { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" }, - { url = 
"https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" }, - { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" }, - { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" }, - { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" }, +version = "12.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/30/5bd3d794762481f8c8ae9c80e7b76ecea73b916959eb587521358ef0b2f9/pillow-12.1.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1f1625b72740fdda5d77b4def688eb8fd6490975d06b909fd19f13f391e077e0", size = 5304099, upload-time = 
"2026-02-11T04:20:06.13Z" }, + { url = "https://files.pythonhosted.org/packages/bd/c1/aab9e8f3eeb4490180e357955e15c2ef74b31f64790ff356c06fb6cf6d84/pillow-12.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:178aa072084bd88ec759052feca8e56cbb14a60b39322b99a049e58090479713", size = 4657880, upload-time = "2026-02-11T04:20:09.291Z" }, + { url = "https://files.pythonhosted.org/packages/f1/0a/9879e30d56815ad529d3985aeff5af4964202425c27261a6ada10f7cbf53/pillow-12.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b66e95d05ba806247aaa1561f080abc7975daf715c30780ff92a20e4ec546e1b", size = 6222587, upload-time = "2026-02-11T04:20:10.82Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5f/a1b72ff7139e4f89014e8d451442c74a774d5c43cd938fb0a9f878576b37/pillow-12.1.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89c7e895002bbe49cdc5426150377cbbc04767d7547ed145473f496dfa40408b", size = 8027678, upload-time = "2026-02-11T04:20:12.455Z" }, + { url = "https://files.pythonhosted.org/packages/e2/c2/c7cb187dac79a3d22c3ebeae727abee01e077c8c7d930791dc592f335153/pillow-12.1.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a5cbdcddad0af3da87cb16b60d23648bc3b51967eb07223e9fed77a82b457c4", size = 6335777, upload-time = "2026-02-11T04:20:14.441Z" }, + { url = "https://files.pythonhosted.org/packages/0c/7b/f9b09a7804ec7336effb96c26d37c29d27225783dc1501b7d62dcef6ae25/pillow-12.1.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9f51079765661884a486727f0729d29054242f74b46186026582b4e4769918e4", size = 7027140, upload-time = "2026-02-11T04:20:16.387Z" }, + { url = "https://files.pythonhosted.org/packages/98/b2/2fa3c391550bd421b10849d1a2144c44abcd966daadd2f7c12e19ea988c4/pillow-12.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:99c1506ea77c11531d75e3a412832a13a71c7ebc8192ab9e4b2e355555920e3e", size = 6449855, upload-time = "2026-02-11T04:20:18.554Z" }, + 
{ url = "https://files.pythonhosted.org/packages/96/ff/9caf4b5b950c669263c39e96c78c0d74a342c71c4f43fd031bb5cb7ceac9/pillow-12.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:36341d06738a9f66c8287cf8b876d24b18db9bd8740fa0672c74e259ad408cff", size = 7151329, upload-time = "2026-02-11T04:20:20.646Z" }, + { url = "https://files.pythonhosted.org/packages/7b/f8/4b24841f582704da675ca535935bccb32b00a6da1226820845fac4a71136/pillow-12.1.1-cp310-cp310-win32.whl", hash = "sha256:6c52f062424c523d6c4db85518774cc3d50f5539dd6eed32b8f6229b26f24d40", size = 6325574, upload-time = "2026-02-11T04:20:22.43Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f9/9f6b01c0881d7036063aa6612ef04c0e2cad96be21325a1e92d0203f8e91/pillow-12.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6008de247150668a705a6338156efb92334113421ceecf7438a12c9a12dab23", size = 7032347, upload-time = "2026-02-11T04:20:23.932Z" }, + { url = "https://files.pythonhosted.org/packages/79/13/c7922edded3dcdaf10c59297540b72785620abc0538872c819915746757d/pillow-12.1.1-cp310-cp310-win_arm64.whl", hash = "sha256:1a9b0ee305220b392e1124a764ee4265bd063e54a751a6b62eff69992f457fa9", size = 2453457, upload-time = "2026-02-11T04:20:25.392Z" }, + { url = "https://files.pythonhosted.org/packages/2b/46/5da1ec4a5171ee7bf1a0efa064aba70ba3d6e0788ce3f5acd1375d23c8c0/pillow-12.1.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e879bb6cd5c73848ef3b2b48b8af9ff08c5b71ecda8048b7dd22d8a33f60be32", size = 5304084, upload-time = "2026-02-11T04:20:27.501Z" }, + { url = "https://files.pythonhosted.org/packages/78/93/a29e9bc02d1cf557a834da780ceccd54e02421627200696fcf805ebdc3fb/pillow-12.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:365b10bb9417dd4498c0e3b128018c4a624dc11c7b97d8cc54effe3b096f4c38", size = 4657866, upload-time = "2026-02-11T04:20:29.827Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/84/583a4558d492a179d31e4aae32eadce94b9acf49c0337c4ce0b70e0a01f2/pillow-12.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4ce8e329c93845720cd2014659ca67eac35f6433fd3050393d85f3ecef0dad5", size = 6232148, upload-time = "2026-02-11T04:20:31.329Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e2/53c43334bbbb2d3b938978532fbda8e62bb6e0b23a26ce8592f36bcc4987/pillow-12.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc354a04072b765eccf2204f588a7a532c9511e8b9c7f900e1b64e3e33487090", size = 8038007, upload-time = "2026-02-11T04:20:34.225Z" }, + { url = "https://files.pythonhosted.org/packages/b8/a6/3d0e79c8a9d58150dd98e199d7c1c56861027f3829a3a60b3c2784190180/pillow-12.1.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e7976bf1910a8116b523b9f9f58bf410f3e8aa330cd9a2bb2953f9266ab49af", size = 6345418, upload-time = "2026-02-11T04:20:35.858Z" }, + { url = "https://files.pythonhosted.org/packages/a2/c8/46dfeac5825e600579157eea177be43e2f7ff4a99da9d0d0a49533509ac5/pillow-12.1.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:597bd9c8419bc7c6af5604e55847789b69123bbe25d65cc6ad3012b4f3c98d8b", size = 7034590, upload-time = "2026-02-11T04:20:37.91Z" }, + { url = "https://files.pythonhosted.org/packages/af/bf/e6f65d3db8a8bbfeaf9e13cc0417813f6319863a73de934f14b2229ada18/pillow-12.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2c1fc0f2ca5f96a3c8407e41cca26a16e46b21060fe6d5b099d2cb01412222f5", size = 6458655, upload-time = "2026-02-11T04:20:39.496Z" }, + { url = "https://files.pythonhosted.org/packages/f9/c2/66091f3f34a25894ca129362e510b956ef26f8fb67a0e6417bc5744e56f1/pillow-12.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:578510d88c6229d735855e1f278aa305270438d36a05031dfaae5067cc8eb04d", size = 7159286, upload-time = "2026-02-11T04:20:41.139Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/5a/24bc8eb526a22f957d0cec6243146744966d40857e3d8deb68f7902ca6c1/pillow-12.1.1-cp311-cp311-win32.whl", hash = "sha256:7311c0a0dcadb89b36b7025dfd8326ecfa36964e29913074d47382706e516a7c", size = 6328663, upload-time = "2026-02-11T04:20:43.184Z" }, + { url = "https://files.pythonhosted.org/packages/31/03/bef822e4f2d8f9d7448c133d0a18185d3cce3e70472774fffefe8b0ed562/pillow-12.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:fbfa2a7c10cc2623f412753cddf391c7f971c52ca40a3f65dc5039b2939e8563", size = 7031448, upload-time = "2026-02-11T04:20:44.696Z" }, + { url = "https://files.pythonhosted.org/packages/49/70/f76296f53610bd17b2e7d31728b8b7825e3ac3b5b3688b51f52eab7c0818/pillow-12.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:b81b5e3511211631b3f672a595e3221252c90af017e399056d0faabb9538aa80", size = 2453651, upload-time = "2026-02-11T04:20:46.243Z" }, + { url = "https://files.pythonhosted.org/packages/07/d3/8df65da0d4df36b094351dce696f2989bec731d4f10e743b1c5f4da4d3bf/pillow-12.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab323b787d6e18b3d91a72fc99b1a2c28651e4358749842b8f8dfacd28ef2052", size = 5262803, upload-time = "2026-02-11T04:20:47.653Z" }, + { url = "https://files.pythonhosted.org/packages/d6/71/5026395b290ff404b836e636f51d7297e6c83beceaa87c592718747e670f/pillow-12.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adebb5bee0f0af4909c30db0d890c773d1a92ffe83da908e2e9e720f8edf3984", size = 4657601, upload-time = "2026-02-11T04:20:49.328Z" }, + { url = "https://files.pythonhosted.org/packages/b1/2e/1001613d941c67442f745aff0f7cc66dd8df9a9c084eb497e6a543ee6f7e/pillow-12.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb66b7cc26f50977108790e2456b7921e773f23db5630261102233eb355a3b79", size = 6234995, upload-time = "2026-02-11T04:20:51.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/26/246ab11455b2549b9233dbd44d358d033a2f780fa9007b61a913c5b2d24e/pillow-12.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aee2810642b2898bb187ced9b349e95d2a7272930796e022efaf12e99dccd293", size = 8045012, upload-time = "2026-02-11T04:20:52.882Z" }, + { url = "https://files.pythonhosted.org/packages/b2/8b/07587069c27be7535ac1fe33874e32de118fbd34e2a73b7f83436a88368c/pillow-12.1.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0b1cd6232e2b618adcc54d9882e4e662a089d5768cd188f7c245b4c8c44a397", size = 6349638, upload-time = "2026-02-11T04:20:54.444Z" }, + { url = "https://files.pythonhosted.org/packages/ff/79/6df7b2ee763d619cda2fb4fea498e5f79d984dae304d45a8999b80d6cf5c/pillow-12.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7aac39bcf8d4770d089588a2e1dd111cbaa42df5a94be3114222057d68336bd0", size = 7041540, upload-time = "2026-02-11T04:20:55.97Z" }, + { url = "https://files.pythonhosted.org/packages/2c/5e/2ba19e7e7236d7529f4d873bdaf317a318896bac289abebd4bb00ef247f0/pillow-12.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ab174cd7d29a62dd139c44bf74b698039328f45cb03b4596c43473a46656b2f3", size = 6462613, upload-time = "2026-02-11T04:20:57.542Z" }, + { url = "https://files.pythonhosted.org/packages/03/03/31216ec124bb5c3dacd74ce8efff4cc7f52643653bad4825f8f08c697743/pillow-12.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:339ffdcb7cbeaa08221cd401d517d4b1fe7a9ed5d400e4a8039719238620ca35", size = 7166745, upload-time = "2026-02-11T04:20:59.196Z" }, + { url = "https://files.pythonhosted.org/packages/1f/e7/7c4552d80052337eb28653b617eafdef39adfb137c49dd7e831b8dc13bc5/pillow-12.1.1-cp312-cp312-win32.whl", hash = "sha256:5d1f9575a12bed9e9eedd9a4972834b08c97a352bd17955ccdebfeca5913fa0a", size = 6328823, upload-time = "2026-02-11T04:21:01.385Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/17/688626d192d7261bbbf98846fc98995726bddc2c945344b65bec3a29d731/pillow-12.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:21329ec8c96c6e979cd0dfd29406c40c1d52521a90544463057d2aaa937d66a6", size = 7033367, upload-time = "2026-02-11T04:21:03.536Z" }, + { url = "https://files.pythonhosted.org/packages/ed/fe/a0ef1f73f939b0eca03ee2c108d0043a87468664770612602c63266a43c4/pillow-12.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:af9a332e572978f0218686636610555ae3defd1633597be015ed50289a03c523", size = 2453811, upload-time = "2026-02-11T04:21:05.116Z" }, + { url = "https://files.pythonhosted.org/packages/d5/11/6db24d4bd7685583caeae54b7009584e38da3c3d4488ed4cd25b439de486/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:d242e8ac078781f1de88bf823d70c1a9b3c7950a44cdf4b7c012e22ccbcd8e4e", size = 4062689, upload-time = "2026-02-11T04:21:06.804Z" }, + { url = "https://files.pythonhosted.org/packages/33/c0/ce6d3b1fe190f0021203e0d9b5b99e57843e345f15f9ef22fcd43842fd21/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:02f84dfad02693676692746df05b89cf25597560db2857363a208e393429f5e9", size = 4138535, upload-time = "2026-02-11T04:21:08.452Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c6/d5eb6a4fb32a3f9c21a8c7613ec706534ea1cf9f4b3663e99f0d83f6fca8/pillow-12.1.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:e65498daf4b583091ccbb2556c7000abf0f3349fcd57ef7adc9a84a394ed29f6", size = 3601364, upload-time = "2026-02-11T04:21:10.194Z" }, + { url = "https://files.pythonhosted.org/packages/14/a1/16c4b823838ba4c9c52c0e6bbda903a3fe5a1bdbf1b8eb4fff7156f3e318/pillow-12.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c6db3b84c87d48d0088943bf33440e0c42370b99b1c2a7989216f7b42eede60", size = 5262561, upload-time = "2026-02-11T04:21:11.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/ad/ad9dc98ff24f485008aa5cdedaf1a219876f6f6c42a4626c08bc4e80b120/pillow-12.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8b7e5304e34942bf62e15184219a7b5ad4ff7f3bb5cca4d984f37df1a0e1aee2", size = 4657460, upload-time = "2026-02-11T04:21:13.786Z" }, + { url = "https://files.pythonhosted.org/packages/9e/1b/f1a4ea9a895b5732152789326202a82464d5254759fbacae4deea3069334/pillow-12.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5bddd742a44b7e6b1e773ab5db102bd7a94c32555ba656e76d319d19c3850", size = 6232698, upload-time = "2026-02-11T04:21:15.949Z" }, + { url = "https://files.pythonhosted.org/packages/95/f4/86f51b8745070daf21fd2e5b1fe0eb35d4db9ca26e6d58366562fb56a743/pillow-12.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc44ef1f3de4f45b50ccf9136999d71abb99dca7706bc75d222ed350b9fd2289", size = 8041706, upload-time = "2026-02-11T04:21:17.723Z" }, + { url = "https://files.pythonhosted.org/packages/29/9b/d6ecd956bb1266dd1045e995cce9b8d77759e740953a1c9aad9502a0461e/pillow-12.1.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a8eb7ed8d4198bccbd07058416eeec51686b498e784eda166395a23eb99138e", size = 6346621, upload-time = "2026-02-11T04:21:19.547Z" }, + { url = "https://files.pythonhosted.org/packages/71/24/538bff45bde96535d7d998c6fed1a751c75ac7c53c37c90dc2601b243893/pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47b94983da0c642de92ced1702c5b6c292a84bd3a8e1d1702ff923f183594717", size = 7038069, upload-time = "2026-02-11T04:21:21.378Z" }, + { url = "https://files.pythonhosted.org/packages/94/0e/58cb1a6bc48f746bc4cb3adb8cabff73e2742c92b3bf7a220b7cf69b9177/pillow-12.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:518a48c2aab7ce596d3bf79d0e275661b846e86e4d0e7dec34712c30fe07f02a", size = 6460040, upload-time = "2026-02-11T04:21:23.148Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/57/9045cb3ff11eeb6c1adce3b2d60d7d299d7b273a2e6c8381a524abfdc474/pillow-12.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a550ae29b95c6dc13cf69e2c9dc5747f814c54eeb2e32d683e5e93af56caa029", size = 7164523, upload-time = "2026-02-11T04:21:25.01Z" }, + { url = "https://files.pythonhosted.org/packages/73/f2/9be9cb99f2175f0d4dbadd6616ce1bf068ee54a28277ea1bf1fbf729c250/pillow-12.1.1-cp313-cp313-win32.whl", hash = "sha256:a003d7422449f6d1e3a34e3dd4110c22148336918ddbfc6a32581cd54b2e0b2b", size = 6332552, upload-time = "2026-02-11T04:21:27.238Z" }, + { url = "https://files.pythonhosted.org/packages/3f/eb/b0834ad8b583d7d9d42b80becff092082a1c3c156bb582590fcc973f1c7c/pillow-12.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:344cf1e3dab3be4b1fa08e449323d98a2a3f819ad20f4b22e77a0ede31f0faa1", size = 7040108, upload-time = "2026-02-11T04:21:29.462Z" }, + { url = "https://files.pythonhosted.org/packages/d5/7d/fc09634e2aabdd0feabaff4a32f4a7d97789223e7c2042fd805ea4b4d2c2/pillow-12.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:5c0dd1636633e7e6a0afe7bf6a51a14992b7f8e60de5789018ebbdfae55b040a", size = 2453712, upload-time = "2026-02-11T04:21:31.072Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/b9d62794fc8a0dd14c1943df68347badbd5511103e0d04c035ffe5cf2255/pillow-12.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0330d233c1a0ead844fc097a7d16c0abff4c12e856c0b325f231820fee1f39da", size = 5264880, upload-time = "2026-02-11T04:21:32.865Z" }, + { url = "https://files.pythonhosted.org/packages/26/9d/e03d857d1347fa5ed9247e123fcd2a97b6220e15e9cb73ca0a8d91702c6e/pillow-12.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dae5f21afb91322f2ff791895ddd8889e5e947ff59f71b46041c8ce6db790bc", size = 4660616, upload-time = "2026-02-11T04:21:34.97Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/ec/8a6d22afd02570d30954e043f09c32772bfe143ba9285e2fdb11284952cd/pillow-12.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e0c664be47252947d870ac0d327fea7e63985a08794758aa8af5b6cb6ec0c9c", size = 6269008, upload-time = "2026-02-11T04:21:36.623Z" }, + { url = "https://files.pythonhosted.org/packages/3d/1d/6d875422c9f28a4a361f495a5f68d9de4a66941dc2c619103ca335fa6446/pillow-12.1.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:691ab2ac363b8217f7d31b3497108fb1f50faab2f75dfb03284ec2f217e87bf8", size = 8073226, upload-time = "2026-02-11T04:21:38.585Z" }, + { url = "https://files.pythonhosted.org/packages/a1/cd/134b0b6ee5eda6dc09e25e24b40fdafe11a520bc725c1d0bbaa5e00bf95b/pillow-12.1.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9e8064fb1cc019296958595f6db671fba95209e3ceb0c4734c9baf97de04b20", size = 6380136, upload-time = "2026-02-11T04:21:40.562Z" }, + { url = "https://files.pythonhosted.org/packages/7a/a9/7628f013f18f001c1b98d8fffe3452f306a70dc6aba7d931019e0492f45e/pillow-12.1.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:472a8d7ded663e6162dafdf20015c486a7009483ca671cece7a9279b512fcb13", size = 7067129, upload-time = "2026-02-11T04:21:42.521Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f8/66ab30a2193b277785601e82ee2d49f68ea575d9637e5e234faaa98efa4c/pillow-12.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:89b54027a766529136a06cfebeecb3a04900397a3590fd252160b888479517bf", size = 6491807, upload-time = "2026-02-11T04:21:44.22Z" }, + { url = "https://files.pythonhosted.org/packages/da/0b/a877a6627dc8318fdb84e357c5e1a758c0941ab1ddffdafd231983788579/pillow-12.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:86172b0831b82ce4f7877f280055892b31179e1576aa00d0df3bb1bbf8c3e524", size = 7190954, upload-time = "2026-02-11T04:21:46.114Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/43/6f732ff85743cf746b1361b91665d9f5155e1483817f693f8d57ea93147f/pillow-12.1.1-cp313-cp313t-win32.whl", hash = "sha256:44ce27545b6efcf0fdbdceb31c9a5bdea9333e664cda58a7e674bb74608b3986", size = 6336441, upload-time = "2026-02-11T04:21:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/3b/44/e865ef3986611bb75bfabdf94a590016ea327833f434558801122979cd0e/pillow-12.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a285e3eb7a5a45a2ff504e31f4a8d1b12ef62e84e5411c6804a42197c1cf586c", size = 7045383, upload-time = "2026-02-11T04:21:50.015Z" }, + { url = "https://files.pythonhosted.org/packages/a8/c6/f4fb24268d0c6908b9f04143697ea18b0379490cb74ba9e8d41b898bd005/pillow-12.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cc7d296b5ea4d29e6570dabeaed58d31c3fea35a633a69679fb03d7664f43fb3", size = 2456104, upload-time = "2026-02-11T04:21:51.633Z" }, + { url = "https://files.pythonhosted.org/packages/03/d0/bebb3ffbf31c5a8e97241476c4cf8b9828954693ce6744b4a2326af3e16b/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:417423db963cb4be8bac3fc1204fe61610f6abeed1580a7a2cbb2fbda20f12af", size = 4062652, upload-time = "2026-02-11T04:21:53.19Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c0/0e16fb0addda4851445c28f8350d8c512f09de27bbb0d6d0bbf8b6709605/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:b957b71c6b2387610f556a7eb0828afbe40b4a98036fc0d2acfa5a44a0c2036f", size = 4138823, upload-time = "2026-02-11T04:22:03.088Z" }, + { url = "https://files.pythonhosted.org/packages/6b/fb/6170ec655d6f6bb6630a013dd7cf7bc218423d7b5fa9071bf63dc32175ae/pillow-12.1.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:097690ba1f2efdeb165a20469d59d8bb03c55fb6621eb2041a060ae8ea3e9642", size = 3601143, upload-time = "2026-02-11T04:22:04.909Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/04/dc5c3f297510ba9a6837cbb318b87dd2b8f73eb41a43cc63767f65cb599c/pillow-12.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2815a87ab27848db0321fb78c7f0b2c8649dee134b7f2b80c6a45c6831d75ccd", size = 5266254, upload-time = "2026-02-11T04:22:07.656Z" }, + { url = "https://files.pythonhosted.org/packages/05/30/5db1236b0d6313f03ebf97f5e17cda9ca060f524b2fcc875149a8360b21c/pillow-12.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7ed2c6543bad5a7d5530eb9e78c53132f93dfa44a28492db88b41cdab885202", size = 4657499, upload-time = "2026-02-11T04:22:09.613Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/008d2ca0eb612e81968e8be0bbae5051efba24d52debf930126d7eaacbba/pillow-12.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:652a2c9ccfb556235b2b501a3a7cf3742148cd22e04b5625c5fe057ea3e3191f", size = 6232137, upload-time = "2026-02-11T04:22:11.434Z" }, + { url = "https://files.pythonhosted.org/packages/70/f1/f14d5b8eeb4b2cd62b9f9f847eb6605f103df89ef619ac68f92f748614ea/pillow-12.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6e4571eedf43af33d0fc233a382a76e849badbccdf1ac438841308652a08e1f", size = 8042721, upload-time = "2026-02-11T04:22:13.321Z" }, + { url = "https://files.pythonhosted.org/packages/5a/d6/17824509146e4babbdabf04d8171491fa9d776f7061ff6e727522df9bd03/pillow-12.1.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b574c51cf7d5d62e9be37ba446224b59a2da26dc4c1bb2ecbe936a4fb1a7cb7f", size = 6347798, upload-time = "2026-02-11T04:22:15.449Z" }, + { url = "https://files.pythonhosted.org/packages/d1/ee/c85a38a9ab92037a75615aba572c85ea51e605265036e00c5b67dfafbfe2/pillow-12.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a37691702ed687799de29a518d63d4682d9016932db66d4e90c345831b02fb4e", size = 7039315, upload-time = "2026-02-11T04:22:17.24Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/f3/bc8ccc6e08a148290d7523bde4d9a0d6c981db34631390dc6e6ec34cacf6/pillow-12.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f95c00d5d6700b2b890479664a06e754974848afaae5e21beb4d83c106923fd0", size = 6462360, upload-time = "2026-02-11T04:22:19.111Z" }, + { url = "https://files.pythonhosted.org/packages/f6/ab/69a42656adb1d0665ab051eec58a41f169ad295cf81ad45406963105408f/pillow-12.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:559b38da23606e68681337ad74622c4dbba02254fc9cb4488a305dd5975c7eeb", size = 7165438, upload-time = "2026-02-11T04:22:21.041Z" }, + { url = "https://files.pythonhosted.org/packages/02/46/81f7aa8941873f0f01d4b55cc543b0a3d03ec2ee30d617a0448bf6bd6dec/pillow-12.1.1-cp314-cp314-win32.whl", hash = "sha256:03edcc34d688572014ff223c125a3f77fb08091e4607e7745002fc214070b35f", size = 6431503, upload-time = "2026-02-11T04:22:22.833Z" }, + { url = "https://files.pythonhosted.org/packages/40/72/4c245f7d1044b67affc7f134a09ea619d4895333d35322b775b928180044/pillow-12.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:50480dcd74fa63b8e78235957d302d98d98d82ccbfac4c7e12108ba9ecbdba15", size = 7176748, upload-time = "2026-02-11T04:22:24.64Z" }, + { url = "https://files.pythonhosted.org/packages/e4/ad/8a87bdbe038c5c698736e3348af5c2194ffb872ea52f11894c95f9305435/pillow-12.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:5cb1785d97b0c3d1d1a16bc1d710c4a0049daefc4935f3a8f31f827f4d3d2e7f", size = 2544314, upload-time = "2026-02-11T04:22:26.685Z" }, + { url = "https://files.pythonhosted.org/packages/6c/9d/efd18493f9de13b87ede7c47e69184b9e859e4427225ea962e32e56a49bc/pillow-12.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1f90cff8aa76835cba5769f0b3121a22bd4eb9e6884cfe338216e557a9a548b8", size = 5268612, upload-time = "2026-02-11T04:22:29.884Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f1/4f42eb2b388eb2ffc660dcb7f7b556c1015c53ebd5f7f754965ef997585b/pillow-12.1.1-cp314-cp314t-macosx_11_0_arm64.whl", 
hash = "sha256:1f1be78ce9466a7ee64bfda57bdba0f7cc499d9794d518b854816c41bf0aa4e9", size = 4660567, upload-time = "2026-02-11T04:22:31.799Z" }, + { url = "https://files.pythonhosted.org/packages/01/54/df6ef130fa43e4b82e32624a7b821a2be1c5653a5fdad8469687a7db4e00/pillow-12.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42fc1f4677106188ad9a55562bbade416f8b55456f522430fadab3cef7cd4e60", size = 6269951, upload-time = "2026-02-11T04:22:33.921Z" }, + { url = "https://files.pythonhosted.org/packages/a9/48/618752d06cc44bb4aae8ce0cd4e6426871929ed7b46215638088270d9b34/pillow-12.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98edb152429ab62a1818039744d8fbb3ccab98a7c29fc3d5fcef158f3f1f68b7", size = 8074769, upload-time = "2026-02-11T04:22:35.877Z" }, + { url = "https://files.pythonhosted.org/packages/c3/bd/f1d71eb39a72fa088d938655afba3e00b38018d052752f435838961127d8/pillow-12.1.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d470ab1178551dd17fdba0fef463359c41aaa613cdcd7ff8373f54be629f9f8f", size = 6381358, upload-time = "2026-02-11T04:22:37.698Z" }, + { url = "https://files.pythonhosted.org/packages/64/ef/c784e20b96674ed36a5af839305f55616f8b4f8aa8eeccf8531a6e312243/pillow-12.1.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6408a7b064595afcab0a49393a413732a35788f2a5092fdc6266952ed67de586", size = 7068558, upload-time = "2026-02-11T04:22:39.597Z" }, + { url = "https://files.pythonhosted.org/packages/73/cb/8059688b74422ae61278202c4e1ad992e8a2e7375227be0a21c6b87ca8d5/pillow-12.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5d8c41325b382c07799a3682c1c258469ea2ff97103c53717b7893862d0c98ce", size = 6493028, upload-time = "2026-02-11T04:22:42.73Z" }, + { url = "https://files.pythonhosted.org/packages/c6/da/e3c008ed7d2dd1f905b15949325934510b9d1931e5df999bb15972756818/pillow-12.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:c7697918b5be27424e9ce568193efd13d925c4481dd364e43f5dff72d33e10f8", size = 7191940, upload-time = "2026-02-11T04:22:44.543Z" }, + { url = "https://files.pythonhosted.org/packages/01/4a/9202e8d11714c1fc5951f2e1ef362f2d7fbc595e1f6717971d5dd750e969/pillow-12.1.1-cp314-cp314t-win32.whl", hash = "sha256:d2912fd8114fc5545aa3a4b5576512f64c55a03f3ebcca4c10194d593d43ea36", size = 6438736, upload-time = "2026-02-11T04:22:46.347Z" }, + { url = "https://files.pythonhosted.org/packages/f3/ca/cbce2327eb9885476b3957b2e82eb12c866a8b16ad77392864ad601022ce/pillow-12.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4ceb838d4bd9dab43e06c363cab2eebf63846d6a4aeaea283bbdfd8f1a8ed58b", size = 7182894, upload-time = "2026-02-11T04:22:48.114Z" }, + { url = "https://files.pythonhosted.org/packages/ec/d2/de599c95ba0a973b94410477f8bf0b6f0b5e67360eb89bcb1ad365258beb/pillow-12.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7b03048319bfc6170e93bd60728a1af51d3dd7704935feb228c4d4faab35d334", size = 2546446, upload-time = "2026-02-11T04:22:50.342Z" }, + { url = "https://files.pythonhosted.org/packages/56/11/5d43209aa4cb58e0cc80127956ff1796a68b928e6324bbf06ef4db34367b/pillow-12.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:600fd103672b925fe62ed08e0d874ea34d692474df6f4bf7ebe148b30f89f39f", size = 5228606, upload-time = "2026-02-11T04:22:52.106Z" }, + { url = "https://files.pythonhosted.org/packages/5f/d5/3b005b4e4fda6698b371fa6c21b097d4707585d7db99e98d9b0b87ac612a/pillow-12.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:665e1b916b043cef294bc54d47bf02d87e13f769bc4bc5fa225a24b3a6c5aca9", size = 4622321, upload-time = "2026-02-11T04:22:53.827Z" }, + { url = "https://files.pythonhosted.org/packages/df/36/ed3ea2d594356fd8037e5a01f6156c74bc8d92dbb0fa60746cc96cabb6e8/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:495c302af3aad1ca67420ddd5c7bd480c8867ad173528767d906428057a11f0e", size = 5247579, upload-time = 
"2026-02-11T04:22:56.094Z" }, + { url = "https://files.pythonhosted.org/packages/54/9a/9cc3e029683cf6d20ae5085da0dafc63148e3252c2f13328e553aaa13cfb/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8fd420ef0c52c88b5a035a0886f367748c72147b2b8f384c9d12656678dfdfa9", size = 6989094, upload-time = "2026-02-11T04:22:58.288Z" }, + { url = "https://files.pythonhosted.org/packages/00/98/fc53ab36da80b88df0967896b6c4b4cd948a0dc5aa40a754266aa3ae48b3/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f975aa7ef9684ce7e2c18a3aa8f8e2106ce1e46b94ab713d156b2898811651d3", size = 5313850, upload-time = "2026-02-11T04:23:00.554Z" }, + { url = "https://files.pythonhosted.org/packages/30/02/00fa585abfd9fe9d73e5f6e554dc36cc2b842898cbfc46d70353dae227f8/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8089c852a56c2966cf18835db62d9b34fef7ba74c726ad943928d494fa7f4735", size = 5963343, upload-time = "2026-02-11T04:23:02.934Z" }, + { url = "https://files.pythonhosted.org/packages/f2/26/c56ce33ca856e358d27fda9676c055395abddb82c35ac0f593877ed4562e/pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e", size = 7029880, upload-time = "2026-02-11T04:23:04.783Z" }, ] [[package]] @@ -2397,6 +2455,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polars" +version = "1.38.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "polars-runtime-32" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c6/5e/208a24471a433bcd0e9a6889ac49025fd4daad2815c8220c5bd2576e5f1b/polars-1.38.1.tar.gz", hash = "sha256:803a2be5344ef880ad625addfb8f641995cfd777413b08a10de0897345778239", size = 717667, upload-time = "2026-02-06T18:13:23.013Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/49/737c1a6273c585719858261753da0b688454d1b634438ccba8a9c4eb5aab/polars-1.38.1-py3-none-any.whl", hash = "sha256:a29479c48fed4984d88b656486d221f638cba45d3e961631a50ee5fdde38cb2c", size = 810368, upload-time = "2026-02-06T18:11:55.819Z" }, +] + +[[package]] +name = "polars-runtime-32" +version = "1.38.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/07/4b/04d6b3fb7cf336fbe12fbc4b43f36d1783e11bb0f2b1e3980ec44878df06/polars_runtime_32-1.38.1.tar.gz", hash = "sha256:04f20ed1f5c58771f34296a27029dc755a9e4b1390caeaef8f317e06fdfce2ec", size = 2812631, upload-time = "2026-02-06T18:13:25.206Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/a2/a00defbddadd8cf1042f52380dcba6b6592b03bac8e3b34c436b62d12d3b/polars_runtime_32-1.38.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:18154e96044724a0ac38ce155cf63aa03c02dd70500efbbf1a61b08cadd269ef", size = 44108001, upload-time = "2026-02-06T18:11:58.127Z" }, + { url = "https://files.pythonhosted.org/packages/a7/fb/599ff3709e6a303024efd7edfd08cf8de55c6ac39527d8f41cbc4399385f/polars_runtime_32-1.38.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:c49acac34cc4049ed188f1eb67d6ff3971a39b4af7f7b734b367119970f313ac", size = 40230140, upload-time = "2026-02-06T18:12:01.181Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8c/3ac18d6f89dc05fe2c7c0ee1dc5b81f77a5c85ad59898232c2500fe2ebbf/polars_runtime_32-1.38.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fef2ef2626a954e010e006cc8e4de467ecf32d08008f130cea1c78911f545323", size = 41994039, upload-time = "2026-02-06T18:12:04.332Z" }, + { 
url = "https://files.pythonhosted.org/packages/f2/5a/61d60ec5cc0ab37cbd5a699edb2f9af2875b7fdfdfb2a4608ca3cc5f0448/polars_runtime_32-1.38.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8a5f7a8125e2d50e2e060296551c929aec09be23a9edcb2b12ca923f555a5ba", size = 45755804, upload-time = "2026-02-06T18:12:07.846Z" }, + { url = "https://files.pythonhosted.org/packages/91/54/02cd4074c98c361ccd3fec3bcb0bd68dbc639c0550c42a4436b0ff0f3ccf/polars_runtime_32-1.38.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:10d19cd9863e129273b18b7fcaab625b5c8143c2d22b3e549067b78efa32e4fa", size = 42159605, upload-time = "2026-02-06T18:12:10.919Z" }, + { url = "https://files.pythonhosted.org/packages/8e/f3/b2a5e720cc56eaa38b4518e63aa577b4bbd60e8b05a00fe43ca051be5879/polars_runtime_32-1.38.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61e8d73c614b46a00d2f853625a7569a2e4a0999333e876354ac81d1bf1bb5e2", size = 45336615, upload-time = "2026-02-06T18:12:14.074Z" }, + { url = "https://files.pythonhosted.org/packages/f1/8d/ee2e4b7de948090cfb3df37d401c521233daf97bfc54ddec5d61d1d31618/polars_runtime_32-1.38.1-cp310-abi3-win_amd64.whl", hash = "sha256:08c2b3b93509c1141ac97891294ff5c5b0c548a373f583eaaea873a4bf506437", size = 45680732, upload-time = "2026-02-06T18:12:19.097Z" }, + { url = "https://files.pythonhosted.org/packages/bf/18/72c216f4ab0c82b907009668f79183ae029116ff0dd245d56ef58aac48e7/polars_runtime_32-1.38.1-cp310-abi3-win_arm64.whl", hash = "sha256:6d07d0cc832bfe4fb54b6e04218c2c27afcfa6b9498f9f6bbf262a00d58cc7c4", size = 41639413, upload-time = "2026-02-06T18:12:22.044Z" }, +] + [[package]] name = "pre-commit" version = "3.5.0" @@ -2953,6 +3039,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" 
}, ] +[[package]] +name = "pytrec-eval-terrier" +version = "0.5.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/96/4925a95e4865a647bc74d3bb052243d12a3c8e8a34909d7d097b5a4d08c5/pytrec_eval_terrier-0.5.10.tar.gz", hash = "sha256:eaaf20580d17b5575a233e04dab8a4cbcc01a7e45be8cf547c07f0a2bb3e7eb9", size = 18634, upload-time = "2025-10-20T16:50:18.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/a6/09a081ea7cf76c680b5fa8367836cba5a019d1de5be295081992a0addfc1/pytrec_eval_terrier-0.5.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5e574b2d4285d42e3bdc7ca0d9724d46c3bce06d3ee5d6c20e90fdea19761a2f", size = 136811, upload-time = "2025-10-20T16:50:38.729Z" }, + { url = "https://files.pythonhosted.org/packages/e3/b5/f18b1ad8936a38a7b1d51913189cd53d477d513cd48b79c7cb9bb7dc980f/pytrec_eval_terrier-0.5.10-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e69c78878379e3e5e280ecf91e9c3bd882f637763d2378655bb0f121e62efbd4", size = 303698, upload-time = "2025-10-20T16:54:13.674Z" }, + { url = "https://files.pythonhosted.org/packages/c5/1a/2d6a268d2327c38547b4e4a0f815fd51b4a93ab3ee5639260e82def444bb/pytrec_eval_terrier-0.5.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:04266dd7869276ae025399df69bf050bba26043b37426cd482fb9bcaa2b78ffa", size = 1327102, upload-time = "2025-10-20T16:54:14.551Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/d2/283bb904ee40d0a1bb6858e018fca63043632ac0426e4c5badd5548cc753/pytrec_eval_terrier-0.5.10-cp310-cp310-win_amd64.whl", hash = "sha256:bb0bb4495f10a0bff95f97a8c17df67c967d611c9fc1a5db13e143e7888b102e", size = 58611, upload-time = "2025-10-20T16:52:01.714Z" }, + { url = "https://files.pythonhosted.org/packages/18/de/7659555355381e57a73e7ba31437dc31d3df146b5cc3fb66eb032683e84e/pytrec_eval_terrier-0.5.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1036735d4a12d1c92eea38a14a071168a292f8696099e90742c2c701479f010b", size = 136866, upload-time = "2025-10-20T16:50:40.054Z" }, + { url = "https://files.pythonhosted.org/packages/d3/d7/1cbc2d3936eec51b57e1146840eb3ccd8a9fb2debc519d7aa748f13dd724/pytrec_eval_terrier-0.5.10-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b36a2fbdccc7669c4b8aba1f6de2a661e6f2f77c10f05855eda55dda60fc88f5", size = 304025, upload-time = "2025-10-20T16:54:15.957Z" }, + { url = "https://files.pythonhosted.org/packages/7a/a2/84c93f0a260d0dabca007a02b206981d235c7f4b4c569ec746b5ef6d965b/pytrec_eval_terrier-0.5.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9e4ca19110f24922d7435cf9ef9951a61f0b575488b6a1db86081d82b88dd621", size = 1327402, upload-time = "2025-10-20T16:54:16.842Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ee/3a20da0523228f54d8b89b9a11d7ec402625086cc3167fb940e36a9e2d5b/pytrec_eval_terrier-0.5.10-cp311-cp311-win_amd64.whl", hash = "sha256:d36e9a8966560ed10bc5aeb30c5c29a53d3fe8e4ccb6ff6bb026bffb21be3fe3", size = 58558, upload-time = "2025-10-20T16:51:46.032Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ca/f0edd9df08c08c96d2f088c298cfb824c3ee816302ac1f911ecb1bfdd681/pytrec_eval_terrier-0.5.10-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28c3c14728713cdbad165964e2d1aba96b0fc7445a5a13168b398e9bd3bbd08", size = 137179, upload-time = "2025-10-20T16:51:07.809Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/55/e02a14b0d3ac520849f66391f03c6783b3383fd23a19372d07a2280b815e/pytrec_eval_terrier-0.5.10-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:689ee541d72c27d14ae15cd1f11d2cb86cf9bdc880f5e8af9c5dbbdd47663d4d", size = 304845, upload-time = "2025-10-20T16:54:17.791Z" }, + { url = "https://files.pythonhosted.org/packages/76/9c/9020b700199b09ebdfc6dbadae81641a49555c4ee21dedbe2aa98af601b5/pytrec_eval_terrier-0.5.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3f02118dadd3c09b71462bb26e405e49bd10fe0c60bcc169fcd31454a4256dc2", size = 1327965, upload-time = "2025-10-20T16:54:18.743Z" }, + { url = "https://files.pythonhosted.org/packages/39/9e/6e7c2b89f52e1cebeef6c3bb47272f5bd69766ddbc6e9e5445da0c876899/pytrec_eval_terrier-0.5.10-cp312-cp312-win_amd64.whl", hash = "sha256:202e48fe24948453fe45dcd73261f9865f99cb2ff4c8a3255ac2ab4c993a64ba", size = 58641, upload-time = "2025-10-20T16:51:26.148Z" }, + { url = "https://files.pythonhosted.org/packages/93/21/71a0dee7e2cd368237432af6bf6051ffde03370730dc1666cd39494c82a7/pytrec_eval_terrier-0.5.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:fcf96c33446c16de8db78e829c5279f7404ceaaf6b502bb5a6a3669b06051601", size = 137186, upload-time = "2025-10-20T16:50:22.941Z" }, + { url = "https://files.pythonhosted.org/packages/5c/8c/2494edf20d726bdd3ee0a20dc5ed84351c6cc6ccc17b11b474e315808762/pytrec_eval_terrier-0.5.10-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8455485f1faf6759f1be11b12c904d1c749ba5db7e2b6f414aa56e19533ce069", size = 304917, upload-time = "2025-10-20T16:54:20.486Z" }, + { url = "https://files.pythonhosted.org/packages/cf/51/7611546afb55548e65db35354a63b90d5fd5ea593fc64e5993088bf61415/pytrec_eval_terrier-0.5.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e7cc9666305281b0ca1873761dc71cd3f0863e6d759f00a12fd363aa2d558d6f", size = 1327998, upload-time = "2025-10-20T16:54:21.375Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/b3/20941b4dbe3b267271ed1ef80aa93b348da674aecb5d6aca8f311c4738b0/pytrec_eval_terrier-0.5.10-cp313-cp313-win_amd64.whl", hash = "sha256:9440bd4a78ee0bc5db6821d7483e962a6c494303fd26598f84f00d54cc64cdd7", size = 58631, upload-time = "2025-10-20T16:51:05.08Z" }, + { url = "https://files.pythonhosted.org/packages/f0/34/e3d0f75286151d97537309b3f311e1269b0194e3823038fc39054e84c3b4/pytrec_eval_terrier-0.5.10-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:70bc61b8d02e61a37ed97c088282bb0a124b58e7141cc52756512750efabacbb", size = 137320, upload-time = "2025-10-20T16:50:50.92Z" }, + { url = "https://files.pythonhosted.org/packages/1c/72/2c1f9fd44ed7a5657654a712e5255019d5d23ba2b3d53848da1838bfb8df/pytrec_eval_terrier-0.5.10-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d52d94803c32cadbff7fe5195b0d0d68d27393092f64207fe8250a4485d1f8d7", size = 304917, upload-time = "2025-10-20T16:54:22.59Z" }, + { url = "https://files.pythonhosted.org/packages/66/9d/7e440de7b37dd31cd78eefe2ec1bf3e5f49db42b17b34dc8d6006ee03fc5/pytrec_eval_terrier-0.5.10-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:77950d0ce9bd960af40efede6850e7b6519400e7fda3f9313e0d0d02c247e4e2", size = 1327991, upload-time = "2025-10-20T16:54:23.76Z" }, + { url = "https://files.pythonhosted.org/packages/ef/94/5639d7c346935a75540c1f1798be277c161b561001f2a91ef303e3d85f10/pytrec_eval_terrier-0.5.10-cp314-cp314-win_amd64.whl", hash = "sha256:c69681fec350fa94af45dd7ef8f53f605e89f752583c814f713d7d2329435cfc", size = 60178, upload-time = "2025-10-20T16:51:50.946Z" }, + { url = "https://files.pythonhosted.org/packages/f4/a7/9080fe3f971397ea4447e3bda0c350225c944047ede7927c9a1f788af000/pytrec_eval_terrier-0.5.10-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:876740f3d58625058d34aaa1939be31bf253ecacd85d0d8b1089db5dd57ab127", size = 308002, upload-time = "2025-10-20T16:54:24.746Z" }, + { url = 
"https://files.pythonhosted.org/packages/ad/c9/5bf9d58cb275559211ba4af905c5a4d95f78c4b973f4186f8b22d8c0b073/pytrec_eval_terrier-0.5.10-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2ca4e624e5f2589ae75c1034ff1f38e9fc81de86314193508ac423e7ca56769c", size = 1330474, upload-time = "2025-10-20T16:54:25.569Z" }, +] + [[package]] name = "pytz" version = "2025.2" @@ -3461,6 +3583,110 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cf/e3/3425c9a8773807ac2c01d6a56c8521733f09b627e5827e733c5cd36b9ac5/sanic_routing-23.12.0-py3-none-any.whl", hash = "sha256:1558a72afcb9046ed3134a5edae02fc1552cff08f0fff2e8d5de0877ea43ed73", size = 25522, upload-time = "2023-12-31T09:28:35.233Z" }, ] +[[package]] +name = "scikit-learn" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11' and sys_platform != 'darwin'", + "python_full_version < '3.11' and sys_platform == 'darwin'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = 
"2025-09-09T08:20:19.328Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" }, + { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = "2025-09-09T08:20:24.327Z" }, + { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" }, + { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" }, + { url = "https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" }, + { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" }, + { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" }, + { url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" }, + { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" }, + { url = "https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" }, + { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" }, + { url = "https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" }, + { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" }, + { url = "https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = "2025-09-09T08:20:59.671Z" }, + { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" }, + { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" }, + { url = "https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" }, + { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" }, + { url = "https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = "2025-09-09T08:21:13.602Z" }, + { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = "sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" }, + { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" }, + { url = "https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" }, + { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" }, + { url = "https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" }, +] + +[[package]] +name = "scikit-learn" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform != 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'darwin'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 
'darwin'", +] +dependencies = [ + { name = "joblib", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "threadpoolctl", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/92/53ea2181da8ac6bf27170191028aee7251f8f841f8d3edbfdcaf2008fde9/scikit_learn-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:146b4d36f800c013d267b29168813f7a03a43ecd2895d04861f1240b564421da", size = 8595835, upload-time = "2025-12-10T07:07:39.385Z" }, + { url = "https://files.pythonhosted.org/packages/01/18/d154dc1638803adf987910cdd07097d9c526663a55666a97c124d09fb96a/scikit_learn-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f984ca4b14914e6b4094c5d52a32ea16b49832c03bd17a110f004db3c223e8e1", size = 8080381, upload-time = "2025-12-10T07:07:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/8a/44/226142fcb7b7101e64fdee5f49dbe6288d4c7af8abf593237b70fca080a4/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e30adb87f0cc81c7690a84f7932dd66be5bac57cfe16b91cb9151683a4a2d3b", size = 8799632, upload-time = "2025-12-10T07:07:43.899Z" }, + { url = "https://files.pythonhosted.org/packages/36/4d/4a67f30778a45d542bbea5db2dbfa1e9e100bf9ba64aefe34215ba9f11f6/scikit_learn-1.8.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:ada8121bcb4dac28d930febc791a69f7cb1673c8495e5eee274190b73a4559c1", size = 9103788, upload-time = "2025-12-10T07:07:45.982Z" }, + { url = "https://files.pythonhosted.org/packages/89/3c/45c352094cfa60050bcbb967b1faf246b22e93cb459f2f907b600f2ceda5/scikit_learn-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:c57b1b610bd1f40ba43970e11ce62821c2e6569e4d74023db19c6b26f246cb3b", size = 8081706, upload-time = "2025-12-10T07:07:48.111Z" }, + { url = "https://files.pythonhosted.org/packages/3d/46/5416595bb395757f754feb20c3d776553a386b661658fb21b7c814e89efe/scikit_learn-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:2838551e011a64e3053ad7618dda9310175f7515f1742fa2d756f7c874c05961", size = 7688451, upload-time = "2025-12-10T07:07:49.873Z" }, + { url = "https://files.pythonhosted.org/packages/90/74/e6a7cc4b820e95cc38cf36cd74d5aa2b42e8ffc2d21fe5a9a9c45c1c7630/scikit_learn-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e", size = 8548242, upload-time = "2025-12-10T07:07:51.568Z" }, + { url = "https://files.pythonhosted.org/packages/49/d8/9be608c6024d021041c7f0b3928d4749a706f4e2c3832bbede4fb4f58c95/scikit_learn-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76", size = 8079075, upload-time = "2025-12-10T07:07:53.697Z" }, + { url = "https://files.pythonhosted.org/packages/dd/47/f187b4636ff80cc63f21cd40b7b2d177134acaa10f6bb73746130ee8c2e5/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4", size = 8660492, upload-time = "2025-12-10T07:07:55.574Z" }, + { url = "https://files.pythonhosted.org/packages/97/74/b7a304feb2b49df9fafa9382d4d09061a96ee9a9449a7cbea7988dda0828/scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a", size = 8931904, upload-time = "2025-12-10T07:07:57.666Z" }, + { url = "https://files.pythonhosted.org/packages/9f/c4/0ab22726a04ede56f689476b760f98f8f46607caecff993017ac1b64aa5d/scikit_learn-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809", size = 8019359, upload-time = "2025-12-10T07:07:59.838Z" }, + { url = "https://files.pythonhosted.org/packages/24/90/344a67811cfd561d7335c1b96ca21455e7e472d281c3c279c4d3f2300236/scikit_learn-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb", size = 7641898, upload-time = "2025-12-10T07:08:01.36Z" }, + { url = "https://files.pythonhosted.org/packages/03/aa/e22e0768512ce9255eba34775be2e85c2048da73da1193e841707f8f039c/scikit_learn-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a", size = 8513770, upload-time = "2025-12-10T07:08:03.251Z" }, + { url = "https://files.pythonhosted.org/packages/58/37/31b83b2594105f61a381fc74ca19e8780ee923be2d496fcd8d2e1147bd99/scikit_learn-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e", size = 8044458, upload-time = "2025-12-10T07:08:05.336Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5a/3f1caed8765f33eabb723596666da4ebbf43d11e96550fb18bdec42b467b/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57", size = 8610341, upload-time = "2025-12-10T07:08:07.732Z" }, + { url = "https://files.pythonhosted.org/packages/38/cf/06896db3f71c75902a8e9943b444a56e727418f6b4b4a90c98c934f51ed4/scikit_learn-1.8.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e", size = 8900022, upload-time = "2025-12-10T07:08:09.862Z" }, + { url = "https://files.pythonhosted.org/packages/1c/f9/9b7563caf3ec8873e17a31401858efab6b39a882daf6c1bfa88879c0aa11/scikit_learn-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271", size = 7989409, upload-time = "2025-12-10T07:08:12.028Z" }, + { url = "https://files.pythonhosted.org/packages/49/bd/1f4001503650e72c4f6009ac0c4413cb17d2d601cef6f71c0453da2732fc/scikit_learn-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3", size = 7619760, upload-time = "2025-12-10T07:08:13.688Z" }, + { url = "https://files.pythonhosted.org/packages/d2/7d/a630359fc9dcc95496588c8d8e3245cc8fd81980251079bc09c70d41d951/scikit_learn-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735", size = 8826045, upload-time = "2025-12-10T07:08:15.215Z" }, + { url = "https://files.pythonhosted.org/packages/cc/56/a0c86f6930cfcd1c7054a2bc417e26960bb88d32444fe7f71d5c2cfae891/scikit_learn-1.8.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd", size = 8420324, upload-time = "2025-12-10T07:08:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/46/1e/05962ea1cebc1cf3876667ecb14c283ef755bf409993c5946ade3b77e303/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e", size = 8680651, upload-time = "2025-12-10T07:08:19.952Z" }, + { url = "https://files.pythonhosted.org/packages/fe/56/a85473cd75f200c9759e3a5f0bcab2d116c92a8a02ee08ccd73b870f8bb4/scikit_learn-1.8.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb", size = 8925045, upload-time = "2025-12-10T07:08:22.11Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b7/64d8cfa896c64435ae57f4917a548d7ac7a44762ff9802f75a79b77cb633/scikit_learn-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702", size = 8507994, upload-time = "2025-12-10T07:08:23.943Z" }, + { url = "https://files.pythonhosted.org/packages/5e/37/e192ea709551799379958b4c4771ec507347027bb7c942662c7fbeba31cb/scikit_learn-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde", size = 7869518, upload-time = "2025-12-10T07:08:25.71Z" }, + { url = "https://files.pythonhosted.org/packages/24/05/1af2c186174cc92dcab2233f327336058c077d38f6fe2aceb08e6ab4d509/scikit_learn-1.8.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3", size = 8528667, upload-time = "2025-12-10T07:08:27.541Z" }, + { url = "https://files.pythonhosted.org/packages/a8/25/01c0af38fe969473fb292bba9dc2b8f9b451f3112ff242c647fee3d0dfe7/scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7", size = 8066524, upload-time = "2025-12-10T07:08:29.822Z" }, + { url = "https://files.pythonhosted.org/packages/be/ce/a0623350aa0b68647333940ee46fe45086c6060ec604874e38e9ab7d8e6c/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6", size = 8657133, upload-time = "2025-12-10T07:08:31.865Z" }, + { url = "https://files.pythonhosted.org/packages/b8/cb/861b41341d6f1245e6ca80b1c1a8c4dfce43255b03df034429089ca2a2c5/scikit_learn-1.8.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4", size = 8923223, upload-time = "2025-12-10T07:08:34.166Z" }, + { url = "https://files.pythonhosted.org/packages/76/18/a8def8f91b18cd1ba6e05dbe02540168cb24d47e8dcf69e8d00b7da42a08/scikit_learn-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6", size = 8096518, upload-time = "2025-12-10T07:08:36.339Z" }, + { url = "https://files.pythonhosted.org/packages/d1/77/482076a678458307f0deb44e29891d6022617b2a64c840c725495bee343f/scikit_learn-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242", size = 7754546, upload-time = "2025-12-10T07:08:38.128Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d1/ef294ca754826daa043b2a104e59960abfab4cf653891037d19dd5b6f3cf/scikit_learn-1.8.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7", size = 8848305, upload-time = "2025-12-10T07:08:41.013Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e2/b1f8b05138ee813b8e1a4149f2f0d289547e60851fd1bb268886915adbda/scikit_learn-1.8.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9", size = 8432257, upload-time = "2025-12-10T07:08:42.873Z" }, + { url = "https://files.pythonhosted.org/packages/26/11/c32b2138a85dcb0c99f6afd13a70a951bfdff8a6ab42d8160522542fb647/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f", size = 8678673, upload-time = "2025-12-10T07:08:45.362Z" }, + { url = "https://files.pythonhosted.org/packages/c7/57/51f2384575bdec454f4fe4e7a919d696c9ebce914590abf3e52d47607ab8/scikit_learn-1.8.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9", size = 8922467, upload-time = "2025-12-10T07:08:47.408Z" }, + { url = "https://files.pythonhosted.org/packages/35/4d/748c9e2872637a57981a04adc038dacaa16ba8ca887b23e34953f0b3f742/scikit_learn-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2", size = 8774395, upload-time = "2025-12-10T07:08:49.337Z" }, + { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647, upload-time = "2025-12-10T07:08:51.601Z" }, +] + [[package]] name = "scipy" version = "1.15.3" @@ -3598,6 +3824,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/47/a494741db7280eae6dc033510c319e34d42dd41b7ac0c7ead39354d1a2b5/scipy-1.16.3-cp314-cp314t-win_arm64.whl", hash = "sha256:21d9d6b197227a12dcbf9633320a4e34c6b0e51c57268df255a0942983bac562", size = 26464127, upload-time = "2025-10-28T17:38:11.34Z" }, ] +[[package]] +name = "sentence-transformers" +version = "5.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = 
"scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, + { name = "tqdm" }, + { name = "transformers" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/bc/0bc9c0ec1cf83ab2ec6e6f38667d167349b950fff6dd2086b79bd360eeca/sentence_transformers-5.2.2.tar.gz", hash = "sha256:7033ee0a24bc04c664fd490abf2ef194d387b3a58a97adcc528783ff505159fa", size = 381607, upload-time = "2026-01-27T11:11:02.658Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/21/7e925890636791386e81b52878134f114d63072e79fffe14cdcc5e7a5e6a/sentence_transformers-5.2.2-py3-none-any.whl", hash = "sha256:280ac54bffb84c110726b4d8848ba7b7c60813b9034547f8aea6e9a345cd1c23", size = 494106, upload-time = "2026-01-27T11:11:00.983Z" }, +] + [[package]] name = "setuptools" version = "80.9.0" @@ -3750,6 +3999,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, ] +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, +] + [[package]] name = "tiktoken" version = "0.12.0"