Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
073fe61
feat: Replace legacy spillover logic with Waterfall LRU architecture
hazemawadalla Dec 9, 2025
2eb39cf
Fix two runtime errors in RAG-enabled benchmark mode
hazemawadalla Dec 19, 2025
f78bf60
Add detailed README.md for running the different invocations of kv-ca…
hazemawadalla Dec 19, 2025
2464edf
fix: line endings from dos2unix; increase cpu memory to 4GB for mlper…
hazemawadalla Dec 19, 2025
70b8f69
Update MLperf v3 KV cache proposal.md to recommend using a minimum of…
hazemawadalla Dec 19, 2025
9e60b98
Add storage throughput metric, ShareGPT integration, LMCache validati…
hazemawadalla Jan 10, 2026
db82626
Update MLPerf v3 submission guidelines with discovery test validation
hazemawadalla Jan 13, 2026
f1ff963
Improve test suite with HTML reporting and flexible tier assertions
hazemawadalla Jan 13, 2026
e016954
Add pytest-html dependency for HTML test reports
hazemawadalla Jan 13, 2026
c1e5ff7
Add unit test HTML report showing all 112 tests passing
hazemawadalla Jan 13, 2026
e995340
Update NVMe Bandwidth specification to 14,000 MB/s
hazemawadalla Jan 13, 2026
bad674c
Fix KV cache size per token values in discovery doc
hazemawadalla Jan 13, 2026
2159bef
Merge pull request #224 from hazemawadalla/TF_KVCache
FileSystemGuy Jan 13, 2026
d1fc97a
feat(kv-cache): MLPerf v3.0 compliance and configuration overhaul
hazemawadalla Jan 27, 2026
d9715bc
feat(wrapper): config integration and workload automation
hazemawadalla Jan 27, 2026
001fd3b
test(kv-cache): comprehensive pytest suite for v3.0 features
hazemawadalla Jan 27, 2026
2956288
docs(readme): comprehensive documentation for v3.0
hazemawadalla Jan 27, 2026
166f2b2
test(results): add pytest HTML test report
hazemawadalla Jan 27, 2026
99b42f0
feat(xlsx): extended metrics export for v3.0
hazemawadalla Jan 27, 2026
1bfe885
deps(requirements): add pyyaml for config support
hazemawadalla Jan 27, 2026
8a6aa50
config: add default YAML configuration file
hazemawadalla Jan 27, 2026
3db89bd
Refactor monolithic kv-cache.py into modular kv_cache/ package
hazemawadalla Feb 10, 2026
e38cfe9
Fix DeepSeek-V3 MLA values in README, move validate.sh to utils/
hazemawadalla Feb 10, 2026
f4c10a2
docs: fix decode_batch_size shown as hardcoded in proposal
hazemawadalla Feb 10, 2026
f7ecca1
docs: clarify eviction mechanisms in proposal
hazemawadalla Feb 10, 2026
0bf572b
Merge hazem/modular-refactor into TF_KVCache with conflict resolution
FileSystemGuy Feb 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,883 changes: 2,679 additions & 1,204 deletions kv_cache_benchmark/MLperf v3 KV cache proposal.md

Large diffs are not rendered by default.

Binary file not shown.
2,333 changes: 1,718 additions & 615 deletions kv_cache_benchmark/README.md

Large diffs are not rendered by default.

357 changes: 357 additions & 0 deletions kv_cache_benchmark/config.yaml

Large diffs are not rendered by default.

3,235 changes: 0 additions & 3,235 deletions kv_cache_benchmark/kv-cache.py

This file was deleted.

3,151 changes: 0 additions & 3,151 deletions kv_cache_benchmark/kv-cache_sharegpt_replay.py

This file was deleted.

145 changes: 145 additions & 0 deletions kv_cache_benchmark/kv_cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""
KV Cache Benchmark v3.0 — modular package.

Re-exports all public symbols so existing code can do:
from kv_cache import MultiTierCache, IntegratedBenchmark, ...
"""

# Compatibility flags
from kv_cache._compat import (
HAS_CUPY, HAS_YAML, HAS_TORCH, HAS_TIKTOKEN,
CUPY_AVAILABLE, YAML_AVAILABLE, TORCH_AVAILABLE, TIKTOKEN_AVAILABLE,
HAS_PANDAS, PANDAS_AVAILABLE,
HAS_OPENPYXL, OPENPYXL_AVAILABLE,
cp,
)

# Configuration
from kv_cache.config import (
ConfigLoader,
cfg,
get_config,
set_config,
)

# Core data models
from kv_cache.models import (
ModelConfig,
MODEL_CONFIGS,
InferencePhase,
GenerationMode,
GENERATION_TIMING,
QoSLevel,
QoSSLA,
QOS_PROFILES,
get_qos_profiles,
UserProfile,
InferenceRequest,
)

# Conversation management
from kv_cache.conversation import (
ConversationState,
ConversationManager,
)

# Prefix caching
from kv_cache.prefix_cache import (
PrefixType,
PrefixCacheEntry,
PrefixMatcher,
PrefixCacheManager,
)

# RAG workload
from kv_cache.rag import (
RAGChunk,
RAGDocument,
RAGQuery,
RAGDocumentManager,
)

# Storage backends
from kv_cache.backends import (
StorageBackend,
CPUMemoryBackend,
NVMeBackend,
)

# GPU backend is optional (requires CUDA)
# Best-effort import: any exception raised while importing the backend is
# swallowed. When that happens the name GPUMemoryBackend is left unbound in
# this package, so callers must not assume the attribute exists.
try:
    from kv_cache.backends import GPUMemoryBackend
except Exception:
    pass

# Core cache engine
from kv_cache.cache import (
KVCacheGenerator,
MultiTierCache,
)

# Monitoring and autoscaling
from kv_cache.monitoring import (
StorageMetrics,
StorageMonitor,
WorkloadAutoscaler,
QoSMonitor,
)

# Workload generation and validation
from kv_cache.workload import (
RealTraceEntry,
ValidationEngine,
UserSimulator,
ShareGPTDatasetLoader,
validate_args,
MAX_USERS,
MAX_DURATION_SECONDS,
MAX_GPU_MEMORY_GB,
MAX_CPU_MEMORY_GB,
FORBIDDEN_CACHE_PREFIXES,
)

# Benchmark orchestrator
from kv_cache.benchmark import IntegratedBenchmark

# CLI
from kv_cache.cli import (
export_results_to_xlsx,
main,
)

# Public API of the package: the names bound by ``from kv_cache import *``.
__all__ = [
    # Compat flags
    'HAS_CUPY', 'HAS_YAML', 'HAS_TORCH', 'HAS_TIKTOKEN',
    'CUPY_AVAILABLE', 'YAML_AVAILABLE', 'TORCH_AVAILABLE', 'TIKTOKEN_AVAILABLE',
    'HAS_PANDAS', 'PANDAS_AVAILABLE', 'HAS_OPENPYXL', 'OPENPYXL_AVAILABLE',
    'cp',
    # Config
    'ConfigLoader', 'cfg', 'get_config', 'set_config',
    # Models
    'ModelConfig', 'MODEL_CONFIGS',
    'InferencePhase', 'GenerationMode', 'GENERATION_TIMING',
    'QoSLevel', 'QoSSLA', 'QOS_PROFILES', 'get_qos_profiles',
    'UserProfile', 'InferenceRequest',
    # Conversation
    'ConversationState', 'ConversationManager',
    # Prefix cache
    'PrefixType', 'PrefixCacheEntry', 'PrefixMatcher', 'PrefixCacheManager',
    # RAG
    'RAGChunk', 'RAGDocument', 'RAGQuery', 'RAGDocumentManager',
    # Backends
    'StorageBackend', 'GPUMemoryBackend', 'CPUMemoryBackend', 'NVMeBackend',
    # Cache engine
    'KVCacheGenerator', 'MultiTierCache',
    # Monitoring
    'StorageMetrics', 'StorageMonitor', 'WorkloadAutoscaler', 'QoSMonitor',
    # Workload
    'RealTraceEntry', 'ValidationEngine', 'UserSimulator', 'ShareGPTDatasetLoader',
    'validate_args', 'MAX_USERS', 'MAX_DURATION_SECONDS',
    'MAX_GPU_MEMORY_GB', 'MAX_CPU_MEMORY_GB', 'FORBIDDEN_CACHE_PREFIXES',
    # Benchmark
    'IntegratedBenchmark',
    # CLI
    'export_results_to_xlsx', 'main',
]

# GPUMemoryBackend is imported on a best-effort basis earlier in this module
# and may therefore be unbound. Every name in __all__ must exist, otherwise
# ``from kv_cache import *`` raises AttributeError, so the optional name is
# dropped when its import did not succeed. All other names are imported
# unconditionally and are deliberately left unfiltered so that a typo in
# this list still fails loudly.
if 'GPUMemoryBackend' not in globals():
    __all__.remove('GPUMemoryBackend')
64 changes: 64 additions & 0 deletions kv_cache_benchmark/kv_cache/_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
Optional dependency detection for KV Cache Benchmark.

Centralizes try-import guards so other modules can check availability
without scattered try/except blocks.
"""

# PyYAML is an optional dependency used for config file loading.
# ``yaml`` is bound to the module when it is importable, otherwise to None.
try:
    import yaml
except ImportError:
    yaml = None
    HAS_YAML = False
else:
    HAS_YAML = True

# Second spelling of the same flag, kept for backward compatibility.
YAML_AVAILABLE = HAS_YAML

# Optional GPU libraries
# ``torch`` is bound to the module when it is importable, otherwise to None.
try:
    import torch
except ImportError:
    torch = None
    HAS_TORCH = False
else:
    HAS_TORCH = True

# Second spelling of the same flag.
TORCH_AVAILABLE = HAS_TORCH

# CuPy is optional; ``cp`` is the module when importable, otherwise None.
try:
    import cupy as cp
except ImportError:
    cp = None
    HAS_CUPY = False
else:
    HAS_CUPY = True

# Second spelling of the same flag.
CUPY_AVAILABLE = HAS_CUPY

# tiktoken is optional; the name is the module when importable, otherwise None.
try:
    import tiktoken
except ImportError:
    tiktoken = None
    HAS_TIKTOKEN = False
else:
    HAS_TIKTOKEN = True

# Second spelling of the same flag.
TIKTOKEN_AVAILABLE = HAS_TIKTOKEN

# Optional pandas/openpyxl for XLSX output
# ``pd`` is bound to the pandas module when importable, otherwise to None.
try:
    import pandas as pd
except ImportError:
    pd = None
    HAS_PANDAS = False
else:
    HAS_PANDAS = True

# Second spelling of the same flag.
PANDAS_AVAILABLE = HAS_PANDAS

# openpyxl is optional; the name is the module when importable, otherwise None.
try:
    import openpyxl
except ImportError:
    openpyxl = None
    HAS_OPENPYXL = False
else:
    HAS_OPENPYXL = True

# Second spelling of the same flag.
OPENPYXL_AVAILABLE = HAS_OPENPYXL
Loading