Commit 8471b27

[compile] raise on compile_size implicit padding (vllm-project#32343)
Signed-off-by: dolpm <34420038+dolpm@users.noreply.github.com>
1 parent 66652e8 commit 8471b27

File tree: 2 files changed, +79 -0 lines

    tests/compile/test_config.py
    vllm/config/compilation.py

tests/compile/test_config.py

Lines changed: 65 additions & 0 deletions
@@ -470,3 +470,68 @@ def test_cached_compilation_config(default_vllm_config):
 
     code = " ".join(code)
     assert "torch.ops._C.static_scaled_fp8_quant.default(" in code
+
+
+def test_compile_sizes_padding_validation():
+    """Test that compile_sizes with values that would be padded raises an error."""
+    # cudagraph_capture_sizes=[1, 2, 4, 8] means:
+    # - size 1 -> padded to 1
+    # - size 2 -> padded to 2
+    # - size 3 -> padded to 4
+    # - size 4 -> padded to 4
+    # - size 5 -> padded to 8
+    # etc.
+    # So compile_sizes=[3] should fail because 3 would be padded to 4
+
+    with pytest.raises(ValueError, match="would be padded to"):
+        config = CompilationConfig(
+            cudagraph_capture_sizes=[1, 2, 4, 8],
+            max_cudagraph_capture_size=8,
+            compile_sizes=[3],
+        )
+        config.post_init_cudagraph_sizes()
+
+    with pytest.raises(ValueError, match="would be padded to"):
+        config = CompilationConfig(
+            cudagraph_capture_sizes=[1, 2, 4, 8],
+            max_cudagraph_capture_size=8,
+            compile_sizes=[5],
+        )
+        config.post_init_cudagraph_sizes()
+
+    config = CompilationConfig(
+        cudagraph_capture_sizes=[1, 2, 4, 8],
+        max_cudagraph_capture_size=8,
+        compile_sizes=[1, 2, 4, 8],
+    )
+    config.post_init_cudagraph_sizes()
+    assert sorted(config.compile_sizes) == [1, 2, 4, 8]
+
+    config = CompilationConfig(
+        cudagraph_capture_sizes=[1, 2, 4, 8],
+        max_cudagraph_capture_size=8,
+        compile_sizes=["cudagraph_capture_sizes"],
+    )
+    config.post_init_cudagraph_sizes()
+    assert sorted(config.compile_sizes) == [1, 2, 4, 8]
+
+    # When cudagraphs are disabled (max_cudagraph_capture_size=0),
+    # padding validation should be skipped
+    config = CompilationConfig(
+        cudagraph_capture_sizes=[],
+        max_cudagraph_capture_size=0,
+        compile_sizes=[3, 5, 7],  # would be invalid with cudagraphs
+    )
+    config.post_init_cudagraph_sizes()
+    assert sorted(config.compile_sizes) == [3, 5, 7]
+
+    # When cudagraph_mode is NONE but capture_sizes is non-empty,
+    # padding validation should still be skipped
+    config = CompilationConfig(
+        cudagraph_capture_sizes=[1, 2, 4, 8],
+        max_cudagraph_capture_size=8,
+        cudagraph_mode=CUDAGraphMode.NONE,
+        compile_sizes=[3, 5, 7],  # would be invalid if cudagraphs were enabled
+    )
+    config.post_init_cudagraph_sizes()
+    assert sorted(config.compile_sizes) == [3, 5, 7]
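
The comment block in the new test describes the padding rule being guarded against: each batch size is rounded up to the next cudagraph capture size. The sketch below is for illustration only — `pad_to_capture_size` is a hypothetical helper, not vLLM's `compute_bs_to_padded_graph_size()` — but it reproduces the mappings the test comments list (3 -> 4, 5 -> 8):

```python
import bisect


def pad_to_capture_size(batch_size: int, capture_sizes: list[int]) -> int:
    """Round batch_size up to the nearest capture size (illustrative sketch,
    mirroring the padding rule described in the test comments above)."""
    sizes = sorted(capture_sizes)
    # bisect_left returns the index of the first capture size >= batch_size
    idx = bisect.bisect_left(sizes, batch_size)
    if idx == len(sizes):
        raise ValueError(f"{batch_size} exceeds the largest capture size {sizes[-1]}")
    return sizes[idx]


capture = [1, 2, 4, 8]
assert pad_to_capture_size(1, capture) == 1
assert pad_to_capture_size(3, capture) == 4  # 3 would be implicitly padded to 4
assert pad_to_capture_size(5, capture) == 8  # 5 would be implicitly padded to 8
```

With capture sizes [1, 2, 4, 8], a compile size of 3 or 5 never runs at its own shape; that silent mismatch is exactly what the new ValueError reports.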

vllm/config/compilation.py

Lines changed: 14 additions & 0 deletions
@@ -909,6 +909,20 @@ def post_init_cudagraph_sizes(self) -> None:
         # May get recomputed in the model runner if adjustment is needed for spec-decode
         self.compute_bs_to_padded_graph_size()
 
+        # Validate that compile_sizes won't be changed by padding.
+        # Only validate when cudagraphs are actually being used.
+        if self.compile_sizes and self.cudagraph_mode != CUDAGraphMode.NONE:
+            for size in self.compile_sizes:
+                if size <= self.max_cudagraph_capture_size:
+                    padded = self.bs_to_padded_graph_size[size]
+                    if padded != size:
+                        raise ValueError(
+                            f"compile_sizes contains {size} which would be "
+                            f"padded to {padded}. All compile_sizes must be "
+                            "values that won't be changed by cudagraph padding. "
+                            "Use values from cudagraph_capture_sizes."
+                        )
+
     def set_splitting_ops_for_v1(
         self, all2all_backend: str, data_parallel_size: int = 1
     ):
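
For context, here is a minimal sketch of how the new guard surfaces to a caller, reusing the constructor arguments from the test above. The import path (`from vllm.config import CompilationConfig`) and the direct call to `post_init_cudagraph_sizes()` are assumptions made for illustration; the test file's import lines are not part of this diff.

```python
# Sketch only: import path is assumed and may differ across vLLM versions.
from vllm.config import CompilationConfig

config = CompilationConfig(
    cudagraph_capture_sizes=[1, 2, 4, 8],
    max_cudagraph_capture_size=8,
    compile_sizes=[3],  # 3 would be implicitly padded to 4
)
try:
    config.post_init_cudagraph_sizes()
except ValueError as err:
    # e.g. "compile_sizes contains 3 which would be padded to 4. ..."
    print(f"rejected: {err}")
```

The passing cases in the test show two ways to stay valid while cudagraphs are enabled: pick compile_sizes from cudagraph_capture_sizes, or pass the literal string "cudagraph_capture_sizes" to compile every capture size.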

0 commit comments
