From 48ccb9ce408fb9106ed6bd2f521cd96c83db87ed Mon Sep 17 00:00:00 2001
From: Teeeio <1712003847@qq.com>
Date: Tue, 20 Jan 2026 07:29:59 +0000
Subject: [PATCH] Fix: Correct limit_mm_per_prompt parameter format for vLLM

The vLLM --limit-mm-per-prompt argument is parsed with type=json.loads,
which requires the value to be a valid JSON string. The original configs
used an invalid format, causing a JSON parse error at startup.

Changes:
- Fixed the format in 6 serve configuration files
- Changed from: limit_mm_per_prompt: image=X
- Changed to:   limit_mm_per_prompt: '{"image": X}'

Tested with vLLM 0.13.0; the service starts successfully.
---
 examples/llava_onevision/conf/serve/7b.yaml | 2 +-
 examples/minicpm_o_2.6/conf/serve/7b.yaml   | 2 +-
 examples/robobrain/conf/serve/7b.yaml       | 2 +-
 examples/robobrain2/conf/serve/32b.yaml     | 2 +-
 examples/robobrain2/conf/serve/3b.yaml      | 2 +-
 examples/robobrain2/conf/serve/7b.yaml      | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/llava_onevision/conf/serve/7b.yaml b/examples/llava_onevision/conf/serve/7b.yaml
index 65112005a..fbfc1557a 100644
--- a/examples/llava_onevision/conf/serve/7b.yaml
+++ b/examples/llava_onevision/conf/serve/7b.yaml
@@ -6,7 +6,7 @@
 pipeline_parallel_size: 1
 gpu_memory_utilization: 0.9
 max_model_len: 32768
-limit_mm_per_prompt: image=8
+limit_mm_per_prompt: '{"image": 8}'
 max_num_seqs: 16
 enforce_eager: true
 trust_remote_code: true
diff --git a/examples/minicpm_o_2.6/conf/serve/7b.yaml b/examples/minicpm_o_2.6/conf/serve/7b.yaml
index 6d5fa06ac..7edf0f8e7 100644
--- a/examples/minicpm_o_2.6/conf/serve/7b.yaml
+++ b/examples/minicpm_o_2.6/conf/serve/7b.yaml
@@ -5,7 +5,7 @@
 pipeline_parallel_size: 1
 gpu_memory_utilization: 0.9
 max_num_seqs: 256
-limit_mm_per_prompt: image=18
+limit_mm_per_prompt: '{"image": 18}'
 port: 9010
 trust_remote_code: true
 enable_chunked_prefill: true
diff --git a/examples/robobrain/conf/serve/7b.yaml b/examples/robobrain/conf/serve/7b.yaml
index 8505a31cf..68592882d 100644
--- a/examples/robobrain/conf/serve/7b.yaml
+++ b/examples/robobrain/conf/serve/7b.yaml
@@ -6,7 +6,7 @@
 pipeline_parallel_size: 1
 gpu_memory_utilization: 0.9
 max_model_len: 32768
-limit_mm_per_prompt: image=8
+limit_mm_per_prompt: '{"image": 8}'
 max_num_seqs: 16
 trust_remote_code: true
 enable_chunked_prefill: false
diff --git a/examples/robobrain2/conf/serve/32b.yaml b/examples/robobrain2/conf/serve/32b.yaml
index 22d461d93..5a0c0fe00 100644
--- a/examples/robobrain2/conf/serve/32b.yaml
+++ b/examples/robobrain2/conf/serve/32b.yaml
@@ -7,7 +7,7 @@
 pipeline_parallel_size: 1
 max_num_seqs: 8 # Even at full 32,768 context usage, 8 concurrent operations won't trigger OOM
 gpu_memory_utilization: 0.9
-limit_mm_per_prompt: image=18 # should be customized, 18 images/request is enough for most scenarios
+limit_mm_per_prompt: '{"image": 18}' # should be customized, 18 images/request is enough for most scenarios
 port: 9010
 trust_remote_code: true
 enforce_eager: false # set true if use FlagGems
diff --git a/examples/robobrain2/conf/serve/3b.yaml b/examples/robobrain2/conf/serve/3b.yaml
index d378a87c8..1d7cdf308 100644
--- a/examples/robobrain2/conf/serve/3b.yaml
+++ b/examples/robobrain2/conf/serve/3b.yaml
@@ -7,7 +7,7 @@
 pipeline_parallel_size: 1
 max_num_seqs: 8 # Even at full 32,768 context usage, 8 concurrent operations won't trigger OOM
 gpu_memory_utilization: 0.9
-limit_mm_per_prompt: image=18 # should be customized, 18 images/request is enough for most scenarios
+limit_mm_per_prompt: '{"image": 18}' # should be customized, 18 images/request is enough for most scenarios
 port: 9010
 trust_remote_code: true
 enforce_eager: false # set true if use FlagGems
diff --git a/examples/robobrain2/conf/serve/7b.yaml b/examples/robobrain2/conf/serve/7b.yaml
index ff9687256..f62ecea9d 100644
--- a/examples/robobrain2/conf/serve/7b.yaml
+++ b/examples/robobrain2/conf/serve/7b.yaml
@@ -7,7 +7,7 @@
 pipeline_parallel_size: 1
 max_num_seqs: 8 # Even at full 32,768 context usage, 8 concurrent operations won't trigger OOM
 gpu_memory_utilization: 0.9
-limit_mm_per_prompt: image=18 # should be customized, 18 images/request is enough for most scenarios
+limit_mm_per_prompt: '{"image": 18}' # should be customized, 18 images/request is enough for most scenarios
 port: 9010
 trust_remote_code: true
 enforce_eager: false # set true if use FlagGems
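
Reviewer note: a minimal sketch of the parse failure the commit message
describes, using only the Python standard library (no vLLM required) and
assuming the YAML value is forwarded verbatim to --limit-mm-per-prompt:

    import json

    print(json.loads('{"image": 8}'))   # new format parses: {'image': 8}

    try:
        json.loads("image=8")           # old format is not valid JSON
    except json.JSONDecodeError as err:
        print("rejected:", err)         # Expecting value: line 1 column 1 (char 0)

The single quotes in the YAML matter here: quoted, '{"image": 8}' stays a
literal string for json.loads to parse, whereas an unquoted {"image": 8}
would be read by YAML itself as a flow mapping rather than a string.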