Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/llava_onevision/conf/serve/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
pipeline_parallel_size: 1
gpu_memory_utilization: 0.9
max_model_len: 32768
limit_mm_per_prompt: image=8
limit_mm_per_prompt: '{"image": 8}'
max_num_seqs: 16
enforce_eager: true
trust_remote_code: true
Expand Down
2 changes: 1 addition & 1 deletion examples/minicpm_o_2.6/conf/serve/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
pipeline_parallel_size: 1
gpu_memory_utilization: 0.9
max_num_seqs: 256
limit_mm_per_prompt: image=18
limit_mm_per_prompt: '{"image": 18}'
port: 9010
trust_remote_code: true
enable_chunked_prefill: true
2 changes: 1 addition & 1 deletion examples/robobrain/conf/serve/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
pipeline_parallel_size: 1
gpu_memory_utilization: 0.9
max_model_len: 32768
limit_mm_per_prompt: image=8
limit_mm_per_prompt: '{"image": 8}'
max_num_seqs: 16
trust_remote_code: true
enable_chunked_prefill: false
Expand Down
2 changes: 1 addition & 1 deletion examples/robobrain2/conf/serve/32b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
pipeline_parallel_size: 1
max_num_seqs: 8 # Even at full 32,768 context usage, 8 concurrent operations won't trigger OOM
gpu_memory_utilization: 0.9
limit_mm_per_prompt: image=18 # should be customized, 18 images/request is enough for most scenarios
limit_mm_per_prompt: '{"image": 18}' # should be customized, 18 images/request is enough for most scenarios
port: 9010
trust_remote_code: true
enforce_eager: false # set true if use FlagGems
Expand Down
2 changes: 1 addition & 1 deletion examples/robobrain2/conf/serve/3b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
pipeline_parallel_size: 1
max_num_seqs: 8 # Even at full 32,768 context usage, 8 concurrent operations won't trigger OOM
gpu_memory_utilization: 0.9
limit_mm_per_prompt: image=18 # should be customized, 18 images/request is enough for most scenarios
limit_mm_per_prompt: '{"image": 18}' # should be customized, 18 images/request is enough for most scenarios
port: 9010
trust_remote_code: true
enforce_eager: false # set true if use FlagGems
Expand Down
2 changes: 1 addition & 1 deletion examples/robobrain2/conf/serve/7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
pipeline_parallel_size: 1
max_num_seqs: 8 # Even at full 32,768 context usage, 8 concurrent operations won't trigger OOM
gpu_memory_utilization: 0.9
limit_mm_per_prompt: image=18 # should be customized, 18 images/request is enough for most scenarios
limit_mm_per_prompt: '{"image": 18}' # should be customized, 18 images/request is enough for most scenarios
port: 9010
trust_remote_code: true
enforce_eager: false # set true if use FlagGems
Expand Down