# HELP python_gc_objects_collected_total Objects collected during gc
# TYPE python_gc_objects_collected_total counter
python_gc_objects_collected_total{generation="0"} 11967.0
python_gc_objects_collected_total{generation="1"} 1552.0
python_gc_objects_collected_total{generation="2"} 855.0
# HELP python_gc_objects_uncollectable_total Uncollectable objects found during GC
# TYPE python_gc_objects_uncollectable_total counter
python_gc_objects_uncollectable_total{generation="0"} 0.0
python_gc_objects_uncollectable_total{generation="1"} 0.0
python_gc_objects_uncollectable_total{generation="2"} 0.0
# HELP python_gc_collections_total Number of times this generation was collected
# TYPE python_gc_collections_total counter
python_gc_collections_total{generation="0"} 1341.0
python_gc_collections_total{generation="1"} 121.0
python_gc_collections_total{generation="2"} 9.0
# HELP python_info Python platform information
# TYPE python_info gauge
python_info{implementation="CPython",major="3",minor="12",patchlevel="3",version="3.12.3"} 1.0
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 3.8945837056e+010
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 1.350602752e+09
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.77980807988e+09
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 41.7
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 64.0
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 65535.0
# HELP vllm:estimated_flops_per_gpu_total Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_flops_per_gpu_total counter
vllm:estimated_flops_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_flops_per_gpu_created Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_flops_per_gpu_created gauge
vllm:estimated_flops_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808185822307e+09
# HELP vllm:estimated_read_bytes_per_gpu_total Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_read_bytes_per_gpu_total counter
vllm:estimated_read_bytes_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_read_bytes_per_gpu_created Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_read_bytes_per_gpu_created gauge
vllm:estimated_read_bytes_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858223376e+09
# HELP vllm:estimated_write_bytes_per_gpu_total Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_write_bytes_per_gpu_total counter
vllm:estimated_write_bytes_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_write_bytes_per_gpu_created Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_write_bytes_per_gpu_created gauge
vllm:estimated_write_bytes_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858223584e+09
# HELP vllm:num_requests_running Number of requests in model execution batches.
# TYPE vllm:num_requests_running gauge
vllm:num_requests_running{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:num_requests_waiting Number of requests waiting to be processed.
# TYPE vllm:num_requests_waiting gauge
vllm:num_requests_waiting{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:engine_sleep_state Engine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.
# TYPE vllm:engine_sleep_state gauge
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="awake"} 1.0
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="weights_offloaded"} 0.0
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="discard_all"} 0.0
# HELP vllm:kv_cache_usage_perc KV-cache usage. 1 means 100 percent usage.
# TYPE vllm:kv_cache_usage_perc gauge
vllm:kv_cache_usage_perc{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prefix_cache_queries_total Prefix cache queries, in terms of number of queried tokens.
# TYPE vllm:prefix_cache_queries_total counter
vllm:prefix_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.483835e+06
# HELP vllm:prefix_cache_queries_created Prefix cache queries, in terms of number of queried tokens.
# TYPE vllm:prefix_cache_queries_created gauge
vllm:prefix_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808185822533e+09
# HELP vllm:prefix_cache_hits_total Prefix cache hits, in terms of number of cached tokens.
# TYPE vllm:prefix_cache_hits_total counter
vllm:prefix_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prefix_cache_hits_created Prefix cache hits, in terms of number of cached tokens.
# TYPE vllm:prefix_cache_hits_created gauge
vllm:prefix_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858225553e+09
# HELP vllm:external_prefix_cache_queries_total External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.
# TYPE vllm:external_prefix_cache_queries_total counter
vllm:external_prefix_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:external_prefix_cache_queries_created External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.
# TYPE vllm:external_prefix_cache_queries_created gauge
vllm:external_prefix_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808185822571e+09
# HELP vllm:external_prefix_cache_hits_total External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.
# TYPE vllm:external_prefix_cache_hits_total counter
vllm:external_prefix_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:external_prefix_cache_hits_created External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.
# TYPE vllm:external_prefix_cache_hits_created gauge
vllm:external_prefix_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858225827e+09
# HELP vllm:mm_cache_queries_total Multi-modal cache queries, in terms of number of queried items.
# TYPE vllm:mm_cache_queries_total counter
vllm:mm_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:mm_cache_queries_created Multi-modal cache queries, in terms of number of queried items.
# TYPE vllm:mm_cache_queries_created gauge
vllm:mm_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858225946e+09
# HELP vllm:mm_cache_hits_total Multi-modal cache hits, in terms of number of cached items.
# TYPE vllm:mm_cache_hits_total counter
vllm:mm_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:mm_cache_hits_created Multi-modal cache hits, in terms of number of cached items.
# TYPE vllm:mm_cache_hits_created gauge
vllm:mm_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858226066e+09
# HELP vllm:num_preemptions_total Cumulative number of preemption from the engine.
# TYPE vllm:num_preemptions_total counter
vllm:num_preemptions_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:num_preemptions_created Cumulative number of preemption from the engine.
# TYPE vllm:num_preemptions_created gauge
vllm:num_preemptions_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858226185e+09
# HELP vllm:prompt_tokens_total Number of prefill tokens processed.
# TYPE vllm:prompt_tokens_total counter
vllm:prompt_tokens_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.483835e+06
# HELP vllm:prompt_tokens_created Number of prefill tokens processed.
# TYPE vllm:prompt_tokens_created gauge
vllm:prompt_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858226302e+09
# HELP vllm:prompt_tokens_by_source_total Number of prompt tokens by source.
# TYPE vllm:prompt_tokens_by_source_total counter
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_compute"} 1.483835e+06
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_cache_hit"} 0.0
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="external_kv_transfer"} 0.0
# HELP vllm:prompt_tokens_by_source_created Number of prompt tokens by source.
# TYPE vllm:prompt_tokens_by_source_created gauge
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_compute"} 1.779808185822646e+09
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_cache_hit"} 1.7798081858226511e+09
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="external_kv_transfer"} 1.7798081858226562e+09
# HELP vllm:prompt_tokens_cached_total Number of cached prompt tokens (local + external).
# TYPE vllm:prompt_tokens_cached_total counter
vllm:prompt_tokens_cached_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prompt_tokens_cached_created Number of cached prompt tokens (local + external).
# TYPE vllm:prompt_tokens_cached_created gauge
vllm:prompt_tokens_cached_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858226674e+09
# HELP vllm:prompt_tokens_recomputed_total Number of cached tokens recomputed for forward pass.
# TYPE vllm:prompt_tokens_recomputed_total counter
vllm:prompt_tokens_recomputed_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prompt_tokens_recomputed_created Number of cached tokens recomputed for forward pass.
# TYPE vllm:prompt_tokens_recomputed_created gauge
vllm:prompt_tokens_recomputed_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858226821e+09
# HELP vllm:generation_tokens_total Number of generation tokens processed.
# TYPE vllm:generation_tokens_total counter
vllm:generation_tokens_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 95232.0
# HELP vllm:generation_tokens_created Number of generation tokens processed.
# TYPE vllm:generation_tokens_created gauge
vllm:generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858226945e+09
# HELP vllm:request_success_total Count of successfully processed requests.
# TYPE vllm:request_success_total counter
vllm:request_success_total{engine="0",finished_reason="stop",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="length",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_success_total{engine="0",finished_reason="abort",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="error",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="repetition",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:request_success_created Count of successfully processed requests.
# TYPE vllm:request_success_created gauge
vllm:request_success_created{engine="0",finished_reason="stop",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858227305e+09
vllm:request_success_created{engine="0",finished_reason="length",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858227377e+09
vllm:request_success_created{engine="0",finished_reason="abort",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858227463e+09
vllm:request_success_created{engine="0",finished_reason="error",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858227522e+09
vllm:request_success_created{engine="0",finished_reason="repetition",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808185822759e+09
# HELP vllm:request_prompt_tokens Number of prefill tokens processed.
# TYPE vllm:request_prompt_tokens histogram
vllm:request_prompt_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prompt_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prompt_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prompt_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prompt_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prompt_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prompt_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prompt_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prompt_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.483835e+06
# HELP vllm:request_prompt_tokens_created Number of prefill tokens processed.
# TYPE vllm:request_prompt_tokens_created gauge
vllm:request_prompt_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858228157e+09
# HELP vllm:request_generation_tokens Number of generation tokens processed.
# TYPE vllm:request_generation_tokens histogram
vllm:request_generation_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_generation_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 95232.0
# HELP vllm:request_generation_tokens_created Number of generation tokens processed.
# TYPE vllm:request_generation_tokens_created gauge
vllm:request_generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858228867e+09
# HELP vllm:iteration_tokens_total Histogram of number of tokens per engine_step.
# TYPE vllm:iteration_tokens_total histogram
vllm:iteration_tokens_total_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1612.0
vllm:iteration_tokens_total_bucket{engine="0",le="8.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 12089.0
vllm:iteration_tokens_total_bucket{engine="0",le="16.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15377.0
vllm:iteration_tokens_total_bucket{engine="0",le="32.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15824.0
vllm:iteration_tokens_total_bucket{engine="0",le="64.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15824.0
vllm:iteration_tokens_total_bucket{engine="0",le="128.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15824.0
vllm:iteration_tokens_total_bucket{engine="0",le="256.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15824.0
vllm:iteration_tokens_total_bucket{engine="0",le="512.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15824.0
vllm:iteration_tokens_total_bucket{engine="0",le="1024.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15824.0
vllm:iteration_tokens_total_bucket{engine="0",le="2048.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15824.0
vllm:iteration_tokens_total_bucket{engine="0",le="4096.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16156.0
vllm:iteration_tokens_total_bucket{engine="0",le="8192.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16176.0
vllm:iteration_tokens_total_bucket{engine="0",le="16384.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16176.0
vllm:iteration_tokens_total_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16176.0
vllm:iteration_tokens_total_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16176.0
vllm:iteration_tokens_total_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.579067e+06
# HELP vllm:iteration_tokens_total_created Histogram of number of tokens per engine_step.
# TYPE vllm:iteration_tokens_total_created gauge
vllm:iteration_tokens_total_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858229313e+09
# HELP vllm:request_max_num_generation_tokens Histogram of maximum number of requested generation tokens.
# TYPE vllm:request_max_num_generation_tokens histogram
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_max_num_generation_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 95232.0
# HELP vllm:request_max_num_generation_tokens_created Histogram of maximum number of requested generation tokens.
# TYPE vllm:request_max_num_generation_tokens_created gauge
vllm:request_max_num_generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858229792e+09
# HELP vllm:request_params_n Histogram of the n request parameter.
# TYPE vllm:request_params_n histogram
vllm:request_params_n_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_n_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_n_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_n_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_n_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_n_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_n_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_n_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
# HELP vllm:request_params_n_created Histogram of the n request parameter.
# TYPE vllm:request_params_n_created gauge
vllm:request_params_n_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858230217e+09
# HELP vllm:request_params_max_tokens Histogram of the max_tokens request parameter.
# TYPE vllm:request_params_max_tokens histogram
vllm:request_params_max_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_params_max_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 95232.0
# HELP vllm:request_params_max_tokens_created Histogram of the max_tokens request parameter.
# TYPE vllm:request_params_max_tokens_created gauge
vllm:request_params_max_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858230462e+09
# HELP vllm:time_to_first_token_seconds Histogram of time to first token in seconds.
# TYPE vllm:time_to_first_token_seconds histogram
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.001",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.005",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.02",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.04",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.06",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.08",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.25",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 5.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 318.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 368.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="160.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="640.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="2560.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:time_to_first_token_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 132.1748490333557
# HELP vllm:time_to_first_token_seconds_created Histogram of time to first token in seconds.
# TYPE vllm:time_to_first_token_seconds_created gauge
vllm:time_to_first_token_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858230832e+09
# HELP vllm:inter_token_latency_seconds Histogram of inter-token latency in seconds.
# TYPE vllm:inter_token_latency_seconds histogram
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 30101.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.025",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90102.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.05",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92270.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.075",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92321.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92321.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.15",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92321.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.2",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92482.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94700.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.4",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94700.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94847.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 94860.0
vllm:inter_token_latency_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1720.9519141139463
# HELP vllm:inter_token_latency_seconds_created Histogram of inter-token latency in seconds.
# TYPE vllm:inter_token_latency_seconds_created gauge
vllm:inter_token_latency_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858231328e+09
# HELP vllm:request_time_per_output_token_seconds Histogram of time_per_output_token_seconds per request.
# TYPE vllm:request_time_per_output_token_seconds histogram
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 40.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.025",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 319.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.05",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.075",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.15",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.2",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.4",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_time_per_output_token_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 6.74883103574097
# HELP vllm:request_time_per_output_token_seconds_created Histogram of time_per_output_token_seconds per request.
# TYPE vllm:request_time_per_output_token_seconds_created gauge
vllm:request_time_per_output_token_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808185823172e+09
# HELP vllm:e2e_request_latency_seconds Histogram of e2e request latency in seconds.
# TYPE vllm:e2e_request_latency_seconds histogram
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 8.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 26.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 195.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:e2e_request_latency_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1853.0146126747131
# HELP vllm:e2e_request_latency_seconds_created Histogram of e2e request latency in seconds.
# TYPE vllm:e2e_request_latency_seconds_created gauge
vllm:e2e_request_latency_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808185824654e+09
# HELP vllm:request_queue_time_seconds Histogram of time spent in WAITING phase for request.
# TYPE vllm:request_queue_time_seconds histogram
vllm:request_queue_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_queue_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0028111001010984182
# HELP vllm:request_queue_time_seconds_created Histogram of time spent in WAITING phase for request.
# TYPE vllm:request_queue_time_seconds_created gauge
vllm:request_queue_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858247194e+09
# HELP vllm:request_inference_time_seconds Histogram of time spent in RUNNING phase for request.
# TYPE vllm:request_inference_time_seconds histogram
vllm:request_inference_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 8.0
vllm:request_inference_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 27.0
vllm:request_inference_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 201.0
vllm:request_inference_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_inference_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1831.554692038335
# HELP vllm:request_inference_time_seconds_created Histogram of time spent in RUNNING phase for request.
# TYPE vllm:request_inference_time_seconds_created gauge
vllm:request_inference_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808185824763e+09
# HELP vllm:request_prefill_time_seconds Histogram of time spent in PREFILL phase for request.
# TYPE vllm:request_prefill_time_seconds histogram
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 304.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 355.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 369.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 371.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 110.60277792438865
# HELP vllm:request_prefill_time_seconds_created Histogram of time spent in PREFILL phase for request.
# TYPE vllm:request_prefill_time_seconds_created gauge
vllm:request_prefill_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858248146e+09
# HELP vllm:request_decode_time_seconds Histogram of time spent in DECODE phase for request.
# TYPE vllm:request_decode_time_seconds histogram
vllm:request_decode_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 3.0
vllm:request_decode_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 14.0
vllm:request_decode_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 35.0
vllm:request_decode_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 224.0
vllm:request_decode_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_decode_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1720.9519141139463
# HELP vllm:request_decode_time_seconds_created Histogram of time spent in DECODE phase for request.
# TYPE vllm:request_decode_time_seconds_created gauge
vllm:request_decode_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858248522e+09
# HELP vllm:request_prefill_kv_computed_tokens Histogram of new KV tokens computed during prefill (excluding cached tokens).
# TYPE vllm:request_prefill_kv_computed_tokens histogram
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_kv_computed_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 372.0
vllm:request_prefill_kv_computed_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.483835e+06
# HELP vllm:request_prefill_kv_computed_tokens_created Histogram of new KV tokens computed during prefill (excluding cached tokens).
# TYPE vllm:request_prefill_kv_computed_tokens_created gauge
vllm:request_prefill_kv_computed_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798081858249016e+09
# HELP vllm:cache_config_info Information of the LLMEngine CacheConfig
# TYPE vllm:cache_config_info gauge
vllm:cache_config_info{_block_size_resolved="True",block_size="16",cache_dtype="auto",calculate_kv_scales="False",cpu_kvcache_space_bytes="None",enable_prefix_caching="True",engine="0",gpu_memory_utilization="0.9",is_attention_free="False",kv_cache_memory_bytes="None",kv_offloading_backend="native",kv_offloading_size="None",kv_sharing_fast_prefill="False",mamba_block_size="None",mamba_cache_dtype="auto",mamba_cache_mode="none",mamba_page_size_padded="None",mamba_ssm_cache_dtype="auto",num_cpu_blocks="None",num_gpu_blocks="17590",num_gpu_blocks_override="None",prefix_caching_hash_algo="sha256",sliding_window="None",user_specified_block_size="False"} 1.0
# HELP http_requests_total Total number of requests by method, status and handler.
# TYPE http_requests_total counter
http_requests_total{handler="/v1/models",method="GET",status="2xx"} 1.0
http_requests_total{handler="/v1/chat/completions",method="POST",status="2xx"} 372.0
# HELP http_requests_created Total number of requests by method, status and handler.
# TYPE http_requests_created gauge
http_requests_created{handler="/v1/models",method="GET",status="2xx"} 1.7798081876221206e+09
http_requests_created{handler="/v1/chat/completions",method="POST",status="2xx"} 1.7798081937688167e+09
# HELP http_request_size_bytes Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_request_size_bytes summary
http_request_size_bytes_count{handler="/v1/models"} 1.0
http_request_size_bytes_sum{handler="/v1/models"} 0.0
http_request_size_bytes_count{handler="/v1/chat/completions"} 372.0
http_request_size_bytes_sum{handler="/v1/chat/completions"} 1.961928e+06
# HELP http_request_size_bytes_created Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_request_size_bytes_created gauge
http_request_size_bytes_created{handler="/v1/models"} 1.7798081876221485e+09
http_request_size_bytes_created{handler="/v1/chat/completions"} 1.7798081937688417e+09
# HELP http_response_size_bytes Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_response_size_bytes summary
http_response_size_bytes_count{handler="/v1/models"} 1.0
http_response_size_bytes_sum{handler="/v1/models"} 558.0
http_response_size_bytes_count{handler="/v1/chat/completions"} 372.0
http_response_size_bytes_sum{handler="/v1/chat/completions"} 0.0
# HELP http_response_size_bytes_created Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_response_size_bytes_created gauge
http_response_size_bytes_created{handler="/v1/models"} 1.779808187622177e+09
http_response_size_bytes_created{handler="/v1/chat/completions"} 1.7798081937688694e+09
# HELP http_request_duration_highr_seconds Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. 
# TYPE http_request_duration_highr_seconds histogram
http_request_duration_highr_seconds_bucket{le="0.01"} 1.0
http_request_duration_highr_seconds_bucket{le="0.025"} 1.0
http_request_duration_highr_seconds_bucket{le="0.05"} 1.0
http_request_duration_highr_seconds_bucket{le="0.075"} 1.0
http_request_duration_highr_seconds_bucket{le="0.1"} 1.0
http_request_duration_highr_seconds_bucket{le="0.25"} 1.0
http_request_duration_highr_seconds_bucket{le="0.5"} 1.0
http_request_duration_highr_seconds_bucket{le="0.75"} 1.0
http_request_duration_highr_seconds_bucket{le="1.0"} 1.0
http_request_duration_highr_seconds_bucket{le="1.5"} 1.0
http_request_duration_highr_seconds_bucket{le="2.0"} 9.0
http_request_duration_highr_seconds_bucket{le="2.5"} 27.0
http_request_duration_highr_seconds_bucket{le="3.0"} 48.0
http_request_duration_highr_seconds_bucket{le="3.5"} 78.0
http_request_duration_highr_seconds_bucket{le="4.0"} 114.0
http_request_duration_highr_seconds_bucket{le="4.5"} 151.0
http_request_duration_highr_seconds_bucket{le="5.0"} 196.0
http_request_duration_highr_seconds_bucket{le="7.5"} 342.0
http_request_duration_highr_seconds_bucket{le="10.0"} 373.0
http_request_duration_highr_seconds_bucket{le="30.0"} 373.0
http_request_duration_highr_seconds_bucket{le="60.0"} 373.0
http_request_duration_highr_seconds_bucket{le="+Inf"} 373.0
http_request_duration_highr_seconds_count 373.0
http_request_duration_highr_seconds_sum 1853.5824478221475
# HELP http_request_duration_highr_seconds_created Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. 
# TYPE http_request_duration_highr_seconds_created gauge
http_request_duration_highr_seconds_created 1.7798081863473852e+09
# HELP http_request_duration_seconds Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. 
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{handler="/v1/models",le="0.1",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="0.5",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="1.0",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="+Inf",method="GET"} 1.0
http_request_duration_seconds_count{handler="/v1/models",method="GET"} 1.0
http_request_duration_seconds_sum{handler="/v1/models",method="GET"} 0.00240229198243469
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.1",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.5",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="1.0",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="+Inf",method="POST"} 372.0
http_request_duration_seconds_count{handler="/v1/chat/completions",method="POST"} 372.0
http_request_duration_seconds_sum{handler="/v1/chat/completions",method="POST"} 1853.580045530165
# HELP http_request_duration_seconds_created Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. 
# TYPE http_request_duration_seconds_created gauge
http_request_duration_seconds_created{handler="/v1/models",method="GET"} 1.779808187622212e+09
http_request_duration_seconds_created{handler="/v1/chat/completions",method="POST"} 1.7798081937689064e+09
