# HELP python_gc_objects_collected_total Objects collected during gc
# TYPE python_gc_objects_collected_total counter
python_gc_objects_collected_total{generation="0"} 11970.0
python_gc_objects_collected_total{generation="1"} 1549.0
python_gc_objects_collected_total{generation="2"} 855.0
# HELP python_gc_objects_uncollectable_total Uncollectable objects found during GC
# TYPE python_gc_objects_uncollectable_total counter
python_gc_objects_uncollectable_total{generation="0"} 0.0
python_gc_objects_uncollectable_total{generation="1"} 0.0
python_gc_objects_uncollectable_total{generation="2"} 0.0
# HELP python_gc_collections_total Number of times this generation was collected
# TYPE python_gc_collections_total counter
python_gc_collections_total{generation="0"} 1344.0
python_gc_collections_total{generation="1"} 122.0
python_gc_collections_total{generation="2"} 9.0
# HELP python_info Python platform information
# TYPE python_info gauge
python_info{implementation="CPython",major="3",minor="12",patchlevel="3",version="3.12.3"} 1.0
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 3.8946234368e+010
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 1.349824512e+09
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.77980843334e+09
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 40.91
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 64.0
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 65535.0
# HELP vllm:estimated_flops_per_gpu_total Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_flops_per_gpu_total counter
vllm:estimated_flops_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_flops_per_gpu_created Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_flops_per_gpu_created gauge
vllm:estimated_flops_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537012282e+09
# HELP vllm:estimated_read_bytes_per_gpu_total Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_read_bytes_per_gpu_total counter
vllm:estimated_read_bytes_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_read_bytes_per_gpu_created Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_read_bytes_per_gpu_created gauge
vllm:estimated_read_bytes_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370123103e+09
# HELP vllm:estimated_write_bytes_per_gpu_total Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_write_bytes_per_gpu_total counter
vllm:estimated_write_bytes_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_write_bytes_per_gpu_created Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_write_bytes_per_gpu_created gauge
vllm:estimated_write_bytes_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370123277e+09
# HELP vllm:num_requests_running Number of requests in model execution batches.
# TYPE vllm:num_requests_running gauge
vllm:num_requests_running{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:num_requests_waiting Number of requests waiting to be processed.
# TYPE vllm:num_requests_waiting gauge
vllm:num_requests_waiting{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:engine_sleep_state Engine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.
# TYPE vllm:engine_sleep_state gauge
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="awake"} 1.0
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="weights_offloaded"} 0.0
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="discard_all"} 0.0
# HELP vllm:kv_cache_usage_perc KV-cache usage. 1 means 100 percent usage.
# TYPE vllm:kv_cache_usage_perc gauge
vllm:kv_cache_usage_perc{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prefix_cache_queries_total Prefix cache queries, in terms of number of queried tokens.
# TYPE vllm:prefix_cache_queries_total counter
vllm:prefix_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.444181e+06
# HELP vllm:prefix_cache_queries_created Prefix cache queries, in terms of number of queried tokens.
# TYPE vllm:prefix_cache_queries_created gauge
vllm:prefix_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370124936e+09
# HELP vllm:prefix_cache_hits_total Prefix cache hits, in terms of number of cached tokens.
# TYPE vllm:prefix_cache_hits_total counter
vllm:prefix_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prefix_cache_hits_created Prefix cache hits, in terms of number of cached tokens.
# TYPE vllm:prefix_cache_hits_created gauge
vllm:prefix_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370125084e+09
# HELP vllm:external_prefix_cache_queries_total External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.
# TYPE vllm:external_prefix_cache_queries_total counter
vllm:external_prefix_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.444181e+06
# HELP vllm:external_prefix_cache_queries_created External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.
# TYPE vllm:external_prefix_cache_queries_created gauge
vllm:external_prefix_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537012521e+09
# HELP vllm:external_prefix_cache_hits_total External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.
# TYPE vllm:external_prefix_cache_hits_total counter
vllm:external_prefix_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:external_prefix_cache_hits_created External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.
# TYPE vllm:external_prefix_cache_hits_created gauge
vllm:external_prefix_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370125337e+09
# HELP vllm:mm_cache_queries_total Multi-modal cache queries, in terms of number of queried items.
# TYPE vllm:mm_cache_queries_total counter
vllm:mm_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:mm_cache_queries_created Multi-modal cache queries, in terms of number of queried items.
# TYPE vllm:mm_cache_queries_created gauge
vllm:mm_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370125554e+09
# HELP vllm:mm_cache_hits_total Multi-modal cache hits, in terms of number of cached items.
# TYPE vllm:mm_cache_hits_total counter
vllm:mm_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:mm_cache_hits_created Multi-modal cache hits, in terms of number of cached items.
# TYPE vllm:mm_cache_hits_created gauge
vllm:mm_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537012577e+09
# HELP vllm:num_preemptions_total Cumulative number of preemption from the engine.
# TYPE vllm:num_preemptions_total counter
vllm:num_preemptions_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:num_preemptions_created Cumulative number of preemption from the engine.
# TYPE vllm:num_preemptions_created gauge
vllm:num_preemptions_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370125916e+09
# HELP vllm:prompt_tokens_total Number of prefill tokens processed.
# TYPE vllm:prompt_tokens_total counter
vllm:prompt_tokens_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.444181e+06
# HELP vllm:prompt_tokens_created Number of prefill tokens processed.
# TYPE vllm:prompt_tokens_created gauge
vllm:prompt_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370126052e+09
# HELP vllm:prompt_tokens_by_source_total Number of prompt tokens by source.
# TYPE vllm:prompt_tokens_by_source_total counter
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_compute"} 1.444181e+06
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_cache_hit"} 0.0
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="external_kv_transfer"} 0.0
# HELP vllm:prompt_tokens_by_source_created Number of prompt tokens by source.
# TYPE vllm:prompt_tokens_by_source_created gauge
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_compute"} 1.7798085370126219e+09
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_cache_hit"} 1.779808537012627e+09
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="external_kv_transfer"} 1.7798085370126324e+09
# HELP vllm:prompt_tokens_cached_total Number of cached prompt tokens (local + external).
# TYPE vllm:prompt_tokens_cached_total counter
vllm:prompt_tokens_cached_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prompt_tokens_cached_created Number of cached prompt tokens (local + external).
# TYPE vllm:prompt_tokens_cached_created gauge
vllm:prompt_tokens_cached_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370126455e+09
# HELP vllm:prompt_tokens_recomputed_total Number of cached tokens recomputed for forward pass.
# TYPE vllm:prompt_tokens_recomputed_total counter
vllm:prompt_tokens_recomputed_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prompt_tokens_recomputed_created Number of cached tokens recomputed for forward pass.
# TYPE vllm:prompt_tokens_recomputed_created gauge
vllm:prompt_tokens_recomputed_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370126567e+09
# HELP vllm:generation_tokens_total Number of generation tokens processed.
# TYPE vllm:generation_tokens_total counter
vllm:generation_tokens_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92672.0
# HELP vllm:generation_tokens_created Number of generation tokens processed.
# TYPE vllm:generation_tokens_created gauge
vllm:generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537012669e+09
# HELP vllm:request_success_total Count of successfully processed requests.
# TYPE vllm:request_success_total counter
vllm:request_success_total{engine="0",finished_reason="stop",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="length",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_success_total{engine="0",finished_reason="abort",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="error",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="repetition",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:request_success_created Count of successfully processed requests.
# TYPE vllm:request_success_created gauge
vllm:request_success_created{engine="0",finished_reason="stop",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537012699e+09
vllm:request_success_created{engine="0",finished_reason="length",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370127063e+09
vllm:request_success_created{engine="0",finished_reason="abort",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370127132e+09
vllm:request_success_created{engine="0",finished_reason="error",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370127187e+09
vllm:request_success_created{engine="0",finished_reason="repetition",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370127244e+09
# HELP vllm:request_prompt_tokens Number of prefill tokens processed.
# TYPE vllm:request_prompt_tokens histogram
vllm:request_prompt_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prompt_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prompt_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prompt_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prompt_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prompt_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prompt_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prompt_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prompt_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.444181e+06
# HELP vllm:request_prompt_tokens_created Number of prefill tokens processed.
# TYPE vllm:request_prompt_tokens_created gauge
vllm:request_prompt_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370127792e+09
# HELP vllm:request_generation_tokens Number of generation tokens processed.
# TYPE vllm:request_generation_tokens histogram
vllm:request_generation_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_generation_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92672.0
# HELP vllm:request_generation_tokens_created Number of generation tokens processed.
# TYPE vllm:request_generation_tokens_created gauge
vllm:request_generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370128436e+09
# HELP vllm:iteration_tokens_total Histogram of number of tokens per engine_step.
# TYPE vllm:iteration_tokens_total histogram
vllm:iteration_tokens_total_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 2330.0
vllm:iteration_tokens_total_bucket{engine="0",le="8.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 12556.0
vllm:iteration_tokens_total_bucket{engine="0",le="16.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15964.0
vllm:iteration_tokens_total_bucket{engine="0",le="32.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16256.0
vllm:iteration_tokens_total_bucket{engine="0",le="64.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16256.0
vllm:iteration_tokens_total_bucket{engine="0",le="128.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16256.0
vllm:iteration_tokens_total_bucket{engine="0",le="256.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16256.0
vllm:iteration_tokens_total_bucket{engine="0",le="512.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16256.0
vllm:iteration_tokens_total_bucket{engine="0",le="1024.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16256.0
vllm:iteration_tokens_total_bucket{engine="0",le="2048.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16256.0
vllm:iteration_tokens_total_bucket{engine="0",le="4096.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16572.0
vllm:iteration_tokens_total_bucket{engine="0",le="8192.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16595.0
vllm:iteration_tokens_total_bucket{engine="0",le="16384.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16595.0
vllm:iteration_tokens_total_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16595.0
vllm:iteration_tokens_total_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16595.0
vllm:iteration_tokens_total_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.536853e+06
# HELP vllm:iteration_tokens_total_created Histogram of number of tokens per engine_step.
# TYPE vllm:iteration_tokens_total_created gauge
vllm:iteration_tokens_total_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537012879e+09
# HELP vllm:request_max_num_generation_tokens Histogram of maximum number of requested generation tokens.
# TYPE vllm:request_max_num_generation_tokens histogram
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_max_num_generation_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92672.0
# HELP vllm:request_max_num_generation_tokens_created Histogram of maximum number of requested generation tokens.
# TYPE vllm:request_max_num_generation_tokens_created gauge
vllm:request_max_num_generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370129192e+09
# HELP vllm:request_params_n Histogram of the n request parameter.
# TYPE vllm:request_params_n histogram
vllm:request_params_n_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_n_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_n_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_n_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_n_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_n_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_n_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_n_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
# HELP vllm:request_params_n_created Histogram of the n request parameter.
# TYPE vllm:request_params_n_created gauge
vllm:request_params_n_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537012967e+09
# HELP vllm:request_params_max_tokens Histogram of the max_tokens request parameter.
# TYPE vllm:request_params_max_tokens histogram
vllm:request_params_max_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_params_max_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92672.0
# HELP vllm:request_params_max_tokens_created Histogram of the max_tokens request parameter.
# TYPE vllm:request_params_max_tokens_created gauge
vllm:request_params_max_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370129972e+09
# HELP vllm:time_to_first_token_seconds Histogram of time to first token in seconds.
# TYPE vllm:time_to_first_token_seconds histogram
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.001",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.005",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.02",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.04",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.06",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.08",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.25",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 11.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 312.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 356.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="160.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="640.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="2560.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 126.39743399620056
# HELP vllm:time_to_first_token_seconds_created Histogram of time to first token in seconds.
# TYPE vllm:time_to_first_token_seconds_created gauge
vllm:time_to_first_token_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370130339e+09
# HELP vllm:inter_token_latency_seconds Histogram of inter-token latency in seconds.
# TYPE vllm:inter_token_latency_seconds histogram
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 29463.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.025",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 88034.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.05",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 89987.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.075",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90034.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90034.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.15",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90034.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.2",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90149.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92128.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.4",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92128.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92310.0
vllm:inter_token_latency_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1626.6343603626592
# HELP vllm:inter_token_latency_seconds_created Histogram of inter-token latency in seconds.
# TYPE vllm:inter_token_latency_seconds_created gauge
vllm:inter_token_latency_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537013079e+09
# HELP vllm:request_time_per_output_token_seconds Histogram of time_per_output_token_seconds per request.
# TYPE vllm:request_time_per_output_token_seconds histogram
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 45.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.025",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 316.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.05",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.075",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.15",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.2",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.4",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_time_per_output_token_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 6.378958275931998
# HELP vllm:request_time_per_output_token_seconds_created Histogram of time_per_output_token_seconds per request.
# TYPE vllm:request_time_per_output_token_seconds_created gauge
vllm:request_time_per_output_token_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370131166e+09
# HELP vllm:e2e_request_latency_seconds Histogram of e2e request latency in seconds.
# TYPE vllm:e2e_request_latency_seconds histogram
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 3.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 11.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 24.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 195.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:e2e_request_latency_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1752.9047212600708
# HELP vllm:e2e_request_latency_seconds_created Histogram of e2e request latency in seconds.
# TYPE vllm:e2e_request_latency_seconds_created gauge
vllm:e2e_request_latency_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370142846e+09
# HELP vllm:request_queue_time_seconds Histogram of time spent in WAITING phase for request.
# TYPE vllm:request_queue_time_seconds histogram
vllm:request_queue_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_queue_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0027433858485892415
# HELP vllm:request_queue_time_seconds_created Histogram of time spent in WAITING phase for request.
# TYPE vllm:request_queue_time_seconds_created gauge
vllm:request_queue_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779808537014356e+09
# HELP vllm:request_inference_time_seconds Histogram of time spent in RUNNING phase for request.
# TYPE vllm:request_inference_time_seconds histogram
vllm:request_inference_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 3.0
vllm:request_inference_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 12.0
vllm:request_inference_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 27.0
vllm:request_inference_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 200.0
vllm:request_inference_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_inference_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1733.5705015579006
# HELP vllm:request_inference_time_seconds_created Histogram of time spent in RUNNING phase for request.
# TYPE vllm:request_inference_time_seconds_created gauge
vllm:request_inference_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370144022e+09
# HELP vllm:request_prefill_time_seconds Histogram of time spent in PREFILL phase for request.
# TYPE vllm:request_prefill_time_seconds histogram
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 288.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 351.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 106.93614119524136
# HELP vllm:request_prefill_time_seconds_created Histogram of time spent in PREFILL phase for request.
# TYPE vllm:request_prefill_time_seconds_created gauge
vllm:request_prefill_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370144622e+09
# HELP vllm:request_decode_time_seconds Histogram of time spent in DECODE phase for request.
# TYPE vllm:request_decode_time_seconds histogram
vllm:request_decode_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 6.0
vllm:request_decode_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16.0
vllm:request_decode_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 42.0
vllm:request_decode_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 221.0
vllm:request_decode_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_decode_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1626.6343603626592
# HELP vllm:request_decode_time_seconds_created Histogram of time spent in DECODE phase for request.
# TYPE vllm:request_decode_time_seconds_created gauge
vllm:request_decode_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370145073e+09
# HELP vllm:request_prefill_kv_computed_tokens Histogram of new KV tokens computed during prefill (excluding cached tokens).
# TYPE vllm:request_prefill_kv_computed_tokens histogram
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_kv_computed_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:request_prefill_kv_computed_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.444181e+06
# HELP vllm:request_prefill_kv_computed_tokens_created Histogram of new KV tokens computed during prefill (excluding cached tokens).
# TYPE vllm:request_prefill_kv_computed_tokens_created gauge
vllm:request_prefill_kv_computed_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798085370145638e+09
# HELP vllm:cache_config_info Information of the LLMEngine CacheConfig
# TYPE vllm:cache_config_info gauge
vllm:cache_config_info{_block_size_resolved="True",block_size="16",cache_dtype="auto",calculate_kv_scales="False",cpu_kvcache_space_bytes="None",enable_prefix_caching="True",engine="0",gpu_memory_utilization="0.9",is_attention_free="False",kv_cache_memory_bytes="None",kv_offloading_backend="native",kv_offloading_size="None",kv_sharing_fast_prefill="False",mamba_block_size="None",mamba_cache_dtype="auto",mamba_cache_mode="none",mamba_page_size_padded="None",mamba_ssm_cache_dtype="auto",num_cpu_blocks="None",num_gpu_blocks="17590",num_gpu_blocks_override="None",prefix_caching_hash_algo="sha256",sliding_window="None",user_specified_block_size="False"} 1.0
# HELP http_requests_total Total number of requests by method, status and handler.
# TYPE http_requests_total counter
http_requests_total{handler="/v1/models",method="GET",status="2xx"} 1.0
http_requests_total{handler="/v1/chat/completions",method="POST",status="2xx"} 362.0
# HELP http_requests_created Total number of requests by method, status and handler.
# TYPE http_requests_created gauge
http_requests_created{handler="/v1/models",method="GET",status="2xx"} 1.7798085390617669e+09
http_requests_created{handler="/v1/chat/completions",method="POST",status="2xx"} 1.7798085444920475e+09
# HELP http_request_size_bytes Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_request_size_bytes summary
http_request_size_bytes_count{handler="/v1/models"} 1.0
http_request_size_bytes_sum{handler="/v1/models"} 0.0
http_request_size_bytes_count{handler="/v1/chat/completions"} 362.0
http_request_size_bytes_sum{handler="/v1/chat/completions"} 1.909188e+06
# HELP http_request_size_bytes_created Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_request_size_bytes_created gauge
http_request_size_bytes_created{handler="/v1/models"} 1.7798085390617936e+09
http_request_size_bytes_created{handler="/v1/chat/completions"} 1.7798085444920697e+09
# HELP http_response_size_bytes Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_response_size_bytes summary
http_response_size_bytes_count{handler="/v1/models"} 1.0
http_response_size_bytes_sum{handler="/v1/models"} 558.0
http_response_size_bytes_count{handler="/v1/chat/completions"} 362.0
http_response_size_bytes_sum{handler="/v1/chat/completions"} 0.0
# HELP http_response_size_bytes_created Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_response_size_bytes_created gauge
http_response_size_bytes_created{handler="/v1/models"} 1.7798085390618207e+09
http_response_size_bytes_created{handler="/v1/chat/completions"} 1.779808544492094e+09
# HELP http_request_duration_highr_seconds Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. 
# TYPE http_request_duration_highr_seconds histogram
http_request_duration_highr_seconds_bucket{le="0.01"} 1.0
http_request_duration_highr_seconds_bucket{le="0.025"} 1.0
http_request_duration_highr_seconds_bucket{le="0.05"} 1.0
http_request_duration_highr_seconds_bucket{le="0.075"} 1.0
http_request_duration_highr_seconds_bucket{le="0.1"} 1.0
http_request_duration_highr_seconds_bucket{le="0.25"} 1.0
http_request_duration_highr_seconds_bucket{le="0.5"} 1.0
http_request_duration_highr_seconds_bucket{le="0.75"} 1.0
http_request_duration_highr_seconds_bucket{le="1.0"} 1.0
http_request_duration_highr_seconds_bucket{le="1.5"} 4.0
http_request_duration_highr_seconds_bucket{le="2.0"} 12.0
http_request_duration_highr_seconds_bucket{le="2.5"} 25.0
http_request_duration_highr_seconds_bucket{le="3.0"} 57.0
http_request_duration_highr_seconds_bucket{le="3.5"} 92.0
http_request_duration_highr_seconds_bucket{le="4.0"} 108.0
http_request_duration_highr_seconds_bucket{le="4.5"} 151.0
http_request_duration_highr_seconds_bucket{le="5.0"} 196.0
http_request_duration_highr_seconds_bucket{le="7.5"} 349.0
http_request_duration_highr_seconds_bucket{le="10.0"} 363.0
http_request_duration_highr_seconds_bucket{le="30.0"} 363.0
http_request_duration_highr_seconds_bucket{le="60.0"} 363.0
http_request_duration_highr_seconds_bucket{le="+Inf"} 363.0
http_request_duration_highr_seconds_count 363.0
http_request_duration_highr_seconds_sum 1753.430982518359
# HELP http_request_duration_highr_seconds_created Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. 
# TYPE http_request_duration_highr_seconds_created gauge
http_request_duration_highr_seconds_created 1.7798085375314133e+09
# HELP http_request_duration_seconds Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. 
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{handler="/v1/models",le="0.1",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="0.5",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="1.0",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="+Inf",method="GET"} 1.0
http_request_duration_seconds_count{handler="/v1/models",method="GET"} 1.0
http_request_duration_seconds_sum{handler="/v1/models",method="GET"} 0.0023236559936776757
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.1",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.5",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="1.0",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="+Inf",method="POST"} 362.0
http_request_duration_seconds_count{handler="/v1/chat/completions",method="POST"} 362.0
http_request_duration_seconds_sum{handler="/v1/chat/completions",method="POST"} 1753.4286588623654
# HELP http_request_duration_seconds_created Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. 
# TYPE http_request_duration_seconds_created gauge
http_request_duration_seconds_created{handler="/v1/models",method="GET"} 1.7798085390618532e+09
http_request_duration_seconds_created{handler="/v1/chat/completions",method="POST"} 1.7798085444921227e+09
