# HELP python_gc_objects_collected_total Objects collected during gc
# TYPE python_gc_objects_collected_total counter
python_gc_objects_collected_total{generation="0"} 11855.0
python_gc_objects_collected_total{generation="1"} 1664.0
python_gc_objects_collected_total{generation="2"} 855.0
# HELP python_gc_objects_uncollectable_total Uncollectable objects found during GC
# TYPE python_gc_objects_uncollectable_total counter
python_gc_objects_uncollectable_total{generation="0"} 0.0
python_gc_objects_uncollectable_total{generation="1"} 0.0
python_gc_objects_uncollectable_total{generation="2"} 0.0
# HELP python_gc_collections_total Number of times this generation was collected
# TYPE python_gc_collections_total counter
python_gc_collections_total{generation="0"} 1337.0
python_gc_collections_total{generation="1"} 122.0
python_gc_collections_total{generation="2"} 9.0
# HELP python_info Python platform information
# TYPE python_info gauge
python_info{implementation="CPython",major="3",minor="12",patchlevel="3",version="3.12.3"} 1.0
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 4.106807296e+010
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 1.381404672e+09
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.77981057777e+09
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 38.64
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 67.0
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 1.048575e+06
# HELP vllm:estimated_flops_per_gpu_total Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_flops_per_gpu_total counter
vllm:estimated_flops_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_flops_per_gpu_created Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_flops_per_gpu_created gauge
vllm:estimated_flops_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826448212e+09
# HELP vllm:estimated_read_bytes_per_gpu_total Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_read_bytes_per_gpu_total counter
vllm:estimated_read_bytes_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_read_bytes_per_gpu_created Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_read_bytes_per_gpu_created gauge
vllm:estimated_read_bytes_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826448493e+09
# HELP vllm:estimated_write_bytes_per_gpu_total Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_write_bytes_per_gpu_total counter
vllm:estimated_write_bytes_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_write_bytes_per_gpu_created Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_write_bytes_per_gpu_created gauge
vllm:estimated_write_bytes_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779810682644866e+09
# HELP vllm:num_requests_running Number of requests in model execution batches.
# TYPE vllm:num_requests_running gauge
vllm:num_requests_running{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:num_requests_waiting Number of requests waiting to be processed.
# TYPE vllm:num_requests_waiting gauge
vllm:num_requests_waiting{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:engine_sleep_state Engine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.
# TYPE vllm:engine_sleep_state gauge
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="awake"} 1.0
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="weights_offloaded"} 0.0
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="discard_all"} 0.0
# HELP vllm:kv_cache_usage_perc KV-cache usage. 1 means 100 percent usage.
# TYPE vllm:kv_cache_usage_perc gauge
vllm:kv_cache_usage_perc{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prefix_cache_queries_total Prefix cache queries, in terms of number of queried tokens.
# TYPE vllm:prefix_cache_queries_total counter
vllm:prefix_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.455799e+06
# HELP vllm:prefix_cache_queries_created Prefix cache queries, in terms of number of queried tokens.
# TYPE vllm:prefix_cache_queries_created gauge
vllm:prefix_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826450274e+09
# HELP vllm:prefix_cache_hits_total Prefix cache hits, in terms of number of cached tokens.
# TYPE vllm:prefix_cache_hits_total counter
vllm:prefix_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prefix_cache_hits_created Prefix cache hits, in terms of number of cached tokens.
# TYPE vllm:prefix_cache_hits_created gauge
vllm:prefix_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826450443e+09
# HELP vllm:external_prefix_cache_queries_total External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.
# TYPE vllm:external_prefix_cache_queries_total counter
vllm:external_prefix_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.455799e+06
# HELP vllm:external_prefix_cache_queries_created External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.
# TYPE vllm:external_prefix_cache_queries_created gauge
vllm:external_prefix_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826450567e+09
# HELP vllm:external_prefix_cache_hits_total External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.
# TYPE vllm:external_prefix_cache_hits_total counter
vllm:external_prefix_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:external_prefix_cache_hits_created External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.
# TYPE vllm:external_prefix_cache_hits_created gauge
vllm:external_prefix_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.77981068264507e+09
# HELP vllm:mm_cache_queries_total Multi-modal cache queries, in terms of number of queried items.
# TYPE vllm:mm_cache_queries_total counter
vllm:mm_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:mm_cache_queries_created Multi-modal cache queries, in terms of number of queried items.
# TYPE vllm:mm_cache_queries_created gauge
vllm:mm_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826450815e+09
# HELP vllm:mm_cache_hits_total Multi-modal cache hits, in terms of number of cached items.
# TYPE vllm:mm_cache_hits_total counter
vllm:mm_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:mm_cache_hits_created Multi-modal cache hits, in terms of number of cached items.
# TYPE vllm:mm_cache_hits_created gauge
vllm:mm_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779810682645094e+09
# HELP vllm:num_preemptions_total Cumulative number of preemption from the engine.
# TYPE vllm:num_preemptions_total counter
vllm:num_preemptions_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:num_preemptions_created Cumulative number of preemption from the engine.
# TYPE vllm:num_preemptions_created gauge
vllm:num_preemptions_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826451044e+09
# HELP vllm:prompt_tokens_total Number of prefill tokens processed.
# TYPE vllm:prompt_tokens_total counter
vllm:prompt_tokens_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.455799e+06
# HELP vllm:prompt_tokens_created Number of prefill tokens processed.
# TYPE vllm:prompt_tokens_created gauge
vllm:prompt_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826451144e+09
# HELP vllm:prompt_tokens_by_source_total Number of prompt tokens by source.
# TYPE vllm:prompt_tokens_by_source_total counter
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_compute"} 1.455799e+06
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_cache_hit"} 0.0
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="external_kv_transfer"} 0.0
# HELP vllm:prompt_tokens_by_source_created Number of prompt tokens by source.
# TYPE vllm:prompt_tokens_by_source_created gauge
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_compute"} 1.7798106826451285e+09
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_cache_hit"} 1.7798106826451337e+09
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="external_kv_transfer"} 1.779810682645141e+09
# HELP vllm:prompt_tokens_cached_total Number of cached prompt tokens (local + external).
# TYPE vllm:prompt_tokens_cached_total counter
vllm:prompt_tokens_cached_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prompt_tokens_cached_created Number of cached prompt tokens (local + external).
# TYPE vllm:prompt_tokens_cached_created gauge
vllm:prompt_tokens_cached_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826451623e+09
# HELP vllm:prompt_tokens_recomputed_total Number of cached tokens recomputed for forward pass.
# TYPE vllm:prompt_tokens_recomputed_total counter
vllm:prompt_tokens_recomputed_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prompt_tokens_recomputed_created Number of cached tokens recomputed for forward pass.
# TYPE vllm:prompt_tokens_recomputed_created gauge
vllm:prompt_tokens_recomputed_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826451778e+09
# HELP vllm:generation_tokens_total Number of generation tokens processed.
# TYPE vllm:generation_tokens_total counter
vllm:generation_tokens_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93440.0
# HELP vllm:generation_tokens_created Number of generation tokens processed.
# TYPE vllm:generation_tokens_created gauge
vllm:generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826452005e+09
# HELP vllm:request_success_total Count of successfully processed requests.
# TYPE vllm:request_success_total counter
vllm:request_success_total{engine="0",finished_reason="stop",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="length",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_success_total{engine="0",finished_reason="abort",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="error",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="repetition",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:request_success_created Count of successfully processed requests.
# TYPE vllm:request_success_created gauge
vllm:request_success_created{engine="0",finished_reason="stop",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826452274e+09
vllm:request_success_created{engine="0",finished_reason="length",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826452372e+09
vllm:request_success_created{engine="0",finished_reason="abort",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826452427e+09
vllm:request_success_created{engine="0",finished_reason="error",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826452477e+09
vllm:request_success_created{engine="0",finished_reason="repetition",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826452534e+09
# HELP vllm:request_prompt_tokens Number of prefill tokens processed.
# TYPE vllm:request_prompt_tokens histogram
vllm:request_prompt_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prompt_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prompt_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prompt_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prompt_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prompt_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prompt_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prompt_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prompt_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.455799e+06
# HELP vllm:request_prompt_tokens_created Number of prefill tokens processed.
# TYPE vllm:request_prompt_tokens_created gauge
vllm:request_prompt_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826452968e+09
# HELP vllm:request_generation_tokens Number of generation tokens processed.
# TYPE vllm:request_generation_tokens histogram
vllm:request_generation_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_generation_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93440.0
# HELP vllm:request_generation_tokens_created Number of generation tokens processed.
# TYPE vllm:request_generation_tokens_created gauge
vllm:request_generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826453574e+09
# HELP vllm:iteration_tokens_total Histogram of number of tokens per engine_step.
# TYPE vllm:iteration_tokens_total histogram
vllm:iteration_tokens_total_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 3061.0
vllm:iteration_tokens_total_bucket{engine="0",le="8.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 12382.0
vllm:iteration_tokens_total_bucket{engine="0",le="16.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15890.0
vllm:iteration_tokens_total_bucket{engine="0",le="32.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16333.0
vllm:iteration_tokens_total_bucket{engine="0",le="64.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16333.0
vllm:iteration_tokens_total_bucket{engine="0",le="128.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16333.0
vllm:iteration_tokens_total_bucket{engine="0",le="256.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16333.0
vllm:iteration_tokens_total_bucket{engine="0",le="512.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16333.0
vllm:iteration_tokens_total_bucket{engine="0",le="1024.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16333.0
vllm:iteration_tokens_total_bucket{engine="0",le="2048.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16333.0
vllm:iteration_tokens_total_bucket{engine="0",le="4096.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16646.0
vllm:iteration_tokens_total_bucket{engine="0",le="8192.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16672.0
vllm:iteration_tokens_total_bucket{engine="0",le="16384.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16672.0
vllm:iteration_tokens_total_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16672.0
vllm:iteration_tokens_total_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 16672.0
vllm:iteration_tokens_total_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.549239e+06
# HELP vllm:iteration_tokens_total_created Histogram of number of tokens per engine_step.
# TYPE vllm:iteration_tokens_total_created gauge
vllm:iteration_tokens_total_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826453998e+09
# HELP vllm:request_max_num_generation_tokens Histogram of maximum number of requested generation tokens.
# TYPE vllm:request_max_num_generation_tokens histogram
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_max_num_generation_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93440.0
# HELP vllm:request_max_num_generation_tokens_created Histogram of maximum number of requested generation tokens.
# TYPE vllm:request_max_num_generation_tokens_created gauge
vllm:request_max_num_generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779810682645445e+09
# HELP vllm:request_params_n Histogram of the n request parameter.
# TYPE vllm:request_params_n histogram
vllm:request_params_n_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_n_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_n_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_n_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_n_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_n_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_n_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_n_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
# HELP vllm:request_params_n_created Histogram of the n request parameter.
# TYPE vllm:request_params_n_created gauge
vllm:request_params_n_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826454847e+09
# HELP vllm:request_params_max_tokens Histogram of the max_tokens request parameter.
# TYPE vllm:request_params_max_tokens histogram
vllm:request_params_max_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_params_max_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93440.0
# HELP vllm:request_params_max_tokens_created Histogram of the max_tokens request parameter.
# TYPE vllm:request_params_max_tokens_created gauge
vllm:request_params_max_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826455128e+09
# HELP vllm:time_to_first_token_seconds Histogram of time to first token in seconds.
# TYPE vllm:time_to_first_token_seconds histogram
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.001",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.005",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.02",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.04",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.06",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.08",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.25",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 4.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 309.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 348.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 362.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="160.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="640.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="2560.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:time_to_first_token_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 130.10236072540283
# HELP vllm:time_to_first_token_seconds_created Histogram of time to first token in seconds.
# TYPE vllm:time_to_first_token_seconds_created gauge
vllm:time_to_first_token_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826455622e+09
# HELP vllm:inter_token_latency_seconds Histogram of inter-token latency in seconds.
# TYPE vllm:inter_token_latency_seconds histogram
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 30048.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.025",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 88447.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.05",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90523.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.075",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90637.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90637.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.15",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90637.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.2",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 90960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92862.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.4",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 92862.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93075.0
vllm:inter_token_latency_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1695.5502396721859
# HELP vllm:inter_token_latency_seconds_created Histogram of inter-token latency in seconds.
# TYPE vllm:inter_token_latency_seconds_created gauge
vllm:inter_token_latency_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826456032e+09
# HELP vllm:request_time_per_output_token_seconds Histogram of time_per_output_token_seconds per request.
# TYPE vllm:request_time_per_output_token_seconds histogram
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 42.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.025",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 305.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.05",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.075",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.15",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.2",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.4",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_time_per_output_token_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 6.649216626165432
# HELP vllm:request_time_per_output_token_seconds_created Histogram of time_per_output_token_seconds per request.
# TYPE vllm:request_time_per_output_token_seconds_created gauge
vllm:request_time_per_output_token_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779810682645645e+09
# HELP vllm:e2e_request_latency_seconds Histogram of e2e request latency in seconds.
# TYPE vllm:e2e_request_latency_seconds histogram
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 2.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 11.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 29.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 182.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:e2e_request_latency_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1825.4403264522552
# HELP vllm:e2e_request_latency_seconds_created Histogram of e2e request latency in seconds.
# TYPE vllm:e2e_request_latency_seconds_created gauge
vllm:e2e_request_latency_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826457312e+09
# HELP vllm:request_queue_time_seconds Histogram of time spent in WAITING phase for request.
# TYPE vllm:request_queue_time_seconds histogram
vllm:request_queue_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_queue_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.04605136066675186
# HELP vllm:request_queue_time_seconds_created Histogram of time spent in WAITING phase for request.
# TYPE vllm:request_queue_time_seconds_created gauge
vllm:request_queue_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826457753e+09
# HELP vllm:request_inference_time_seconds Histogram of time spent in RUNNING phase for request.
# TYPE vllm:request_inference_time_seconds histogram
vllm:request_inference_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 3.0
vllm:request_inference_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 11.0
vllm:request_inference_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 30.0
vllm:request_inference_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 188.0
vllm:request_inference_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_inference_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1804.7606877679937
# HELP vllm:request_inference_time_seconds_created Histogram of time spent in RUNNING phase for request.
# TYPE vllm:request_inference_time_seconds_created gauge
vllm:request_inference_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826458116e+09
# HELP vllm:request_prefill_time_seconds Histogram of time spent in PREFILL phase for request.
# TYPE vllm:request_prefill_time_seconds histogram
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 292.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 347.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 363.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 109.2104480958078
# HELP vllm:request_prefill_time_seconds_created Histogram of time spent in PREFILL phase for request.
# TYPE vllm:request_prefill_time_seconds_created gauge
vllm:request_prefill_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826458645e+09
# HELP vllm:request_decode_time_seconds Histogram of time spent in DECODE phase for request.
# TYPE vllm:request_decode_time_seconds histogram
vllm:request_decode_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 5.0
vllm:request_decode_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 21.0
vllm:request_decode_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 42.0
vllm:request_decode_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 216.0
vllm:request_decode_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_decode_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1695.5502396721859
# HELP vllm:request_decode_time_seconds_created Histogram of time spent in DECODE phase for request.
# TYPE vllm:request_decode_time_seconds_created gauge
vllm:request_decode_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779810682645899e+09
# HELP vllm:request_prefill_kv_computed_tokens Histogram of new KV tokens computed during prefill (excluding cached tokens).
# TYPE vllm:request_prefill_kv_computed_tokens histogram
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_kv_computed_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 365.0
vllm:request_prefill_kv_computed_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.455799e+06
# HELP vllm:request_prefill_kv_computed_tokens_created Histogram of new KV tokens computed during prefill (excluding cached tokens).
# TYPE vllm:request_prefill_kv_computed_tokens_created gauge
vllm:request_prefill_kv_computed_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798106826459525e+09
# HELP vllm:cache_config_info Information of the LLMEngine CacheConfig
# TYPE vllm:cache_config_info gauge
vllm:cache_config_info{_block_size_resolved="True",block_size="16",cache_dtype="auto",calculate_kv_scales="False",cpu_kvcache_space_bytes="None",enable_prefix_caching="True",engine="0",gpu_memory_utilization="0.9",is_attention_free="False",kv_cache_memory_bytes="None",kv_offloading_backend="native",kv_offloading_size="None",kv_sharing_fast_prefill="False",mamba_block_size="None",mamba_cache_dtype="auto",mamba_cache_mode="none",mamba_page_size_padded="None",mamba_ssm_cache_dtype="auto",num_cpu_blocks="None",num_gpu_blocks="17590",num_gpu_blocks_override="None",prefix_caching_hash_algo="sha256",sliding_window="None",user_specified_block_size="False"} 1.0
# HELP http_requests_total Total number of requests by method, status and handler.
# TYPE http_requests_total counter
http_requests_total{handler="/v1/models",method="GET",status="2xx"} 1.0
http_requests_total{handler="/v1/chat/completions",method="POST",status="2xx"} 365.0
# HELP http_requests_created Total number of requests by method, status and handler.
# TYPE http_requests_created gauge
http_requests_created{handler="/v1/models",method="GET",status="2xx"} 1.7798106845094843e+09
http_requests_created{handler="/v1/chat/completions",method="POST",status="2xx"} 1.7798106915180178e+09
# HELP http_request_size_bytes Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_request_size_bytes summary
http_request_size_bytes_count{handler="/v1/models"} 1.0
http_request_size_bytes_sum{handler="/v1/models"} 0.0
http_request_size_bytes_count{handler="/v1/chat/completions"} 365.0
http_request_size_bytes_sum{handler="/v1/chat/completions"} 1.92501e+06
# HELP http_request_size_bytes_created Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_request_size_bytes_created gauge
http_request_size_bytes_created{handler="/v1/models"} 1.7798106845095081e+09
http_request_size_bytes_created{handler="/v1/chat/completions"} 1.7798106915180402e+09
# HELP http_response_size_bytes Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_response_size_bytes summary
http_response_size_bytes_count{handler="/v1/models"} 1.0
http_response_size_bytes_sum{handler="/v1/models"} 558.0
http_response_size_bytes_count{handler="/v1/chat/completions"} 365.0
http_response_size_bytes_sum{handler="/v1/chat/completions"} 0.0
# HELP http_response_size_bytes_created Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_response_size_bytes_created gauge
http_response_size_bytes_created{handler="/v1/models"} 1.7798106845095322e+09
http_response_size_bytes_created{handler="/v1/chat/completions"} 1.779810691518067e+09
# HELP http_request_duration_highr_seconds Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. 
# TYPE http_request_duration_highr_seconds histogram
http_request_duration_highr_seconds_bucket{le="0.01"} 1.0
http_request_duration_highr_seconds_bucket{le="0.025"} 1.0
http_request_duration_highr_seconds_bucket{le="0.05"} 1.0
http_request_duration_highr_seconds_bucket{le="0.075"} 1.0
http_request_duration_highr_seconds_bucket{le="0.1"} 1.0
http_request_duration_highr_seconds_bucket{le="0.25"} 1.0
http_request_duration_highr_seconds_bucket{le="0.5"} 1.0
http_request_duration_highr_seconds_bucket{le="0.75"} 1.0
http_request_duration_highr_seconds_bucket{le="1.0"} 1.0
http_request_duration_highr_seconds_bucket{le="1.5"} 3.0
http_request_duration_highr_seconds_bucket{le="2.0"} 12.0
http_request_duration_highr_seconds_bucket{le="2.5"} 30.0
http_request_duration_highr_seconds_bucket{le="3.0"} 51.0
http_request_duration_highr_seconds_bucket{le="3.5"} 89.0
http_request_duration_highr_seconds_bucket{le="4.0"} 116.0
http_request_duration_highr_seconds_bucket{le="4.5"} 152.0
http_request_duration_highr_seconds_bucket{le="5.0"} 183.0
http_request_duration_highr_seconds_bucket{le="7.5"} 328.0
http_request_duration_highr_seconds_bucket{le="10.0"} 366.0
http_request_duration_highr_seconds_bucket{le="30.0"} 366.0
http_request_duration_highr_seconds_bucket{le="60.0"} 366.0
http_request_duration_highr_seconds_bucket{le="+Inf"} 366.0
http_request_duration_highr_seconds_count 366.0
http_request_duration_highr_seconds_sum 1825.9695772863342
# HELP http_request_duration_highr_seconds_created Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. 
# TYPE http_request_duration_highr_seconds_created gauge
http_request_duration_highr_seconds_created 1.7798106830941143e+09
# HELP http_request_duration_seconds Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. 
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{handler="/v1/models",le="0.1",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="0.5",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="1.0",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="+Inf",method="GET"} 1.0
http_request_duration_seconds_count{handler="/v1/models",method="GET"} 1.0
http_request_duration_seconds_sum{handler="/v1/models",method="GET"} 0.0021685969550162554
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.1",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.5",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="1.0",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="+Inf",method="POST"} 365.0
http_request_duration_seconds_count{handler="/v1/chat/completions",method="POST"} 365.0
http_request_duration_seconds_sum{handler="/v1/chat/completions",method="POST"} 1825.9674086893792
# HELP http_request_duration_seconds_created Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. 
# TYPE http_request_duration_seconds_created gauge
http_request_duration_seconds_created{handler="/v1/models",method="GET"} 1.7798106845095782e+09
http_request_duration_seconds_created{handler="/v1/chat/completions",method="POST"} 1.7798106915180964e+09
