# HELP python_gc_objects_collected_total Objects collected during gc
# TYPE python_gc_objects_collected_total counter
python_gc_objects_collected_total{generation="0"} 11970.0
python_gc_objects_collected_total{generation="1"} 1549.0
python_gc_objects_collected_total{generation="2"} 855.0
# HELP python_gc_objects_uncollectable_total Uncollectable objects found during GC
# TYPE python_gc_objects_uncollectable_total counter
python_gc_objects_uncollectable_total{generation="0"} 0.0
python_gc_objects_uncollectable_total{generation="1"} 0.0
python_gc_objects_uncollectable_total{generation="2"} 0.0
# HELP python_gc_collections_total Number of times this generation was collected
# TYPE python_gc_collections_total counter
python_gc_collections_total{generation="0"} 1349.0
python_gc_collections_total{generation="1"} 123.0
python_gc_collections_total{generation="2"} 9.0
# HELP python_info Python platform information
# TYPE python_info gauge
python_info{implementation="CPython",major="3",minor="12",patchlevel="3",version="3.12.3"} 1.0
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 4.099221504e+010
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 1.380847616e+09
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.77981354873e+09
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 41.89
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 67.0
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 1.048575e+06
# HELP vllm:estimated_flops_per_gpu_total Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_flops_per_gpu_total counter
vllm:estimated_flops_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_flops_per_gpu_created Estimated number of floating point operations per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_flops_per_gpu_created gauge
vllm:estimated_flops_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516264124e+09
# HELP vllm:estimated_read_bytes_per_gpu_total Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_read_bytes_per_gpu_total counter
vllm:estimated_read_bytes_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_read_bytes_per_gpu_created Estimated number of bytes read from memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_read_bytes_per_gpu_created gauge
vllm:estimated_read_bytes_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516264498e+09
# HELP vllm:estimated_write_bytes_per_gpu_total Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_write_bytes_per_gpu_total counter
vllm:estimated_write_bytes_per_gpu_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:estimated_write_bytes_per_gpu_created Estimated number of bytes written to memory per GPU (for Model Flops Utilization calculations).
# TYPE vllm:estimated_write_bytes_per_gpu_created gauge
vllm:estimated_write_bytes_per_gpu_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.77981365162647e+09
# HELP vllm:num_requests_running Number of requests in model execution batches.
# TYPE vllm:num_requests_running gauge
vllm:num_requests_running{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:num_requests_waiting Number of requests waiting to be processed.
# TYPE vllm:num_requests_waiting gauge
vllm:num_requests_waiting{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:engine_sleep_state Engine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.
# TYPE vllm:engine_sleep_state gauge
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="awake"} 1.0
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="weights_offloaded"} 0.0
vllm:engine_sleep_state{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",sleep_state="discard_all"} 0.0
# HELP vllm:kv_cache_usage_perc KV-cache usage. 1 means 100 percent usage.
# TYPE vllm:kv_cache_usage_perc gauge
vllm:kv_cache_usage_perc{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prefix_cache_queries_total Prefix cache queries, in terms of number of queried tokens.
# TYPE vllm:prefix_cache_queries_total counter
vllm:prefix_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.563446e+06
# HELP vllm:prefix_cache_queries_created Prefix cache queries, in terms of number of queried tokens.
# TYPE vllm:prefix_cache_queries_created gauge
vllm:prefix_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516266289e+09
# HELP vllm:prefix_cache_hits_total Prefix cache hits, in terms of number of cached tokens.
# TYPE vllm:prefix_cache_hits_total counter
vllm:prefix_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prefix_cache_hits_created Prefix cache hits, in terms of number of cached tokens.
# TYPE vllm:prefix_cache_hits_created gauge
vllm:prefix_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516266432e+09
# HELP vllm:external_prefix_cache_queries_total External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.
# TYPE vllm:external_prefix_cache_queries_total counter
vllm:external_prefix_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.563446e+06
# HELP vllm:external_prefix_cache_queries_created External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.
# TYPE vllm:external_prefix_cache_queries_created gauge
vllm:external_prefix_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516266596e+09
# HELP vllm:external_prefix_cache_hits_total External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.
# TYPE vllm:external_prefix_cache_hits_total counter
vllm:external_prefix_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:external_prefix_cache_hits_created External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.
# TYPE vllm:external_prefix_cache_hits_created gauge
vllm:external_prefix_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516266723e+09
# HELP vllm:mm_cache_queries_total Multi-modal cache queries, in terms of number of queried items.
# TYPE vllm:mm_cache_queries_total counter
vllm:mm_cache_queries_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:mm_cache_queries_created Multi-modal cache queries, in terms of number of queried items.
# TYPE vllm:mm_cache_queries_created gauge
vllm:mm_cache_queries_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516266844e+09
# HELP vllm:mm_cache_hits_total Multi-modal cache hits, in terms of number of cached items.
# TYPE vllm:mm_cache_hits_total counter
vllm:mm_cache_hits_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:mm_cache_hits_created Multi-modal cache hits, in terms of number of cached items.
# TYPE vllm:mm_cache_hits_created gauge
vllm:mm_cache_hits_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516266956e+09
# HELP vllm:num_preemptions_total Cumulative number of preemption from the engine.
# TYPE vllm:num_preemptions_total counter
vllm:num_preemptions_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:num_preemptions_created Cumulative number of preemption from the engine.
# TYPE vllm:num_preemptions_created gauge
vllm:num_preemptions_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516267076e+09
# HELP vllm:prompt_tokens_total Number of prefill tokens processed.
# TYPE vllm:prompt_tokens_total counter
vllm:prompt_tokens_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.563446e+06
# HELP vllm:prompt_tokens_created Number of prefill tokens processed.
# TYPE vllm:prompt_tokens_created gauge
vllm:prompt_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779813651626718e+09
# HELP vllm:prompt_tokens_by_source_total Number of prompt tokens by source.
# TYPE vllm:prompt_tokens_by_source_total counter
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_compute"} 1.563446e+06
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_cache_hit"} 0.0
vllm:prompt_tokens_by_source_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="external_kv_transfer"} 0.0
# HELP vllm:prompt_tokens_by_source_created Number of prompt tokens by source.
# TYPE vllm:prompt_tokens_by_source_created gauge
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_compute"} 1.7798136516267326e+09
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="local_cache_hit"} 1.7798136516267376e+09
vllm:prompt_tokens_by_source_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct",source="external_kv_transfer"} 1.7798136516267424e+09
# HELP vllm:prompt_tokens_cached_total Number of cached prompt tokens (local + external).
# TYPE vllm:prompt_tokens_cached_total counter
vllm:prompt_tokens_cached_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prompt_tokens_cached_created Number of cached prompt tokens (local + external).
# TYPE vllm:prompt_tokens_cached_created gauge
vllm:prompt_tokens_cached_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779813651626754e+09
# HELP vllm:prompt_tokens_recomputed_total Number of cached tokens recomputed for forward pass.
# TYPE vllm:prompt_tokens_recomputed_total counter
vllm:prompt_tokens_recomputed_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:prompt_tokens_recomputed_created Number of cached tokens recomputed for forward pass.
# TYPE vllm:prompt_tokens_recomputed_created gauge
vllm:prompt_tokens_recomputed_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779813651626765e+09
# HELP vllm:generation_tokens_total Number of generation tokens processed.
# TYPE vllm:generation_tokens_total counter
vllm:generation_tokens_total{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 100352.0
# HELP vllm:generation_tokens_created Number of generation tokens processed.
# TYPE vllm:generation_tokens_created gauge
vllm:generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516267776e+09
# HELP vllm:request_success_total Count of successfully processed requests.
# TYPE vllm:request_success_total counter
vllm:request_success_total{engine="0",finished_reason="stop",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="length",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_success_total{engine="0",finished_reason="abort",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="error",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_success_total{engine="0",finished_reason="repetition",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
# HELP vllm:request_success_created Count of successfully processed requests.
# TYPE vllm:request_success_created gauge
vllm:request_success_created{engine="0",finished_reason="stop",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516268065e+09
vllm:request_success_created{engine="0",finished_reason="length",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516268144e+09
vllm:request_success_created{engine="0",finished_reason="abort",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516268213e+09
vllm:request_success_created{engine="0",finished_reason="error",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516268265e+09
vllm:request_success_created{engine="0",finished_reason="repetition",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516268313e+09
# HELP vllm:request_prompt_tokens Number of prefill tokens processed.
# TYPE vllm:request_prompt_tokens histogram
vllm:request_prompt_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prompt_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prompt_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prompt_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prompt_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prompt_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prompt_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prompt_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prompt_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prompt_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.563446e+06
# HELP vllm:request_prompt_tokens_created Number of prefill tokens processed.
# TYPE vllm:request_prompt_tokens_created gauge
vllm:request_prompt_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779813651626876e+09
# HELP vllm:request_generation_tokens Number of generation tokens processed.
# TYPE vllm:request_generation_tokens histogram
vllm:request_generation_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_generation_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_generation_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 100352.0
# HELP vllm:request_generation_tokens_created Number of generation tokens processed.
# TYPE vllm:request_generation_tokens_created gauge
vllm:request_generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516269345e+09
# HELP vllm:iteration_tokens_total Histogram of number of tokens per engine_step.
# TYPE vllm:iteration_tokens_total histogram
vllm:iteration_tokens_total_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 3027.0
vllm:iteration_tokens_total_bucket{engine="0",le="8.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 10335.0
vllm:iteration_tokens_total_bucket{engine="0",le="16.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 14359.0
vllm:iteration_tokens_total_bucket{engine="0",le="32.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15043.0
vllm:iteration_tokens_total_bucket{engine="0",le="64.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15043.0
vllm:iteration_tokens_total_bucket{engine="0",le="128.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15043.0
vllm:iteration_tokens_total_bucket{engine="0",le="256.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15043.0
vllm:iteration_tokens_total_bucket{engine="0",le="512.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15043.0
vllm:iteration_tokens_total_bucket{engine="0",le="1024.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15043.0
vllm:iteration_tokens_total_bucket{engine="0",le="2048.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15043.0
vllm:iteration_tokens_total_bucket{engine="0",le="4096.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15389.0
vllm:iteration_tokens_total_bucket{engine="0",le="8192.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15412.0
vllm:iteration_tokens_total_bucket{engine="0",le="16384.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15412.0
vllm:iteration_tokens_total_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15412.0
vllm:iteration_tokens_total_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 15412.0
vllm:iteration_tokens_total_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.663798e+06
# HELP vllm:iteration_tokens_total_created Histogram of number of tokens per engine_step.
# TYPE vllm:iteration_tokens_total_created gauge
vllm:iteration_tokens_total_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516270025e+09
# HELP vllm:request_max_num_generation_tokens Histogram of maximum number of requested generation tokens.
# TYPE vllm:request_max_num_generation_tokens histogram
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_max_num_generation_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 100352.0
# HELP vllm:request_max_num_generation_tokens_created Histogram of maximum number of requested generation tokens.
# TYPE vllm:request_max_num_generation_tokens_created gauge
vllm:request_max_num_generation_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516270401e+09
# HELP vllm:request_params_n Histogram of the n request parameter.
# TYPE vllm:request_params_n histogram
vllm:request_params_n_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_n_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_n_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_n_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_n_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_n_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_n_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_n_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
# HELP vllm:request_params_n_created Histogram of the n request parameter.
# TYPE vllm:request_params_n_created gauge
vllm:request_params_n_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779813651627073e+09
# HELP vllm:request_params_max_tokens Histogram of the max_tokens request parameter.
# TYPE vllm:request_params_max_tokens histogram
vllm:request_params_max_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_params_max_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_params_max_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 100352.0
# HELP vllm:request_params_max_tokens_created Histogram of the max_tokens request parameter.
# TYPE vllm:request_params_max_tokens_created gauge
vllm:request_params_max_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779813651627101e+09
# HELP vllm:time_to_first_token_seconds Histogram of time to first token in seconds.
# TYPE vllm:time_to_first_token_seconds histogram
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.001",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.005",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.02",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.04",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.06",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.08",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.25",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 13.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 335.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 385.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 391.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="160.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="640.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="2560.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:time_to_first_token_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 139.64915657043457
# HELP vllm:time_to_first_token_seconds_created Histogram of time to first token in seconds.
# TYPE vllm:time_to_first_token_seconds_created gauge
vllm:time_to_first_token_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516271358e+09
# HELP vllm:inter_token_latency_seconds Histogram of inter-token latency in seconds.
# TYPE vllm:inter_token_latency_seconds histogram
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 23661.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.025",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 93949.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.05",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 96702.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.075",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 96748.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 96748.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.15",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 96748.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.2",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 96904.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99760.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.4",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99760.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 99960.0
vllm:inter_token_latency_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 2037.7261010148213
# HELP vllm:inter_token_latency_seconds_created Histogram of inter-token latency in seconds.
# TYPE vllm:inter_token_latency_seconds_created gauge
vllm:inter_token_latency_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516271784e+09
# HELP vllm:request_time_per_output_token_seconds Histogram of time_per_output_token_seconds per request.
# TYPE vllm:request_time_per_output_token_seconds histogram
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.01",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 32.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.025",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 277.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.05",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.075",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.1",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.15",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.2",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.4",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="0.75",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="7.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="80.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_time_per_output_token_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 7.991082749077725
# HELP vllm:request_time_per_output_token_seconds_created Histogram of time_per_output_token_seconds per request.
# TYPE vllm:request_time_per_output_token_seconds_created gauge
vllm:request_time_per_output_token_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516272125e+09
# HELP vllm:e2e_request_latency_seconds Histogram of e2e request latency in seconds.
# TYPE vllm:e2e_request_latency_seconds histogram
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 2.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 11.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 17.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 165.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:e2e_request_latency_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 2177.253833055496
# HELP vllm:e2e_request_latency_seconds_created Histogram of e2e request latency in seconds.
# TYPE vllm:e2e_request_latency_seconds_created gauge
vllm:e2e_request_latency_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.779813651627295e+09
# HELP vllm:request_queue_time_seconds Histogram of time spent in WAITING phase for request.
# TYPE vllm:request_queue_time_seconds histogram
vllm:request_queue_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_queue_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0029381027561612427
# HELP vllm:request_queue_time_seconds_created Histogram of time spent in WAITING phase for request.
# TYPE vllm:request_queue_time_seconds_created gauge
vllm:request_queue_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516273322e+09
# HELP vllm:request_inference_time_seconds Histogram of time spent in RUNNING phase for request.
# TYPE vllm:request_inference_time_seconds histogram
vllm:request_inference_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 3.0
vllm:request_inference_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 12.0
vllm:request_inference_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 18.0
vllm:request_inference_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 168.0
vllm:request_inference_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_inference_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 2156.0547709063394
# HELP vllm:request_inference_time_seconds_created Histogram of time spent in RUNNING phase for request.
# TYPE vllm:request_inference_time_seconds_created gauge
vllm:request_inference_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516273665e+09
# HELP vllm:request_prefill_time_seconds Histogram of time spent in PREFILL phase for request.
# TYPE vllm:request_prefill_time_seconds histogram
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 305.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 379.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 118.32866989151808
# HELP vllm:request_prefill_time_seconds_created Histogram of time spent in PREFILL phase for request.
# TYPE vllm:request_prefill_time_seconds_created gauge
vllm:request_prefill_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516274056e+09
# HELP vllm:request_decode_time_seconds Histogram of time spent in DECODE phase for request.
# TYPE vllm:request_decode_time_seconds histogram
vllm:request_decode_time_seconds_bucket{engine="0",le="0.3",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="0.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="0.8",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 6.0
vllm:request_decode_time_seconds_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 14.0
vllm:request_decode_time_seconds_bucket{engine="0",le="2.5",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 31.0
vllm:request_decode_time_seconds_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 189.0
vllm:request_decode_time_seconds_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="15.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="30.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="40.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="60.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="120.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="240.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="480.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="960.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="1920.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="7680.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_decode_time_seconds_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 2037.7261010148213
# HELP vllm:request_decode_time_seconds_created Histogram of time spent in DECODE phase for request.
# TYPE vllm:request_decode_time_seconds_created gauge
vllm:request_decode_time_seconds_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516274517e+09
# HELP vllm:request_prefill_kv_computed_tokens Histogram of new KV tokens computed during prefill (excluding cached tokens).
# TYPE vllm:request_prefill_kv_computed_tokens histogram
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="1.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="2.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="5.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="10.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="20.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="50.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="100.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="200.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="500.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="1000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="2000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 0.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="5000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="10000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="20000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="50000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="100000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="200000.0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_kv_computed_tokens_bucket{engine="0",le="+Inf",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_kv_computed_tokens_count{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 392.0
vllm:request_prefill_kv_computed_tokens_sum{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.563446e+06
# HELP vllm:request_prefill_kv_computed_tokens_created Histogram of new KV tokens computed during prefill (excluding cached tokens).
# TYPE vllm:request_prefill_kv_computed_tokens_created gauge
vllm:request_prefill_kv_computed_tokens_created{engine="0",model_name="/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct"} 1.7798136516274922e+09
# HELP vllm:cache_config_info Information of the LLMEngine CacheConfig
# TYPE vllm:cache_config_info gauge
vllm:cache_config_info{_block_size_resolved="True",block_size="16",cache_dtype="auto",calculate_kv_scales="False",cpu_kvcache_space_bytes="None",enable_prefix_caching="True",engine="0",gpu_memory_utilization="0.9",is_attention_free="False",kv_cache_memory_bytes="None",kv_offloading_backend="native",kv_offloading_size="None",kv_sharing_fast_prefill="False",mamba_block_size="None",mamba_cache_dtype="auto",mamba_cache_mode="none",mamba_page_size_padded="None",mamba_ssm_cache_dtype="auto",num_cpu_blocks="None",num_gpu_blocks="17590",num_gpu_blocks_override="None",prefix_caching_hash_algo="sha256",sliding_window="None",user_specified_block_size="False"} 1.0
# HELP http_requests_total Total number of requests by method, status and handler.
# TYPE http_requests_total counter
http_requests_total{handler="/v1/models",method="GET",status="2xx"} 1.0
http_requests_total{handler="/v1/chat/completions",method="POST",status="2xx"} 392.0
# HELP http_requests_created Total number of requests by method, status and handler.
# TYPE http_requests_created gauge
http_requests_created{handler="/v1/models",method="GET",status="2xx"} 1.7798136534283202e+09
http_requests_created{handler="/v1/chat/completions",method="POST",status="2xx"} 1.7798136621262043e+09
# HELP http_request_size_bytes Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_request_size_bytes summary
http_request_size_bytes_count{handler="/v1/models"} 1.0
http_request_size_bytes_sum{handler="/v1/models"} 0.0
http_request_size_bytes_count{handler="/v1/chat/completions"} 392.0
http_request_size_bytes_sum{handler="/v1/chat/completions"} 2.067408e+06
# HELP http_request_size_bytes_created Content length of incoming requests by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_request_size_bytes_created gauge
http_request_size_bytes_created{handler="/v1/models"} 1.779813653428346e+09
http_request_size_bytes_created{handler="/v1/chat/completions"} 1.7798136621262243e+09
# HELP http_response_size_bytes Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_response_size_bytes summary
http_response_size_bytes_count{handler="/v1/models"} 1.0
http_response_size_bytes_sum{handler="/v1/models"} 558.0
http_response_size_bytes_count{handler="/v1/chat/completions"} 392.0
http_response_size_bytes_sum{handler="/v1/chat/completions"} 0.0
# HELP http_response_size_bytes_created Content length of outgoing responses by handler. Only value of header is respected. Otherwise ignored. No percentile calculated. 
# TYPE http_response_size_bytes_created gauge
http_response_size_bytes_created{handler="/v1/models"} 1.7798136534283705e+09
http_response_size_bytes_created{handler="/v1/chat/completions"} 1.7798136621262496e+09
# HELP http_request_duration_highr_seconds Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. 
# TYPE http_request_duration_highr_seconds histogram
http_request_duration_highr_seconds_bucket{le="0.01"} 1.0
http_request_duration_highr_seconds_bucket{le="0.025"} 1.0
http_request_duration_highr_seconds_bucket{le="0.05"} 1.0
http_request_duration_highr_seconds_bucket{le="0.075"} 1.0
http_request_duration_highr_seconds_bucket{le="0.1"} 1.0
http_request_duration_highr_seconds_bucket{le="0.25"} 1.0
http_request_duration_highr_seconds_bucket{le="0.5"} 1.0
http_request_duration_highr_seconds_bucket{le="0.75"} 1.0
http_request_duration_highr_seconds_bucket{le="1.0"} 1.0
http_request_duration_highr_seconds_bucket{le="1.5"} 3.0
http_request_duration_highr_seconds_bucket{le="2.0"} 12.0
http_request_duration_highr_seconds_bucket{le="2.5"} 18.0
http_request_duration_highr_seconds_bucket{le="3.0"} 38.0
http_request_duration_highr_seconds_bucket{le="3.5"} 59.0
http_request_duration_highr_seconds_bucket{le="4.0"} 92.0
http_request_duration_highr_seconds_bucket{le="4.5"} 134.0
http_request_duration_highr_seconds_bucket{le="5.0"} 166.0
http_request_duration_highr_seconds_bucket{le="7.5"} 317.0
http_request_duration_highr_seconds_bucket{le="10.0"} 393.0
http_request_duration_highr_seconds_bucket{le="30.0"} 393.0
http_request_duration_highr_seconds_bucket{le="60.0"} 393.0
http_request_duration_highr_seconds_bucket{le="+Inf"} 393.0
http_request_duration_highr_seconds_count 393.0
http_request_duration_highr_seconds_sum 2177.854818835389
# HELP http_request_duration_highr_seconds_created Latency with many buckets but no API specific labels. Made for more accurate percentile calculations. 
# TYPE http_request_duration_highr_seconds_created gauge
http_request_duration_highr_seconds_created 1.779813652123545e+09
# HELP http_request_duration_seconds Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. 
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{handler="/v1/models",le="0.1",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="0.5",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="1.0",method="GET"} 1.0
http_request_duration_seconds_bucket{handler="/v1/models",le="+Inf",method="GET"} 1.0
http_request_duration_seconds_count{handler="/v1/models",method="GET"} 1.0
http_request_duration_seconds_sum{handler="/v1/models",method="GET"} 0.0021718639764003456
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.1",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="0.5",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="1.0",method="POST"} 0.0
http_request_duration_seconds_bucket{handler="/v1/chat/completions",le="+Inf",method="POST"} 392.0
http_request_duration_seconds_count{handler="/v1/chat/completions",method="POST"} 392.0
http_request_duration_seconds_sum{handler="/v1/chat/completions",method="POST"} 2177.8526469714125
# HELP http_request_duration_seconds_created Latency with only few buckets by handler. Made to be only used if aggregation by handler is important. 
# TYPE http_request_duration_seconds_created gauge
http_request_duration_seconds_created{handler="/v1/models",method="GET"} 1.7798136534284024e+09
http_request_duration_seconds_created{handler="/v1/chat/completions",method="POST"} 1.7798136621262796e+09
