[2026-04-28 20:50:21] Starting TP1 v4 sweep (KVC kv-aware, session soft_cap raised 4->16)
[2026-04-28 20:50:21] Model: /mnt/kzlin/workflow/pd-hybrid/simm-swe-bench/models/Qwen3-30B-A3B-Instruct-2507
[2026-04-28 20:50:21] Trace: outputs/qwen35-swebench-50sess.jsonl (4449 requests, 52 sessions)
[2026-04-28 20:50:21] Key change: _decode_session_soft_cap now min(16, ...) instead of min(4, ...)
[2026-04-28 20:50:21] 
[2026-04-28 20:50:21] === [EXP1] 1P7D KVC kv-aware cap=16 ===
[2026-04-28 21:40:57] === exp1_1p7d_kvc_cap16 COMPLETED ===
[2026-04-28 21:40:57] Summary:
{
  "actual_output_tokens_stats": {
    "count": 4014.0,
    "mean": 215.048081714001,
    "p50": 83.0,
    "p90": 570.0,
    "p99": 1343.0
  },
  "cache_hit_request_count": 3865,
  "cached_tokens_stats": {
    "count": 4449.0,
    "mean": 21373.60867610699,
    "p50": 18429.0,
    "p90": 45643.0,
    "p99": 65088.0
  },
  "decode_request_priorities": {},
  "error_count": 435,
  "execution_modes": {
    "kvcache-centric": 435,
    "kvcache-direct-to-d-session": 2180,
    "pd-router-d-session-reseed": 44,
    "pd-router-d-session-reseed-after-eviction": 1,
    "pd-router-fallback-d-backpressure": 36,
    "pd-router-fallback-large-append": 35,
    "pd-router-fallback-large-append-seed-filter-early-turn": 52,
    "pd-router-fallback-large-append-session-cap": 1500,
    "pd-router-fallback-no-d-capacity": 13,
    "pd-router-fallback-session-cap": 43,
    "pd-router-large-append-reseed": 55,
    "pd-router-large-append-reseed-after-eviction": 3,
    "pd-router-turn1-d-backpressure": 1,
    "pd-router-turn1-no-d-capacity": 5,
    "pd-router-turn1-seed": 46
  },
  "latency_stats_s": {
    "count": 4014.0,
    "mean": 4.214657033050009,
    "p50": 1.0827504023909569,
    "p90": 13.380241627804935,
    "p99": 24.453291333280504
  },
  "mechanisms": {
    "kvcache-centric": 4449
  },
  "per_decode_load": {
    "decode-0": 690,
    "decode-1": 599,
    "decode-2": 660,
    "decode-3": 584,
    "decode-4": 606,
    "decode-5": 646,
    "decode-6": 664
  },
  "per_prefill_load": {
    "prefill-0": 4449
  },
  "prefill_request_priorities": {
    "-100": 149,
    "100": 1685
  },
  "re_prefill_count": 0,
  "request_count": 4449,
  "reuse_expected_count": 4397,
  "reuse_observed_count": 4397,
  "router_url": "http://127.0.0.1:8000",
  "session_reset_count": 0,
  "session_reused_count": 2180,
  "total_actual_kv_transfer_blocks": 52857,
  "total_cached_tokens": 95091185,
  "total_kv_transfer_blocks": 105235,
  "tpot_stats_s": {
    "count": 4014.0,
    "mean": 0.005804301410418847,
    "p50": 0.005607025208882987,
    "p90": 0.007293824862528552,
    "p99": 0.008864479259402893
  },
  "trace_path": "outputs/qwen3-30b-tp1-v4-cap16/kvcache-centric-kv-aware-worker-admission-20260428T125022Z/sampled-trace.jsonl",
  "truncated_request_count": 43,
  "ttft_stats_s": {
    "count": 4014.0,
    "mean": 2.915135478307124,
    "p50": 0.05643345229327679,
    "p90": 11.900803190656006,
    "p99": 22.758968392387033
  }
}
[2026-04-28 21:40:57] Saved to outputs/qwen3-30b-tp1-v4-cap16/exp1_1p7d_kvc_cap16_summary.json + exp1_1p7d_kvc_cap16_metrics.jsonl
[2026-04-28 21:40:57] 
[2026-04-28 21:40:57] === [EXP2] 2P6D KVC kv-aware cap=16 ===
[2026-04-28 22:27:53] === exp2_2p6d_kvc_cap16 COMPLETED ===
[2026-04-28 22:27:53] Summary:
{
  "actual_output_tokens_stats": {
    "count": 4046.0,
    "mean": 224.65002471576867,
    "p50": 84.0,
    "p90": 576.0,
    "p99": 1349.0
  },
  "cache_hit_request_count": 3925,
  "cached_tokens_stats": {
    "count": 4449.0,
    "mean": 22852.7439874129,
    "p50": 19584.0,
    "p90": 49009.0,
    "p99": 67320.0
  },
  "decode_request_priorities": {},
  "error_count": 403,
  "execution_modes": {
    "kvcache-centric": 403,
    "kvcache-direct-to-d-session": 2348,
    "pd-router-d-session-reseed": 28,
    "pd-router-fallback-d-backpressure": 7,
    "pd-router-fallback-large-append": 68,
    "pd-router-fallback-large-append-seed-filter-early-turn": 45,
    "pd-router-fallback-large-append-session-cap": 1403,
    "pd-router-fallback-no-d-capacity": 9,
    "pd-router-fallback-session-cap": 25,
    "pd-router-large-append-reseed": 57,
    "pd-router-large-append-reseed-after-eviction": 6,
    "pd-router-turn1-no-d-capacity": 1,
    "pd-router-turn1-seed": 49
  },
  "latency_stats_s": {
    "count": 4046.0,
    "mean": 2.505981629502371,
    "p50": 0.8372491216287017,
    "p90": 6.5139341270551085,
    "p99": 18.335972285829484
  },
  "mechanisms": {
    "kvcache-centric": 4449
  },
  "per_decode_load": {
    "decode-0": 767,
    "decode-1": 680,
    "decode-2": 906,
    "decode-3": 818,
    "decode-4": 800,
    "decode-5": 478
  },
  "per_prefill_load": {
    "prefill-0": 2225,
    "prefill-1": 2224
  },
  "prefill_request_priorities": {
    "-100": 140,
    "100": 1558
  },
  "re_prefill_count": 0,
  "request_count": 4449,
  "reuse_expected_count": 4397,
  "reuse_observed_count": 4397,
  "router_url": "http://127.0.0.1:8000",
  "session_reset_count": 0,
  "session_reused_count": 2348,
  "total_actual_kv_transfer_blocks": 50727,
  "total_cached_tokens": 101671858,
  "total_kv_transfer_blocks": 105235,
  "tpot_stats_s": {
    "count": 4046.0,
    "mean": 0.005708743129332261,
    "p50": 0.005565466725497757,
    "p90": 0.006912594398356141,
    "p99": 0.008102089307750717
  },
  "trace_path": "outputs/qwen3-30b-tp1-v4-cap16/kvcache-centric-kv-aware-worker-admission-20260428T134057Z/sampled-trace.jsonl",
  "truncated_request_count": 36,
  "ttft_stats_s": {
    "count": 4046.0,
    "mean": 1.1653790952959129,
    "p50": 0.05140436999499798,
    "p90": 2.6447059931233525,
    "p99": 15.121314341202378
  }
}
[2026-04-28 22:27:53] Saved to outputs/qwen3-30b-tp1-v4-cap16/exp2_2p6d_kvc_cap16_summary.json + exp2_2p6d_kvc_cap16_metrics.jsonl
[2026-04-28 22:27:53] 
[2026-04-28 22:27:53] === ALL TP1 V4 SWEEP EXPERIMENTS DONE ===
