[
{
"claim": "Batch 0 substrate audit is only partially complete for existing runs.",
"needed_next": "Add request dispatch and finish/error timestamps to future replayer/proxy metrics.",
"reviewer_risk": "Cannot use these runs to prove online per-session sequentiality.",
"status": "partially_supported",
"supporting_data": "metrics.jsonl lacks actual dispatch/finish timestamps in current artifacts."
},
{
"claim": "Batch 1 workload shape can be characterized from formatted traces and metrics.",
"needed_next": "Add cache-hit joined records for actual reuse decomposition.",
"reviewer_risk": "Actual cache reuse decomposition needs cached_tokens joined with hash_ids.",
"status": "supported_for_trace_shape",
"supporting_data": "Full compact trace CPU summary in full_trace_summary.json: input p50/p90/p99 = 20k/87.9k/125.5k, output p50/p90/p99 = 80/811/6.6k, top 1% sessions hold 46.5% of input-token mass."
},
{
"claim": "Static PD separation is worse than combined in existing 200-request GPU A/B.",
"needed_next": "Refresh with PD matrix, multiple seeds, cudagraph-enabled methodology.",
"reviewer_risk": "Legacy run has no per-stage TTFT breakdown and no step-level KV occupancy.",
"status": "supported_by_existing_artifact",
"supporting_data": "outputs/gpu_ab_combined vs outputs/gpu_ab_pdsep metrics.summary.json."
},
{
"claim": "Elastic transfer-based migration does not improve high-contention 500-request run.",
"needed_next": "Attribute whether failure is trigger quality, transfer overhead, or wrong load regime.",
"reviewer_risk": "Existing metrics lack actual sequentiality proof and per-request transfer waterfall.",
"status": "supported_by_existing_artifact",
"supporting_data": "outputs/contention_16s_ts10 vs outputs/contention_16s_elastic metrics.summary.json and gpu_util.csv."
},
{
"claim": "PD-colo prefill/decode interference is not yet directly proven by step-level data in this package.",
"needed_next": "Run Batch 2 controlled same-worker/different-worker injection with step timestamps.",
"reviewer_risk": "Cannot claim interference as causal without Batch 2.",
"status": "not_yet_supported",
"supporting_data": "No decode-step and prefill-overlap timestamp artifact found in summarized runs."
},
{
"claim": "Session hot-spot residual imbalance is suggested but not fully attributed.",
"needed_next": "Collect per-worker queue delay, session-to-worker map, and per-session token mass per worker.",
"reviewer_risk": "GPU util imbalance alone is not enough to prove session hot-spot.",
"status": "partially_supported",
"supporting_data": "gpu_util.csv shows per-GPU mean-util imbalance in existing runs."
},
{
"claim": "SRR is not measured by existing fixed-request runs.",
"needed_next": "Implement Batch 4 Poisson session-arrival SRR sweep.",
"reviewer_risk": "Latency-at-one-load cannot support sustainable throughput claim.",
"status": "not_yet_supported",
"supporting_data": "No arrival-rate sweep artifacts found."
}
]