From 5c09a3a0cb1e231008b96056abcec5541a9d0b0c Mon Sep 17 00:00:00 2001
From: Claude Code Agent
Date: Wed, 13 May 2026 14:25:16 +0800
Subject: [PATCH] feat(experiments): per-second GPU util sampler in E4-pressured sweep
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Background nvidia-smi poller runs at 1 Hz for all 4 GPUs throughout the
sweep, writing CSV to $OUTPUT/gpu_util.csv. Captures:

  timestamp_iso, gpu_index, util_pct, mem_used_MiB, mem_total_MiB,
  sm_clock_MHz, power_W, temperature_C

Sampler is started before benchmark-live and torn down by an EXIT trap.
INT and TERM are turned into an exit (status 130 / 143) so the same
cleanup runs exactly once when the run is interrupted or terminated,
instead of the script resuming after the signal handler returns.

This data lets us plot time-windowed wall-clock GPU utilization
(per-card) so we can answer "is concurrency the bottleneck or is each
D's per-session decode the bottleneck" — a question that came up during
E4-v3 / v5 analysis.
---
v2: clean up on EXIT only and turn INT/TERM into an exit; warn once when
nvidia-smi is missing. The post-image blob id on the index line below
has not been regenerated.

 scripts/sweep_e4_pressured.sh | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/scripts/sweep_e4_pressured.sh b/scripts/sweep_e4_pressured.sh
index 2776828..605817c 100755
--- a/scripts/sweep_e4_pressured.sh
+++ b/scripts/sweep_e4_pressured.sh
@@ -54,6 +54,46 @@ log "OUTPUT=$OUTPUT"
 label=e4p_kvc_v2_d_to_p_sync_run1
 log "=== [E4p] $label starting ==="
 
+# Background GPU utilization sampler: polls nvidia-smi once per second and
+# appends one CSV row per GPU per poll (nvidia-smi is not restricted with -i).
+GPU_CSV="$OUTPUT/gpu_util.csv"
+log "GPU sampling → $GPU_CSV (1 Hz, gpus 0-3)"
+echo "timestamp_iso,gpu_index,util_pct,mem_used_MiB,mem_total_MiB,sm_clock_MHz,power_W,temperature_C" > "$GPU_CSV"
+# The loop below deliberately swallows nvidia-smi failures so one bad poll
+# cannot kill the sweep. Warn once up front if the tool is missing entirely,
+# otherwise the CSV would silently stay header-only for the whole run.
+command -v nvidia-smi >/dev/null 2>&1 \
+  || log "WARN: nvidia-smi not found; $GPU_CSV will only contain the header"
+(
+  while true; do
+    # One UTC timestamp per poll, prefixed onto every row (%3N = ms, GNU date).
+    ts_iso=$(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)
+    nvidia-smi --query-gpu=index,utilization.gpu,memory.used,memory.total,clocks.sm,power.draw,temperature.gpu \
+      --format=csv,noheader,nounits 2>/dev/null \
+      | sed -e "s/^/${ts_iso},/" -e 's/ //g' >> "$GPU_CSV" || true
+    sleep 1
+  done
+) &
+GPU_SAMPLER_PID=$!
+log "GPU sampler pid=$GPU_SAMPLER_PID"
+
+# Stop the background sampler and log how much was captured.
+# Globals: GPU_SAMPLER_PID (read), GPU_CSV (read)
+cleanup_gpu_sampler() {
+  # SIGKILL is acceptable: the sampler is a bare loop with nothing to flush.
+  kill -9 "$GPU_SAMPLER_PID" 2>/dev/null || true
+  wait "$GPU_SAMPLER_PID" 2>/dev/null || true
+  # NB: the count below includes the CSV header line.
+  log "GPU sampler stopped (output: $GPU_CSV, $(wc -l < "$GPU_CSV") rows)"
+}
+# Run the cleanup exactly once, from the EXIT trap. INT/TERM only turn the
+# signal into an exit (128 + signal number); a handler that returns would let
+# bash resume the script after the signal and run the cleanup a second time
+# at the real exit.
+trap cleanup_gpu_sampler EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
 uv run --no-sync python -m agentic_pd_hybrid.cli benchmark-live \
   --trace "$TRACE" \
   --output-root "$OUTPUT" \