feat(experiments): per-second GPU util sampler in E4-pressured sweep
A background nvidia-smi poller runs at 1 Hz for all 4 GPUs throughout the sweep, writing CSV to $OUTPUT/gpu_util.csv. Captured columns: timestamp_iso, gpu_index, util_pct, mem_used_MiB, mem_total_MiB, sm_clock_MHz, power_W, temperature_C. The sampler is started before benchmark-live and torn down via a trap on EXIT/INT/TERM, so it is always cleaned up even if the run is killed. This data lets us plot time-windowed wall-clock GPU utilization (per card) so we can answer "is concurrency the bottleneck, or is each D's per-session decode the bottleneck?" — a question that came up during the E4-v3 / v5 analysis.
This commit is contained in:
@@ -54,6 +54,29 @@ log "OUTPUT=$OUTPUT"
|
||||
# Identifier for this run; interpolated into the start banner below.
label="e4p_kvc_v2_d_to_p_sync_run1"
log "=== [E4p] ${label} starting ==="
|
||||
|
||||
# Background GPU utilization sampler: one nvidia-smi poll per second, each
# poll appended as CSV rows (one row per reported GPU) to $OUTPUT/gpu_util.csv.
# NOTE(review): nvidia-smi is invoked without a GPU filter, so it reports every
# GPU it can see; the "gpus 0-3" in the log line assumes a 4-GPU host — confirm.
GPU_CSV="$OUTPUT/gpu_util.csv"
log "GPU sampling → $GPU_CSV (1 Hz, gpus 0-3)"
printf '%s\n' "timestamp_iso,gpu_index,util_pct,mem_used_MiB,mem_total_MiB,sm_clock_MHz,power_W,temperature_C" > "$GPU_CSV"

# Endless poll loop; meant to be launched with `&` and stopped by killing it.
# Globals: GPU_CSV (read; rows are appended to this file)
_gpu_sampler_loop() {
  local stamp
  while :; do
    # One UTC timestamp (millisecond precision) is shared by all rows of a poll.
    stamp=$(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)
    # nvidia-smi's stderr is discarded and pipeline failures are ignored, so a
    # failed poll only drops that second's rows. sed prefixes the timestamp
    # column and strips the spaces nvidia-smi puts after each comma.
    nvidia-smi \
      --query-gpu=index,utilization.gpu,memory.used,memory.total,clocks.sm,power.draw,temperature.gpu \
      --format=csv,noheader,nounits 2>/dev/null \
      | sed -e "s/^/${stamp},/" -e 's/ //g' >> "$GPU_CSV" || true
    sleep 1
  done
}

_gpu_sampler_loop &
GPU_SAMPLER_PID=$!
log "GPU sampler pid=$GPU_SAMPLER_PID"
|
||||
|
||||
#######################################
# Stop the background GPU sampler and log how many lines the CSV holds.
# Safe to call more than once: only the first call does any work.
# Globals:
#   GPU_SAMPLER_PID (read, then cleared) - PID of the background sampler
#   GPU_CSV         (read)               - path of the sampler's CSV output
# Outputs: one message via log
# Returns: 0
#######################################
cleanup_gpu_sampler() {
  # Idempotence guard: a second invocation must not re-kill or re-log.
  [[ -n "${GPU_SAMPLER_PID:-}" ]] || return 0
  local pid=$GPU_SAMPLER_PID
  GPU_SAMPLER_PID=""

  # SIGKILL is deliberate: the sampler loop has no state to flush, and a
  # signal that cannot be ignored guarantees the wait below cannot hang.
  kill -9 "$pid" 2>/dev/null || true
  wait "$pid" 2>/dev/null || true

  # Line count includes the CSV header line. Guard against a CSV that was
  # never created so the teardown path itself cannot emit an error.
  local rows=0
  if [[ -r "$GPU_CSV" ]]; then
    rows=$(wc -l < "$GPU_CSV")
  fi
  log "GPU sampler stopped (output: $GPU_CSV, $rows rows)"
}

# Teardown lives on EXIT only. A signal trap that merely runs the cleanup
# would return control to the script and let the sweep carry on after
# Ctrl-C / SIGTERM; instead INT and TERM exit with the conventional
# 128+signal status, which in turn fires the EXIT trap exactly once.
trap cleanup_gpu_sampler EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
|
||||
|
||||
uv run --no-sync python -m agentic_pd_hybrid.cli benchmark-live \
|
||||
--trace "$TRACE" \
|
||||
--output-root "$OUTPUT" \
|
||||
|
||||
Reference in New Issue
Block a user