#!/usr/bin/env bash
#
# Argument parsing and configuration defaults for a Frontier trace-replay
# probe run.
#
# Usage: <this-script> <run-name> <fixture-dir>
#   <run-name>     name of the sub-directory created under RUN_ROOT
#   <fixture-dir>  directory holding frontier.csv and sidecar.jsonl; a relative
#                  path is resolved against REPLAYSERVE_ROOT
#
# Environment overrides read here (defaults are assigned below):
#   REPLAYSERVE_ROOT, FRONTIER_ROOT, PYTHON_BIN, PYTHON_DEPS_DIR, RUN_ROOT,
#   PREFIX_CACHING, CHUNKED_PREFILL, LONG_PREFILL_TOKEN_THRESHOLD,
#   BATCH_SIZE_CAP, MAX_TOKENS_IN_BATCH
set -euo pipefail

if [ "$#" -ne 2 ]; then
  echo "usage: $0 <run-name> <fixture-dir>" >&2
  echo "example: $0 n195_default runs/rs1/blocker_request_194/fixtures/coder_195" >&2
  exit 2
fi

RUN_NAME="$1"
FIXTURE_DIR="$2"

# Default root is the parent of the directory containing this script.
REPLAYSERVE_ROOT="${REPLAYSERVE_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
FRONTIER_ROOT="${FRONTIER_ROOT:-/tmp/toc-llm-sim-research/Frontier}"

# Interpreter choice: an explicit PYTHON_BIN wins, then the repository's
# virtualenv interpreter if it is executable, then python3 from PATH.
if [ -z "${PYTHON_BIN:-}" ] && [ -x "$REPLAYSERVE_ROOT/.venv/bin/python" ]; then
  PYTHON_BIN="$REPLAYSERVE_ROOT/.venv/bin/python"
else
  PYTHON_BIN="${PYTHON_BIN:-python3}"
fi

PYTHON_DEPS_DIR="${PYTHON_DEPS_DIR:-$REPLAYSERVE_ROOT/.deps/python}"
RUN_ROOT="${RUN_ROOT:-$REPLAYSERVE_ROOT/runs/rs1/blocker_request_194/probes}"
RUN_DIR="${RUN_ROOT}/${RUN_NAME}"

# Absolute fixture paths are used as given; relative ones are anchored at
# REPLAYSERVE_ROOT rather than at the caller's working directory.
case "$FIXTURE_DIR" in
  /*) ;;
  *) FIXTURE_DIR="${REPLAYSERVE_ROOT}/${FIXTURE_DIR}" ;;
esac

TRACE_FILE="${FIXTURE_DIR}/frontier.csv"
SIDECAR_FILE="${FIXTURE_DIR}/sidecar.jsonl"
METRICS_ROOT="${RUN_DIR}/frontier_metrics"
RUN_ID="rs1_blocker_${RUN_NAME}"

# Scheduler knobs; each can be overridden from the environment.
PREFIX_CACHING="${PREFIX_CACHING:-true}"
CHUNKED_PREFILL="${CHUNKED_PREFILL:-true}"
LONG_PREFILL_TOKEN_THRESHOLD="${LONG_PREFILL_TOKEN_THRESHOLD:-64}"
BATCH_SIZE_CAP="${BATCH_SIZE_CAP:-128}"
MAX_TOKENS_IN_BATCH="${MAX_TOKENS_IN_BATCH:-32768}"

#######################################
# Ensure a configuration value is exactly the string "true" or "false".
# Arguments:
#   $1 - name of the setting, used only in the error message
#   $2 - value to validate
# Outputs:
#   On an invalid value, writes an error line to stderr.
# Returns:
#   0 when the value is valid; otherwise terminates the calling shell with
#   exit status 2 (it does not return).
#######################################
require_bool() {
  local name="$1"
  local value="$2"

  case "$value" in
    true | false) ;;
    *)
      # printf with %s keeps the value literal regardless of its content.
      printf 'ERROR: %s must be true or false; got %s\n' "$name" "$value" >&2
      exit 2
      ;;
  esac
}

# Both toggles are later compared against the literal "true", so anything
# other than "true"/"false" is rejected up front.
require_bool "PREFIX_CACHING" "$PREFIX_CACHING"
require_bool "CHUNKED_PREFILL" "$CHUNKED_PREFILL"

# Fail fast with exit status 2 when a required input is missing, before any
# run directory is created.
if [ ! -d "$FRONTIER_ROOT" ]; then
  echo "ERROR: Frontier root does not exist: $FRONTIER_ROOT" >&2
  exit 2
fi
if [ ! -f "$TRACE_FILE" ]; then
  echo "ERROR: fixture trace does not exist: $TRACE_FILE" >&2
  exit 2
fi
if [ ! -f "$SIDECAR_FILE" ]; then
  echo "ERROR: fixture sidecar does not exist: $SIDECAR_FILE" >&2
  exit 2
fi

mkdir -p "$RUN_DIR" "$METRICS_ROOT"

# Put the optional vendored dependency directory (when present) and the
# Frontier checkout ahead of any PYTHONPATH inherited from the caller.
if [ -d "$PYTHON_DEPS_DIR" ]; then
  export PYTHONPATH="$PYTHON_DEPS_DIR:$FRONTIER_ROOT${PYTHONPATH:+:$PYTHONPATH}"
else
  export PYTHONPATH="$FRONTIER_ROOT${PYTHONPATH:+:$PYTHONPATH}"
fi

# Environment handed to the simulator process.
export WANDB_DISABLED=true
export VIDUR_DISABLE_WANDB=1
export FRONTIER_LOG_LEVEL="${FRONTIER_LOG_LEVEL:-info}"
export PYTHONDONTWRITEBYTECODE=1

# Simulator command line, kept as an array so every argument survives as a
# single word regardless of spaces in paths.
CMD=(
  "$PYTHON_BIN" -m frontier.main

  # Simulation topology and hardware.
  --simulation_mode online
  --sys_arch co-location
  --cc_backend_config_type analytical
  --cluster_config_num_replicas 1
  --cluster_scheduler_config_type sticky_round_robin
  --replica_config_model_name Qwen/Qwen3-32B
  --replica_config_device a800
  --replica_config_network_device a800_dgx
  --replica_config_attn_tensor_parallel_size 2
  --replica_config_num_pipeline_stages 1
  --replica_config_attn_data_parallel_size 1

  # Replica scheduler; the three quoted values come from the environment
  # overrides BATCH_SIZE_CAP, MAX_TOKENS_IN_BATCH and
  # LONG_PREFILL_TOKEN_THRESHOLD.
  --replica_scheduler_config_type vllm_v1
  --decode_cuda_graph_mode full_decode_only
  --vllm_v1_scheduler_config_batch_size_cap "$BATCH_SIZE_CAP"
  --vllm_v1_scheduler_config_max_tokens_in_batch "$MAX_TOKENS_IN_BATCH"
  --vllm_v1_scheduler_config_long_prefill_token_threshold "$LONG_PREFILL_TOKEN_THRESHOLD"
  --vllm_v1_scheduler_config_block_size 16
  --vllm_v1_scheduler_config_num_blocks_mode memory_planner
  --vllm_v1_scheduler_config_gpu_memory_utilization 0.9
  --vllm_v1_scheduler_config_non_kv_cache_overhead_bytes 0

  # Requests are replayed from the fixture trace.
  --request_generator_config_type trace_replay
  --trace_request_generator_config_trace_file "$TRACE_FILE"
  --trace_request_generator_config_max_tokens 32768

  # NOTE(review): the "forrest" spelling is reproduced verbatim; presumably it
  # matches the option name frontier.main defines - confirm before changing.
  --random_forrest_execution_time_predictor_config_enable_dummy_mode
  --random_forrest_execution_time_predictor_config_dummy_execution_time_ms 1.0

  # Metrics output: tabular metrics on, plots and traces off.
  --metrics_config_output_dir "$METRICS_ROOT"
  --metrics_config_run_id "$RUN_ID"
  --metrics_config_write_metrics
  --metrics_config_store_request_metrics
  --metrics_config_store_batch_metrics
  --metrics_config_store_token_completion_metrics
  --metrics_config_store_utilization_metrics
  --no-metrics_config_store_plots
  --no-metrics_config_enable_chrome_trace
  --no-metrics_config_write_json_trace
  --no-metrics_config_store_frontier_stage_batch_ledger
)

# Each toggle is always passed explicitly, as either the enabling flag or its
# --no- counterpart.
if [ "$PREFIX_CACHING" = "true" ]; then
  CMD+=(--vllm_v1_scheduler_config_enable_prefix_caching)
else
  CMD+=(--no-vllm_v1_scheduler_config_enable_prefix_caching)
fi

if [ "$CHUNKED_PREFILL" = "true" ]; then
  CMD+=(--vllm_v1_scheduler_config_enable_chunked_prefill)
else
  CMD+=(--no-vllm_v1_scheduler_config_enable_chunked_prefill)
fi

# command.txt: the working directory, exported environment and shell-quoted
# command line (%q) used for the run.
{
  printf 'cd %q\n' "$FRONTIER_ROOT"
  printf 'export PYTHONPATH=%q\n' "$PYTHONPATH"
  printf 'export WANDB_DISABLED=%q\n' "$WANDB_DISABLED"
  printf 'export VIDUR_DISABLE_WANDB=%q\n' "$VIDUR_DISABLE_WANDB"
  printf 'export FRONTIER_LOG_LEVEL=%q\n' "$FRONTIER_LOG_LEVEL"
  printf 'export PYTHONDONTWRITEBYTECODE=%q\n' "$PYTHONDONTWRITEBYTECODE"
  printf 'command='
  printf '%q ' "${CMD[@]}"
  printf '\n'
} > "$RUN_DIR/command.txt"

# The commit lookup is best-effort: if git fails (for example FRONTIER_ROOT is
# not a git checkout) the value is recorded as empty and the run proceeds.
# git's own error message still reaches stderr.
FRONTIER_HEAD="$(git -C "$FRONTIER_ROOT" rev-parse HEAD)" || FRONTIER_HEAD=""

# env.txt: plain key=value snapshot of the resolved configuration.
{
  printf 'run_name=%s\n' "$RUN_NAME"
  printf 'replayserve_root=%s\n' "$REPLAYSERVE_ROOT"
  printf 'frontier_root=%s\n' "$FRONTIER_ROOT"
  printf 'python_deps_dir=%s\n' "$PYTHON_DEPS_DIR"
  printf 'fixture_dir=%s\n' "$FIXTURE_DIR"
  printf 'trace_file=%s\n' "$TRACE_FILE"
  printf 'sidecar_file=%s\n' "$SIDECAR_FILE"
  printf 'run_dir=%s\n' "$RUN_DIR"
  printf 'metrics_root=%s\n' "$METRICS_ROOT"
  printf 'run_id=%s\n' "$RUN_ID"
  printf 'prefix_caching=%s\n' "$PREFIX_CACHING"
  printf 'chunked_prefill=%s\n' "$CHUNKED_PREFILL"
  printf 'long_prefill_token_threshold=%s\n' "$LONG_PREFILL_TOKEN_THRESHOLD"
  printf 'batch_size_cap=%s\n' "$BATCH_SIZE_CAP"
  printf 'max_tokens_in_batch=%s\n' "$MAX_TOKENS_IN_BATCH"
  printf 'frontier_head=%s\n' "$FRONTIER_HEAD"
} > "$RUN_DIR/env.txt"

START_EPOCH="$(date +%s)"
printf '%s\n' "$START_EPOCH" > "$RUN_DIR/start_epoch.txt"

# errexit is suspended around the simulator so that a failing run still gets
# its exit code, end time and runtime written out below.
set +e
(
  # With errexit off a failed cd would otherwise fall through and launch the
  # simulator from the wrong directory; abort the subshell with cd's status.
  cd "$FRONTIER_ROOT" || exit
  "${CMD[@]}"
) >"$RUN_DIR/stdout.log" 2>"$RUN_DIR/stderr.log"
EXIT_CODE=$?
set -e

END_EPOCH="$(date +%s)"
printf '%s\n' "$END_EPOCH" > "$RUN_DIR/end_epoch.txt"
printf '%s\n' "$EXIT_CODE" > "$RUN_DIR/exit_code.txt"
printf '%s\n' "$((END_EPOCH - START_EPOCH))" > "$RUN_DIR/runtime_seconds.txt"

# Post-processing runs only after a successful simulation and is best-effort:
# its failure is reported on stderr but never changes this script's exit code.
if [ "$EXIT_CODE" -eq 0 ]; then
  if ! "$PYTHON_BIN" "$REPLAYSERVE_ROOT/tools/postprocess_frontier_smoke.py" \
    --run-dir "$RUN_DIR" \
    --fixture-dir "$FIXTURE_DIR"; then
    echo "WARNING: postprocess_frontier_smoke.py failed for run dir: $RUN_DIR" >&2
  fi
fi

# Propagate the simulator's own exit status to the caller.
exit "$EXIT_CODE"