#!/bin/bash
# xserv/tools/sync-and-build.sh
# Sync local project to dash5 and build/test/bench there.
#
# Usage:
#   ./tools/sync-and-build.sh [build|test|run|check|clippy]
#       Syncs the project to dash5, then runs `cargo <action> --release` there.
#       Defaults to `build` when no action is given.
#
#   ./tools/sync-and-build.sh bench [-- <extra runner args>]
#       Ensures llama.cpp is built (tools/setup-llama-cpp.sh) and a BF16 GGUF
#       exists, then runs tools/bench/runner.py against both xserv-server and
#       llama-server. Result lands in $REMOTE_DIR/bench-out/.
#
#   ./tools/sync-and-build.sh fetch-bench-out
#       Copies dash5:$REMOTE_DIR/bench-out/ back to ./bench-out/.

# Abort on the first failing command. `pipefail` additionally makes a pipeline
# fail when ANY stage fails, not just the last one: without it, a local `tar`
# error in the `tar | ssh` transfers below would be masked by a successful
# `ssh`, and the script would go on to build a partially-synced tree.
set -eo pipefail

# ssh destination used for every remote command.
REMOTE="dash5"
# Project root on the remote host; sync_project wipes and repopulates it.
REMOTE_DIR="/opt/wjh/projects/xserv"
# Model directory on the remote host; overridable from the environment.
REMOTE_MODEL_DIR="${REMOTE_MODEL_DIR:-/opt/wjh/models/qwen3-8b}"
# Absolute path of the local project root (the parent of this script's dir).
LOCAL_DIR="$(cd "$(dirname "$0")/.." && pwd)"

# First argument selects the action; default to `build`.
ACTION="${1:-build}"
# Drop the action so the remaining positional parameters are the extra args.
# `shift` fails when no argument was given, which must not trip `set -e`.
shift || true

# Shell snippet prepended to remote commands. It is single-quoted, so nothing
# expands locally: on the remote it picks /usr/local/cuda-12.9 when that
# directory exists (else /usr/local/cuda) as CUDA_HOME and prepends the CUDA
# bin directories to PATH.
cuda_env='if [ -d /usr/local/cuda-12.9 ]; then export CUDA_HOME=/usr/local/cuda-12.9; else export CUDA_HOME=/usr/local/cuda; fi && export PATH=$CUDA_HOME/bin:/usr/local/cuda/bin:$PATH'

# Mirror the local checkout onto the remote host using tar-over-ssh.
# Globals:   REMOTE, REMOTE_DIR, LOCAL_DIR (all read)
# Outputs:   a progress banner on stdout
sync_project() {
    # Top-level entries that survive on the remote side: build artifacts,
    # third_party/ (large + arch-specific) and benchmark results. The same
    # names are left out of the upload so they are never overwritten.
    local -a preserved=(target third_party bench-out)
    local -a tar_skip=()
    local find_keep="" entry

    for entry in "${preserved[@]}"; do
        find_keep+=" ! -name $entry"
        tar_skip+=(--exclude "$entry")
    done
    # .git is not uploaded either; it is not preserved, so a remote copy is wiped.
    tar_skip+=(--exclude .git)

    printf '%s\n' "=== Syncing to $REMOTE:$REMOTE_DIR ==="

    # Step 1: make sure the remote dir exists, then delete every top-level
    # entry in it that is not on the preserved list.
    ssh "$REMOTE" "mkdir -p $REMOTE_DIR && find $REMOTE_DIR -mindepth 1 -maxdepth 1${find_keep} -exec rm -rf {} +"

    # Step 2: stream a gzip'd tarball of the local tree and unpack it remotely.
    tar "${tar_skip[@]}" -C "$LOCAL_DIR" -czf - . |
        ssh "$REMOTE" "tar -xzf - -C $REMOTE_DIR"
}

# Upload the llama.cpp submodule working tree to the remote host.
# dash5 has no network (and no rsync), so the source is shipped with
# tar-over-ssh; the build dir, .git and any *.gguf files are left out.
# Globals:   REMOTE, REMOTE_DIR, LOCAL_DIR (all read)
# Outputs:   progress banner on stdout; error text on stderr
# Exits:     with status 1 when the local submodule has no CMakeLists.txt
sync_llama_src() {
    local llama_local="$LOCAL_DIR/third_party/llama.cpp"
    local llama_remote="$REMOTE_DIR/third_party/llama.cpp"

    # Bail out early when the submodule has not been checked out.
    [ -f "$llama_local/CMakeLists.txt" ] || {
        printf '%s\n' \
            "ERROR: llama.cpp source not found at $llama_local" \
            "  Run: git submodule update --init third_party/llama.cpp" >&2
        exit 1
    }

    printf '%s\n' "=== Syncing llama.cpp source to $REMOTE (tar) ==="

    # Nothing is deleted remotely, so an existing remote build/ dir is kept;
    # the unpack below only refreshes source files.
    ssh "$REMOTE" "mkdir -p $llama_remote"
    tar --exclude build --exclude .git --exclude '*.gguf' -C "$llama_local" -czf - . |
        ssh "$REMOTE" "tar -xzf - -C $llama_remote"
}

# Dispatch on the requested action. The remaining positional parameters are the
# extra arguments left after the action was shifted off.
case "$ACTION" in
    test|build|run|check|clippy)
        # Plain cargo actions: sync, then run `cargo <action> --release` remotely.
        sync_project
        echo "=== Running: cargo $ACTION ==="
        ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && cargo $ACTION --release 2>&1"
        ;;

    bench)
        # ssh hands the command to the remote shell as ONE string, so the extra
        # runner args must be re-quoted here; otherwise an argument containing
        # spaces or shell metacharacters is re-split / re-interpreted remotely.
        # A leading `--` is forwarded to the runner verbatim.
        # The count guard matters: `printf '%q '` with no arguments would emit
        # an empty quoted word and hand the runner a spurious "" argument.
        extra_args=""
        if [ "$#" -gt 0 ]; then
            extra_args=$(printf '%q ' "$@")
        fi

        sync_project
        sync_llama_src
        echo "=== Ensuring llama.cpp baseline is built ==="
        ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && \
            ./tools/setup-llama-cpp.sh 2>&1"

        echo "=== Ensuring BF16 GGUF exists for $REMOTE_MODEL_DIR ==="
        # Returned path on stdout's last line is what we feed --llama-gguf.
        # `set -o pipefail` on the remote side keeps `| tail -1` from hiding a
        # failed conversion; without it the last line of the error output would
        # silently be used as the GGUF path.
        if ! GGUF_PATH=$(ssh "$REMOTE" "set -o pipefail && $cuda_env && cd $REMOTE_DIR && \
            ./tools/convert-to-gguf.sh $REMOTE_MODEL_DIR 2>&1 | tail -1"); then
            echo "ERROR: convert-to-gguf.sh failed on $REMOTE: $GGUF_PATH" >&2
            exit 1
        fi
        if [ -z "$GGUF_PATH" ]; then
            echo "ERROR: convert-to-gguf.sh printed no GGUF path" >&2
            exit 1
        fi
        echo "    gguf: $GGUF_PATH"

        echo "=== Building xserv (release) ==="
        ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && \
            cargo build --release 2>&1"

        echo "=== Running benchmark suite ==="
        ssh "$REMOTE" "$cuda_env && cd $REMOTE_DIR && \
            python3 -m tools.bench.runner \
                --xserv-bin ./target/release/xserv-server \
                --xserv-model $REMOTE_MODEL_DIR \
                --llama-bin third_party/llama.cpp/build/bin/llama-server \
                --llama-gguf $GGUF_PATH \
                $extra_args 2>&1"
        ;;

    fetch-bench-out)
        # Stream the remote bench-out/ directory back as a tarball and unpack
        # it into the local bench-out/ directory.
        mkdir -p "$LOCAL_DIR/bench-out"
        echo "=== Fetching bench-out from $REMOTE:$REMOTE_DIR/bench-out (tar) ==="
        ssh "$REMOTE" "tar -C $REMOTE_DIR/bench-out -czf - ." \
            | tar -xzf - -C "$LOCAL_DIR/bench-out"
        echo "    -> $LOCAL_DIR/bench-out/"
        ;;

    *)
        # Anything else is a usage error (exit status 2).
        echo "Unknown action: $ACTION" >&2
        echo "Usage: $0 {build|test|run|check|clippy|bench|fetch-bench-out} [-- extra args]" >&2
        exit 2
        ;;
esac