
vllm.tracing.utils

TRACE_HEADERS module-attribute

TRACE_HEADERS = ['traceparent', 'tracestate']

The W3C Trace Context headers used to propagate distributed tracing context between services.
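
A traceparent value follows the W3C Trace Context format, version-trace_id-parent_id-trace_flags. The header values below are illustrative, not taken from a real request:

# Illustrative W3C Trace Context header values.
example_headers = {
    # version "00", 16-byte trace id, 8-byte parent span id, "01" = sampled
    "traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
    # optional vendor-specific key/value pairs
    "tracestate": "congo=t61rcWkgMzE",
}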

logger module-attribute

logger = init_logger(__name__)

LoadingSpanAttributes

Custom attributes for code-level tracing (file, line number).

Source code in vllm/tracing/utils.py
class LoadingSpanAttributes:
    """Custom attributes for code-level tracing (file, line number)."""

    CODE_NAMESPACE = "code.namespace"
    CODE_FUNCTION = "code.function"
    CODE_FILEPATH = "code.filepath"
    CODE_LINENO = "code.lineno"

CODE_FILEPATH class-attribute instance-attribute

CODE_FILEPATH = 'code.filepath'

CODE_FUNCTION class-attribute instance-attribute

CODE_FUNCTION = 'code.function'

CODE_LINENO class-attribute instance-attribute

CODE_LINENO = 'code.lineno'

CODE_NAMESPACE class-attribute instance-attribute

CODE_NAMESPACE = 'code.namespace'
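
A minimal sketch of attaching these code-level attributes to an OpenTelemetry span. The tracer setup, span name, and attribute values here are illustrative assumptions, not vLLM's own instrumentation:

from opentelemetry import trace

from vllm.tracing.utils import LoadingSpanAttributes

tracer = trace.get_tracer(__name__)

# Record where in the codebase the traced work happens
# (all values below are hypothetical examples).
with tracer.start_as_current_span("model_loading") as span:
    span.set_attribute(LoadingSpanAttributes.CODE_NAMESPACE, "vllm.model_executor")
    span.set_attribute(LoadingSpanAttributes.CODE_FUNCTION, "load_model")
    span.set_attribute(LoadingSpanAttributes.CODE_FILEPATH, "vllm/model_executor/model_loader.py")
    span.set_attribute(LoadingSpanAttributes.CODE_LINENO, 42)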

SpanAttributes

Standard attributes for spans.

These are largely based on OpenTelemetry Semantic Conventions but are defined here as constants so they can be used by any backend or logger.

Source code in vllm/tracing/utils.py
class SpanAttributes:
    """
    Standard attributes for spans.

    These are largely based on OpenTelemetry Semantic Conventions but are defined
    here as constants so they can be used by any backend or logger.
    """

    # Attribute names copied from OTel semantic conventions to avoid version conflicts
    GEN_AI_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
    GEN_AI_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
    GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
    GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p"
    GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
    GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"

    # Custom attributes added until they are standardized
    GEN_AI_REQUEST_ID = "gen_ai.request.id"
    GEN_AI_REQUEST_N = "gen_ai.request.n"
    GEN_AI_USAGE_NUM_SEQUENCES = "gen_ai.usage.num_sequences"
    GEN_AI_LATENCY_TIME_IN_QUEUE = "gen_ai.latency.time_in_queue"
    GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN = "gen_ai.latency.time_to_first_token"
    GEN_AI_LATENCY_E2E = "gen_ai.latency.e2e"
    GEN_AI_LATENCY_TIME_IN_SCHEDULER = "gen_ai.latency.time_in_scheduler"

    # Latency breakdowns
    GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD = "gen_ai.latency.time_in_model_forward"
    GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE = "gen_ai.latency.time_in_model_execute"
    GEN_AI_LATENCY_TIME_IN_MODEL_PREFILL = "gen_ai.latency.time_in_model_prefill"
    GEN_AI_LATENCY_TIME_IN_MODEL_DECODE = "gen_ai.latency.time_in_model_decode"
    GEN_AI_LATENCY_TIME_IN_MODEL_INFERENCE = "gen_ai.latency.time_in_model_inference"

GEN_AI_LATENCY_E2E class-attribute instance-attribute

GEN_AI_LATENCY_E2E = 'gen_ai.latency.e2e'

GEN_AI_LATENCY_TIME_IN_MODEL_DECODE class-attribute instance-attribute

GEN_AI_LATENCY_TIME_IN_MODEL_DECODE = (
    "gen_ai.latency.time_in_model_decode"
)

GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE class-attribute instance-attribute

GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE = (
    "gen_ai.latency.time_in_model_execute"
)

GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD class-attribute instance-attribute

GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD = (
    "gen_ai.latency.time_in_model_forward"
)

GEN_AI_LATENCY_TIME_IN_MODEL_INFERENCE class-attribute instance-attribute

GEN_AI_LATENCY_TIME_IN_MODEL_INFERENCE = (
    "gen_ai.latency.time_in_model_inference"
)

GEN_AI_LATENCY_TIME_IN_MODEL_PREFILL class-attribute instance-attribute

GEN_AI_LATENCY_TIME_IN_MODEL_PREFILL = (
    "gen_ai.latency.time_in_model_prefill"
)

GEN_AI_LATENCY_TIME_IN_QUEUE class-attribute instance-attribute

GEN_AI_LATENCY_TIME_IN_QUEUE = (
    "gen_ai.latency.time_in_queue"
)

GEN_AI_LATENCY_TIME_IN_SCHEDULER class-attribute instance-attribute

GEN_AI_LATENCY_TIME_IN_SCHEDULER = (
    "gen_ai.latency.time_in_scheduler"
)

GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN class-attribute instance-attribute

GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN = (
    "gen_ai.latency.time_to_first_token"
)

GEN_AI_REQUEST_ID class-attribute instance-attribute

GEN_AI_REQUEST_ID = 'gen_ai.request.id'

GEN_AI_REQUEST_MAX_TOKENS class-attribute instance-attribute

GEN_AI_REQUEST_MAX_TOKENS = 'gen_ai.request.max_tokens'

GEN_AI_REQUEST_N class-attribute instance-attribute

GEN_AI_REQUEST_N = 'gen_ai.request.n'

GEN_AI_REQUEST_TEMPERATURE class-attribute instance-attribute

GEN_AI_REQUEST_TEMPERATURE = 'gen_ai.request.temperature'

GEN_AI_REQUEST_TOP_P class-attribute instance-attribute

GEN_AI_REQUEST_TOP_P = 'gen_ai.request.top_p'

GEN_AI_RESPONSE_MODEL class-attribute instance-attribute

GEN_AI_RESPONSE_MODEL = 'gen_ai.response.model'

GEN_AI_USAGE_COMPLETION_TOKENS class-attribute instance-attribute

GEN_AI_USAGE_COMPLETION_TOKENS = (
    "gen_ai.usage.completion_tokens"
)

GEN_AI_USAGE_NUM_SEQUENCES class-attribute instance-attribute

GEN_AI_USAGE_NUM_SEQUENCES = 'gen_ai.usage.num_sequences'

GEN_AI_USAGE_PROMPT_TOKENS class-attribute instance-attribute

GEN_AI_USAGE_PROMPT_TOKENS = 'gen_ai.usage.prompt_tokens'
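
A hedged sketch of recording request-level attributes on a span using these constants. The span name and all values are illustrative, and the OpenTelemetry tracer here stands in for whatever backend or logger consumes the attributes:

from opentelemetry import trace

from vllm.tracing.utils import SpanAttributes

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("llm_request") as span:
    # Request parameters (example values)
    span.set_attribute(SpanAttributes.GEN_AI_REQUEST_ID, "req-123")
    span.set_attribute(SpanAttributes.GEN_AI_REQUEST_MAX_TOKENS, 256)
    span.set_attribute(SpanAttributes.GEN_AI_REQUEST_TEMPERATURE, 0.7)
    # Usage and latency measurements (example values; latency assumed in seconds)
    span.set_attribute(SpanAttributes.GEN_AI_USAGE_PROMPT_TOKENS, 57)
    span.set_attribute(SpanAttributes.GEN_AI_USAGE_COMPLETION_TOKENS, 128)
    span.set_attribute(SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN, 0.042)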

contains_trace_headers

contains_trace_headers(headers: Mapping[str, str]) -> bool

Check if the provided headers dictionary contains trace context.

Source code in vllm/tracing/utils.py
def contains_trace_headers(headers: Mapping[str, str]) -> bool:
    """Check if the provided headers dictionary contains trace context."""
    return any(h in headers for h in TRACE_HEADERS)
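
Usage, with illustrative header values. Note that the membership test is a plain dictionary lookup, so header names are expected to already be lowercase:

from vllm.tracing.utils import contains_trace_headers

headers = {
    "traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
    "content-type": "application/json",
}
assert contains_trace_headers(headers)
assert not contains_trace_headers({"content-type": "application/json"})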

extract_trace_headers

extract_trace_headers(
    headers: Mapping[str, str],
) -> Mapping[str, str]

Extract only trace-related headers from a larger header dictionary. Useful for logging or passing context to a non-OTel client.

Source code in vllm/tracing/utils.py
def extract_trace_headers(headers: Mapping[str, str]) -> Mapping[str, str]:
    """
    Extract only trace-related headers from a larger header dictionary.
    Useful for logging or passing context to a non-OTel client.
    """
    return {h: headers[h] for h in TRACE_HEADERS if h in headers}
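
Usage, with illustrative values. Only the traceparent and tracestate entries survive, so the result is safe to log or forward without leaking unrelated headers:

from vllm.tracing.utils import extract_trace_headers

incoming = {
    "traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
    "tracestate": "congo=t61rcWkgMzE",
    "authorization": "Bearer abc123",  # illustrative placeholder
}
trace_only = extract_trace_headers(incoming)
# {'traceparent': '00-...-01', 'tracestate': 'congo=t61rcWkgMzE'}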

log_tracing_disabled_warning

log_tracing_disabled_warning() -> None

Log a warning, at most once per process, when a request carries trace context while tracing is disabled.

Source code in vllm/tracing/utils.py
@run_once
def log_tracing_disabled_warning() -> None:
    logger.warning("Received a request with trace context but tracing is disabled")