# NOTE: These imports are assumptions based on the vLLM codebase this module
# appears to belong to; adjust the paths if your project layout differs.
from collections.abc import Sequence

import torch

from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import BeamSearchParams, SamplingParams

logger = init_logger(__name__)


class RequestLogger:
    """Logs request inputs and outputs, truncating long fields so a single
    request cannot flood the logs."""

    def __init__(self, *, max_log_len: int | None) -> None:
        # Maximum number of characters (for text) or tokens (for token ID
        # lists) to include per logged field; None disables truncation.
        self.max_log_len = max_log_len

    def log_inputs(
        self,
        request_id: str,
        prompt: str | None,
        prompt_token_ids: list[int] | None,
        prompt_embeds: torch.Tensor | None,
        params: SamplingParams | PoolingParams | BeamSearchParams | None,
        lora_request: LoRARequest | None,
    ) -> None:
        max_log_len = self.max_log_len
        if max_log_len is not None:
            # Truncate the prompt text and token IDs before logging.
            if prompt is not None:
                prompt = prompt[:max_log_len]

            if prompt_token_ids is not None:
                prompt_token_ids = prompt_token_ids[:max_log_len]

        # Full prompt details only at DEBUG level; embeddings are logged by
        # shape only, never by value.
        logger.debug(
            "Request %s details: prompt: %r, "
            "prompt_token_ids: %s, "
            "prompt_embeds shape: %s.",
            request_id,
            prompt,
            prompt_token_ids,
            prompt_embeds.shape if prompt_embeds is not None else None,
        )
        logger.info(
            "Received request %s: params: %s, lora_request: %s.",
            request_id,
            params,
            lora_request,
        )

    def log_outputs(
        self,
        request_id: str,
        outputs: str | None,
        output_token_ids: Sequence[int] | None,
        finish_reason: str | None = None,
        is_streaming: bool = False,
        delta: bool = False,
    ) -> None:
        max_log_len = self.max_log_len
        if max_log_len is not None:
            if outputs is not None:
                outputs = outputs[:max_log_len]

            if output_token_ids is not None:
                # Materialize as a list so slicing works for any Sequence,
                # then truncate.
                output_token_ids = list(output_token_ids)[:max_log_len]

        # Distinguish a streaming delta from the final streamed response.
        stream_info = ""
        if is_streaming:
            stream_info = " (streaming delta)" if delta else " (streaming complete)"

        logger.info(
            "Generated response %s%s: output: %r, "
            "output_token_ids: %s, finish_reason: %s.",
            request_id,
            stream_info,
            outputs,
            output_token_ids,
            finish_reason,
        )
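

# Usage sketch: a minimal, illustrative example of driving RequestLogger.
# Everything below is an assumption for demonstration only; the request ID,
# prompt text, and token IDs are placeholders, and `params`/`lora_request`
# are left as None to avoid depending on their constructors.
if __name__ == "__main__":
    request_logger = RequestLogger(max_log_len=128)

    # Log the incoming prompt; fields longer than 128 chars/tokens are cut.
    request_logger.log_inputs(
        request_id="req-0",
        prompt="Hello, world!",
        prompt_token_ids=[15496, 11, 995, 0],  # placeholder token IDs
        prompt_embeds=None,
        params=None,  # e.g. a SamplingParams instance in real use
        lora_request=None,
    )

    # Log the completed (non-streaming) response.
    request_logger.log_outputs(
        request_id="req-0",
        outputs="Hi there!",
        output_token_ids=[17250, 612, 0],  # placeholder token IDs
        finish_reason="stop",
        is_streaming=False,
    )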