Skip to content

vllm.engine.multiprocessing

Modules:

Name Description
client
engine

IPC_DATA_EXT module-attribute

IPC_DATA_EXT = '_data_socket'

IPC_HEALTH_EXT module-attribute

IPC_HEALTH_EXT = '_health_socket'

IPC_INPUT_EXT module-attribute

IPC_INPUT_EXT = '_input_socket'

IPC_OUTPUT_EXT module-attribute

IPC_OUTPUT_EXT = '_output_socket'

REQUEST_OUTPUTS_T module-attribute

VLLM_RPC_SUCCESS_STR module-attribute

VLLM_RPC_SUCCESS_STR = 'SUCCESS'

MQEngineDeadError

Bases: RuntimeError

Source code in vllm/engine/multiprocessing/__init__.py
class MQEngineDeadError(RuntimeError):
    """Error signaling that the engine loop is not running."""
    pass

RPCAbortRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCAbortRequest:
    """RPC message asking the engine to abort a request."""
    # ID of the request to abort.
    request_id: str

request_id instance-attribute

request_id: str

__init__

__init__(request_id: str) -> None

RPCAdapterLoadedResponse dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCAdapterLoadedResponse:
    """Response indicating that an adapter load request completed."""
    # ID of the originating load request.
    request_id: str

request_id instance-attribute

request_id: str

__init__

__init__(request_id: str) -> None

RPCError dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCError:
    """RPC message reporting an error back to the caller."""
    # ID of the request that failed; None when the error is not tied
    # to a specific request.
    request_id: Optional[str]
    # Whether the engine itself is in an errored state.
    is_engine_errored: bool
    # The underlying exception that occurred.
    exception: BaseException

exception instance-attribute

exception: BaseException

is_engine_errored instance-attribute

is_engine_errored: bool

request_id instance-attribute

request_id: Optional[str]

__init__

__init__(
    request_id: Optional[str],
    is_engine_errored: bool,
    exception: BaseException,
) -> None

RPCIsSleepingRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCIsSleepingRequest:
    """RPC message querying whether the engine is sleeping."""
    # Set the default value of request_id to a new UUID
    request_id: str = field(default_factory=lambda: str(uuid.uuid4()))

request_id class-attribute instance-attribute

request_id: str = field(
    default_factory=lambda: str(uuid4())
)

__init__

__init__(request_id: str = <new UUID4 string per call>) -> None

RPCIsSleepingResponse dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCIsSleepingResponse:
    """Response to RPCIsSleepingRequest."""
    # ID of the originating request.
    request_id: str
    # Whether the engine is currently sleeping.
    is_sleeping: bool

is_sleeping instance-attribute

is_sleeping: bool

request_id instance-attribute

request_id: str

__init__

__init__(request_id: str, is_sleeping: bool) -> None

RPCLoadAdapterRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCLoadAdapterRequest:
    """RPC message asking the engine to load a LoRA adapter."""
    # The LoRA adapter to load.
    lora_request: LoRARequest
    # Set the default value of request_id to a new UUID
    request_id: str = field(default_factory=lambda: str(uuid.uuid4()))

lora_request instance-attribute

lora_request: LoRARequest

request_id class-attribute instance-attribute

request_id: str = field(
    default_factory=lambda: str(uuid4())
)

__init__

__init__(
    lora_request: LoRARequest,
    request_id: str = <new UUID4 string per call>,
) -> None

RPCProcessRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCProcessRequest:
    """RPC message carrying a single request for the engine to process.

    Although decorated with @dataclass, it defines an explicit __init__
    with the same signature, so construction behaves like the generated
    one while also invoking super().__init__().
    """
    # The input prompt to process.
    prompt: PromptType
    # Sampling (generation) or pooling parameters for this request.
    params: Union[SamplingParams, PoolingParams]
    # Unique ID identifying this request.
    request_id: str
    # Optional LoRA adapter to apply.
    lora_request: Optional[LoRARequest] = None
    # Optional tracing headers to propagate with the request.
    trace_headers: Optional[Mapping[str, str]] = None
    # Optional prompt adapter to apply.
    prompt_adapter_request: Optional[PromptAdapterRequest] = None
    # Request priority; defaults to 0.
    priority: int = 0

    def __init__(
        self,
        prompt: PromptType,
        params: Union[SamplingParams, PoolingParams],
        request_id: str,
        lora_request: Optional[LoRARequest] = None,
        trace_headers: Optional[Mapping[str, str]] = None,
        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
        priority: int = 0,
    ) -> None:
        """Store all request fields on the instance."""
        super().__init__()

        self.prompt = prompt
        self.params = params
        self.request_id = request_id
        self.lora_request = lora_request
        self.trace_headers = trace_headers
        self.prompt_adapter_request = prompt_adapter_request
        self.priority = priority

lora_request class-attribute instance-attribute

lora_request: Optional[LoRARequest] = lora_request

params instance-attribute

params: Union[SamplingParams, PoolingParams] = params

priority class-attribute instance-attribute

priority: int = priority

prompt instance-attribute

prompt: PromptType = prompt

prompt_adapter_request class-attribute instance-attribute

prompt_adapter_request: Optional[PromptAdapterRequest] = (
    prompt_adapter_request
)

request_id instance-attribute

request_id: str = request_id

trace_headers class-attribute instance-attribute

trace_headers: Optional[Mapping[str, str]] = trace_headers

__init__

__init__(
    prompt: PromptType,
    params: Union[SamplingParams, PoolingParams],
    request_id: str,
    lora_request: Optional[LoRARequest] = None,
    trace_headers: Optional[Mapping[str, str]] = None,
    prompt_adapter_request: Optional[
        PromptAdapterRequest
    ] = None,
    priority: int = 0,
) -> None
Source code in vllm/engine/multiprocessing/__init__.py
def __init__(
    self,
    prompt: PromptType,
    params: Union[SamplingParams, PoolingParams],
    request_id: str,
    lora_request: Optional[LoRARequest] = None,
    trace_headers: Optional[Mapping[str, str]] = None,
    prompt_adapter_request: Optional[PromptAdapterRequest] = None,
    priority: int = 0,
) -> None:
    """Store all request fields on the instance."""
    super().__init__()

    self.prompt = prompt
    self.params = params
    self.request_id = request_id
    self.lora_request = lora_request
    self.trace_headers = trace_headers
    self.prompt_adapter_request = prompt_adapter_request
    self.priority = priority

RPCResetMultiModalCacheRequest

Bases: Enum

Source code in vllm/engine/multiprocessing/__init__.py
class RPCResetMultiModalCacheRequest(Enum):
    """Singleton RPC message requesting a multi-modal cache reset."""
    RESET = 1

RESET class-attribute instance-attribute

RESET = 1

RPCResetPrefixCacheRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCResetPrefixCacheRequest:
    """RPC message asking the engine to reset the prefix cache."""
    # Device whose prefix cache should be reset.
    device: Device

device instance-attribute

device: Device

__init__

__init__(device: Device) -> None

RPCSleepRequest

Bases: Enum

Source code in vllm/engine/multiprocessing/__init__.py
class RPCSleepRequest(Enum):
    """RPC message asking the engine to sleep at a given level."""
    SLEEP_LEVEL_1 = 1
    SLEEP_LEVEL_2 = 2

SLEEP_LEVEL_1 class-attribute instance-attribute

SLEEP_LEVEL_1 = 1

SLEEP_LEVEL_2 class-attribute instance-attribute

SLEEP_LEVEL_2 = 2

RPCStartupRequest

Bases: Enum

Source code in vllm/engine/multiprocessing/__init__.py
class RPCStartupRequest(Enum):
    """Startup RPC message; IS_SERVER_READY asks if the server is ready."""
    IS_SERVER_READY = 1

IS_SERVER_READY class-attribute instance-attribute

IS_SERVER_READY = 1

RPCStartupResponse dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCStartupResponse:
    """Response to the startup handshake."""
    # Whether tracing is enabled on the server.
    tracing_enabled: bool

tracing_enabled instance-attribute

tracing_enabled: bool

__init__

__init__(tracing_enabled: bool) -> None

RPCUProfileRequest

Bases: Enum

Source code in vllm/engine/multiprocessing/__init__.py
class RPCUProfileRequest(Enum):
    """RPC message starting or stopping profiling."""
    START_PROFILE = 1
    STOP_PROFILE = 2

START_PROFILE class-attribute instance-attribute

START_PROFILE = 1

STOP_PROFILE class-attribute instance-attribute

STOP_PROFILE = 2

RPCWakeUpRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCWakeUpRequest:
    """RPC message asking the engine to wake up."""
    # Optional tags restricting what is woken up; presumably None means
    # wake everything — confirm against the engine-side handler.
    tags: Optional[list[str]] = None

tags class-attribute instance-attribute

tags: Optional[list[str]] = None

__init__

__init__(tags: Optional[list[str]] = None) -> None

ENGINE_DEAD_ERROR

ENGINE_DEAD_ERROR(
    error: Optional[BaseException] = None,
) -> MQEngineDeadError
Source code in vllm/engine/multiprocessing/__init__.py
def ENGINE_DEAD_ERROR(
        error: Optional[BaseException] = None) -> MQEngineDeadError:
    """Build the MQEngineDeadError reported when the engine loop has died.

    When the original exception is known, its repr is appended to the
    message to aid debugging.
    """
    message = ("Engine loop is not running. Inspect the stacktrace to "
               "find the original error")
    if error is not None:
        message = f"{message}: {repr(error)}."
    return MQEngineDeadError(message)