Skip to content

vllm.core.placeholder_block_space_manager

PlaceholderBlockSpaceManager

Bases: BlockSpaceManager

A version of BlockSpaceManager for use in environments where block management is not required. For example: pooling models or attention-free models like Mamba.

This class provides the same interface as BlockSpaceManager, but its methods either perform no action or return a fixed, simple value (for example, True or AllocStatus.OK). It is designed for scenarios where the overhead of block management is unnecessary, such as an embedding environment.

Source code in vllm/core/placeholder_block_space_manager.py
class PlaceholderBlockSpaceManager(BlockSpaceManager):
    """No-op stand-in for BlockSpaceManager.

    Intended for environments where block management is not required,
    for example pooling models or attention-free models like Mamba.

    The class exposes the same interface as BlockSpaceManager, but every
    method either does nothing or returns a fixed value (True,
    AllocStatus.OK, an empty list, a constant number, or None). It is
    meant for scenarios where the overhead of block management is
    unnecessary, such as an embedding environment.
    """

    def __init__(self, **kwargs) -> None:
        """Accept and ignore any keyword arguments; no state is created."""

    def can_allocate(
        self,
        seq_group: SequenceGroup,
        num_lookahead_slots: int = 0,
    ) -> AllocStatus:
        """Return AllocStatus.OK unconditionally; the arguments are unused."""
        return AllocStatus.OK

    def allocate(self, seq_group: SequenceGroup) -> None:
        """Do nothing: there is no allocation logic in this placeholder."""
        return None

    def can_append_slots(
        self,
        seq_group: SequenceGroup,
        num_lookahead_slots: int,
    ) -> bool:
        """Return True unconditionally; the arguments are unused."""
        return True

    def append_slots(
        self,
        seq: Sequence,
        num_lookahead_slots: int,
    ) -> List[Tuple[int, int]]:
        """Return a new empty list; the arguments are unused."""
        return []

    def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
        """Do nothing; both sequences are left untouched."""
        return None

    def can_swap_in(
        self,
        seq_group: SequenceGroup,
        num_lookahead_slots: int,
    ) -> AllocStatus:
        """Return AllocStatus.OK unconditionally; the arguments are unused."""
        return AllocStatus.OK

    def swap_in(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
        """Return None despite the declared list return type.

        The mismatch with the annotation is deliberate in the original
        code, hence the type-checker suppression on the return line.
        """
        return None  # type: ignore

    def can_swap_out(self, seq_group: SequenceGroup) -> bool:
        """Return True unconditionally; the argument is unused."""
        return True

    def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
        """Return None despite the declared list return type.

        The mismatch with the annotation is deliberate in the original
        code, hence the type-checker suppression on the return line.
        """
        return None  # type: ignore

    def free(self, seq: Sequence) -> None:
        """Do nothing: freeing is a no-op in this placeholder."""
        return None

    def get_block_table(self, seq: Sequence) -> List[int]:
        """Return None despite the declared list return type.

        The mismatch with the annotation is deliberate in the original
        code, hence the type-checker suppression on the return line.
        """
        return None  # type: ignore

    def get_num_free_gpu_blocks(self) -> int:
        """Return the constant 1."""
        return 1

    def get_num_free_cpu_blocks(self) -> int:
        """Return the constant 1."""
        return 1

    def access_all_blocks_in_seq(
        self,
        seq: Sequence,
        access_time: float,
    ) -> None:
        """Do nothing; the arguments are unused."""
        return None

    def get_common_computed_block_ids(
        self,
        seq_group: List[Sequence],
    ) -> List[int]:
        """Return a new empty list; the argument is unused."""
        return []

    def mark_blocks_as_computed(self, seq_group: SequenceGroup,
                                token_chunk_size: int):
        """Do nothing; the arguments are unused."""
        return None

    def get_prefix_cache_hit_rate(self, device: Device) -> float:
        """Return the constant -1; the argument is unused."""
        return -1

    def reset_prefix_cache(self, device: Optional[Device] = None) -> bool:
        """Return True unconditionally; the argument is unused."""
        return True

    def get_num_cached_tokens(self, seq: Sequence) -> int:
        """Return the constant 0; the argument is unused."""
        return 0

    def remove_seq_from_computed_blocks_tracker(self, seq: Sequence) -> None:
        """Do nothing; the argument is unused."""
        return None

__init__

__init__(**kwargs) -> None
Source code in vllm/core/placeholder_block_space_manager.py
def __init__(self, **kwargs) -> None:
    """Accept and ignore any keyword arguments; no state is created."""

access_all_blocks_in_seq

access_all_blocks_in_seq(
    seq: Sequence, access_time: float
) -> None
Source code in vllm/core/placeholder_block_space_manager.py
def access_all_blocks_in_seq(self, seq: Sequence,
                             access_time: float) -> None:
    """Do nothing; the arguments are unused."""
    return None

allocate

allocate(seq_group: SequenceGroup) -> None
Source code in vllm/core/placeholder_block_space_manager.py
def allocate(self, seq_group: SequenceGroup) -> None:
    """Do nothing: there is no allocation logic in this placeholder."""
    return None

append_slots

append_slots(
    seq: Sequence, num_lookahead_slots: int
) -> List[Tuple[int, int]]
Source code in vllm/core/placeholder_block_space_manager.py
def append_slots(self, seq: Sequence,
                 num_lookahead_slots: int) -> List[Tuple[int, int]]:
    """Return a new empty list; the arguments are unused."""
    return []

can_allocate

can_allocate(
    seq_group: SequenceGroup, num_lookahead_slots: int = 0
) -> AllocStatus
Source code in vllm/core/placeholder_block_space_manager.py
def can_allocate(
    self,
    seq_group: SequenceGroup,
    num_lookahead_slots: int = 0,
) -> AllocStatus:
    """Return AllocStatus.OK unconditionally; the arguments are unused."""
    return AllocStatus.OK

can_append_slots

can_append_slots(
    seq_group: SequenceGroup, num_lookahead_slots: int
) -> bool
Source code in vllm/core/placeholder_block_space_manager.py
def can_append_slots(
    self,
    seq_group: SequenceGroup,
    num_lookahead_slots: int,
) -> bool:
    """Return True unconditionally; the arguments are unused."""
    return True

can_swap_in

can_swap_in(
    seq_group: SequenceGroup, num_lookahead_slots: int
) -> AllocStatus
Source code in vllm/core/placeholder_block_space_manager.py
def can_swap_in(
    self,
    seq_group: SequenceGroup,
    num_lookahead_slots: int,
) -> AllocStatus:
    """Return AllocStatus.OK unconditionally; the arguments are unused."""
    return AllocStatus.OK

can_swap_out

can_swap_out(seq_group: SequenceGroup) -> bool
Source code in vllm/core/placeholder_block_space_manager.py
def can_swap_out(
    self,
    seq_group: SequenceGroup,
) -> bool:
    """Return True unconditionally; the argument is unused."""
    return True

fork

fork(parent_seq: Sequence, child_seq: Sequence) -> None
Source code in vllm/core/placeholder_block_space_manager.py
def fork(
    self,
    parent_seq: Sequence,
    child_seq: Sequence,
) -> None:
    """Do nothing; both sequences are left untouched."""
    return None

free

free(seq: Sequence) -> None
Source code in vllm/core/placeholder_block_space_manager.py
def free(self, seq: Sequence) -> None:
    """Do nothing: freeing is a no-op in this placeholder."""
    return None

get_block_table

get_block_table(seq: Sequence) -> List[int]
Source code in vllm/core/placeholder_block_space_manager.py
def get_block_table(
    self,
    seq: Sequence,
) -> List[int]:
    """Return None despite the declared list return type.

    The mismatch with the annotation is deliberate in the original code,
    hence the type-checker suppression on the return line.
    """
    return None  # type: ignore

get_common_computed_block_ids

get_common_computed_block_ids(
    seq_group: List[Sequence],
) -> List[int]
Source code in vllm/core/placeholder_block_space_manager.py
def get_common_computed_block_ids(
    self,
    seq_group: List[Sequence],
) -> List[int]:
    """Return a new empty list; the argument is unused."""
    return []

get_num_cached_tokens

get_num_cached_tokens(seq: Sequence) -> int
Source code in vllm/core/placeholder_block_space_manager.py
def get_num_cached_tokens(
    self,
    seq: Sequence,
) -> int:
    """Return the constant 0; the argument is unused."""
    return 0

get_num_free_cpu_blocks

get_num_free_cpu_blocks() -> int
Source code in vllm/core/placeholder_block_space_manager.py
def get_num_free_cpu_blocks(self) -> int:
    """Return the constant 1 as the number of free CPU blocks."""
    free_block_count = 1
    return free_block_count

get_num_free_gpu_blocks

get_num_free_gpu_blocks() -> int
Source code in vllm/core/placeholder_block_space_manager.py
def get_num_free_gpu_blocks(self) -> int:
    """Return the constant 1 as the number of free GPU blocks."""
    free_block_count = 1
    return free_block_count

get_prefix_cache_hit_rate

get_prefix_cache_hit_rate(device: Device) -> float
Source code in vllm/core/placeholder_block_space_manager.py
def get_prefix_cache_hit_rate(
    self,
    device: Device,
) -> float:
    """Return the constant -1; the argument is unused."""
    return -1

mark_blocks_as_computed

mark_blocks_as_computed(
    seq_group: SequenceGroup, token_chunk_size: int
)
Source code in vllm/core/placeholder_block_space_manager.py
def mark_blocks_as_computed(
    self,
    seq_group: SequenceGroup,
    token_chunk_size: int,
):
    """Do nothing; the arguments are unused."""
    return None

remove_seq_from_computed_blocks_tracker

remove_seq_from_computed_blocks_tracker(
    seq: Sequence,
) -> None
Source code in vllm/core/placeholder_block_space_manager.py
def remove_seq_from_computed_blocks_tracker(
    self,
    seq: Sequence,
) -> None:
    """Do nothing; the argument is unused."""
    return None

reset_prefix_cache

reset_prefix_cache(device: Optional[Device] = None) -> bool
Source code in vllm/core/placeholder_block_space_manager.py
def reset_prefix_cache(
    self,
    device: Optional[Device] = None,
) -> bool:
    """Return True unconditionally; the argument is unused."""
    return True

swap_in

swap_in(seq_group: SequenceGroup) -> List[Tuple[int, int]]
Source code in vllm/core/placeholder_block_space_manager.py
def swap_in(
    self,
    seq_group: SequenceGroup,
) -> List[Tuple[int, int]]:
    """Return None despite the declared list return type.

    The mismatch with the annotation is deliberate in the original code,
    hence the type-checker suppression on the return line.
    """
    return None  # type: ignore

swap_out

swap_out(seq_group: SequenceGroup) -> List[Tuple[int, int]]
Source code in vllm/core/placeholder_block_space_manager.py
def swap_out(
    self,
    seq_group: SequenceGroup,
) -> List[Tuple[int, int]]:
    """Return None despite the declared list return type.

    The mismatch with the annotation is deliberate in the original code,
    hence the type-checker suppression on the return line.
    """
    return None  # type: ignore