vllm.core.block.common

BlockId `module-attribute` ¶

BlockId = int

RefCount `module-attribute` ¶

RefCount = int

BlockList ¶

This class is an optimization to allow fast access to physical block ids. It maintains a block id list that is kept in sync with the block list, which avoids the need to reconstruct the block id list on every iteration of the block manager.

Source code in vllm/core/block/common.py

class BlockList:
    """Keeps a list of blocks together with a cached list of their ids.

    This class is an optimization to allow fast access to physical block
    ids: the id list is maintained alongside the block list, which avoids
    reconstructing the id list on every iteration of the block manager.
    """

    def __init__(self, blocks: List[Block]):
        self._block_ids: List[int] = []
        self._blocks: List[Block] = []

        self.update(blocks)

    def _add_block_id(self, block_id: Optional[BlockId]) -> None:
        """Append ``block_id`` to the id cache; it must not be None."""
        assert block_id is not None
        cached_ids = self._block_ids
        cached_ids.append(block_id)

    def _update_block_id(self, block_index: int,
                         new_block_id: Optional[BlockId]) -> None:
        """Overwrite the cached id at ``block_index``; it must not be None."""
        assert new_block_id is not None
        cached_ids = self._block_ids
        cached_ids[block_index] = new_block_id

    def update(self, blocks: List[Block]):
        """Adopt ``blocks`` as the backing list and rebuild the id cache."""
        # The given list object itself is stored (no copy is made).
        self._blocks = blocks

        # Rebuild the id cache from scratch, one id per block, in order.
        self._block_ids = []
        for blk in blocks:
            self._add_block_id(blk.block_id)

    def append_token_ids(self, block_index: int, token_ids: List[int]) -> None:
        """Append ``token_ids`` to one block and resync its cached id."""
        target = self._blocks[block_index]
        id_before = target.block_id

        target.append_token_ids(token_ids)

        # CoW or promotion may have changed the block's internal block_id.
        id_after = target.block_id
        if id_before != id_after:
            self._update_block_id(block_index, id_after)

    def append(self, new_block: Block):
        """Add ``new_block`` at the end and cache its id."""
        self._blocks.append(new_block)
        self._add_block_id(new_block.block_id)

    def __len__(self) -> int:
        block_count = len(self._blocks)
        return block_count

    def __getitem__(self, block_index: int) -> Block:
        selected = self._blocks[block_index]
        return selected

    def __setitem__(self, block_index: int, new_block: Block) -> None:
        # Store the block first, then refresh the matching cache slot.
        held_blocks = self._blocks
        held_blocks[block_index] = new_block
        self._update_block_id(block_index, new_block.block_id)

    def reset(self):
        """Drop all blocks and cached ids by installing fresh empty lists."""
        self._blocks, self._block_ids = [], []

    def list(self) -> List[Block]:
        """Return the internal block list (the same object, not a copy)."""
        held_blocks = self._blocks
        return held_blocks

    def ids(self) -> List[int]:
        """Return the internal id cache (the same object, not a copy)."""
        cached_ids = self._block_ids
        return cached_ids

_block_ids `instance-attribute` ¶

_block_ids: List[int] = []

_blocks `instance-attribute` ¶

_blocks: List[Block] = []

getitem ¶

__getitem__(block_index: int) -> Block

Source code in vllm/core/block/common.py

def __getitem__(self, block_index: int) -> Block:
    """Return the block stored at position ``block_index``."""
    selected = self._blocks[block_index]
    return selected

init ¶

__init__(blocks: List[Block])

Source code in vllm/core/block/common.py

def __init__(self, blocks: List[Block]):
    """Start with empty lists, then adopt ``blocks`` via ``update``."""
    self._block_ids: List[int] = []
    self._blocks: List[Block] = []

    # update() installs the given blocks and fills the id cache.
    self.update(blocks)

len ¶

__len__() -> int

Source code in vllm/core/block/common.py

def __len__(self) -> int:
    """Return how many blocks are currently held."""
    block_count = len(self._blocks)
    return block_count

setitem ¶

__setitem__(block_index: int, new_block: Block) -> None

Source code in vllm/core/block/common.py

def __setitem__(self, block_index: int, new_block: Block) -> None:
    """Replace the block at ``block_index`` and refresh its cached id."""
    # Store the block first, then update the matching id-cache slot.
    held_blocks = self._blocks
    held_blocks[block_index] = new_block
    self._update_block_id(block_index, new_block.block_id)

_add_block_id ¶

_add_block_id(block_id: Optional[BlockId]) -> None

Source code in vllm/core/block/common.py

def _add_block_id(self, block_id: Optional[BlockId]) -> None:
    """Append ``block_id`` to the id cache; it must not be None."""
    assert block_id is not None
    cached_ids = self._block_ids
    cached_ids.append(block_id)

_update_block_id ¶

_update_block_id(
    block_index: int, new_block_id: Optional[BlockId]
) -> None

Source code in vllm/core/block/common.py

def _update_block_id(self, block_index: int,
                     new_block_id: Optional[BlockId]) -> None:
    """Overwrite the cached id at ``block_index``; it must not be None."""
    assert new_block_id is not None
    cached_ids = self._block_ids
    cached_ids[block_index] = new_block_id

append ¶

append(new_block: Block)

Source code in vllm/core/block/common.py

def append(self, new_block: Block):
    """Add ``new_block`` at the end of the block list and cache its id."""
    held_blocks = self._blocks
    held_blocks.append(new_block)
    # The id is read after the block is stored, then handed to the cache.
    self._add_block_id(new_block.block_id)

append_token_ids ¶

append_token_ids(
    block_index: int, token_ids: List[int]
) -> None

Source code in vllm/core/block/common.py

def append_token_ids(self, block_index: int, token_ids: List[int]) -> None:
    """Append ``token_ids`` to one block and resync its cached id."""
    target = self._blocks[block_index]
    id_before = target.block_id

    target.append_token_ids(token_ids)

    # CoW or promotion may have changed the block's internal block_id.
    id_after = target.block_id
    if id_before != id_after:
        self._update_block_id(block_index, id_after)

ids ¶

ids() -> List[int]

Source code in vllm/core/block/common.py

def ids(self) -> List[int]:
    """Return the internal id cache (the same list object, not a copy)."""
    cached_ids = self._block_ids
    return cached_ids

list ¶

list() -> List[Block]

Source code in vllm/core/block/common.py

def list(self) -> List[Block]:
    """Return the internal block list (the same list object, not a copy)."""
    held_blocks = self._blocks
    return held_blocks

reset ¶

reset()

Source code in vllm/core/block/common.py

def reset(self):
    """Drop all blocks and cached ids by installing fresh empty lists."""
    # New list objects are bound; previously returned lists are not mutated.
    self._blocks, self._block_ids = [], []

update ¶

update(blocks: List[Block])

Source code in vllm/core/block/common.py

def update(self, blocks: List[Block]):
    """Adopt ``blocks`` as the backing list and rebuild the id cache."""
    # The given list object itself is stored (no copy is made).
    self._blocks = blocks

    # Rebuild the id cache from scratch, one id per block, in order.
    self._block_ids = []
    for blk in blocks:
        self._add_block_id(blk.block_id)

BlockPool ¶

Used to pre-allocate block objects, in order to avoid excessive Python object allocations/deallocations. The pool starts with "pool_size" objects and grows when more objects are needed.

Note that multiple block objects may point to the same physical block id, which is why this pool is needed, so that it will be easier to support prefix caching and more complicated sharing of physical blocks.

Source code in vllm/core/block/common.py

class BlockPool:
    """Used to pre-allocate block objects, in order to avoid excessive python
    object allocations/deallocations.
    The pool starts from "pool_size" objects and will increase to more objects
    if necessary

    Note that multiple block objects may point to the same physical block id,
    which is why this pool is needed, so that it will be easier to support
    prefix caching and more complicated sharing of physical blocks.

    Args:
        block_size: Passed as ``block_size`` to the factory for every
            pre-allocated block.
        create_block: Factory called with keyword arguments ``prev_block``,
            ``token_ids``, ``block_size``, ``allocator``, ``block_id`` and
            ``extra_hash`` to build each pooled object.
        allocator: Passed as ``allocator`` to the factory.
        pool_size: Initial number of pooled objects; must be >= 0.
    """

    def __init__(self, block_size: int, create_block: Block.Factory,
                 allocator: BlockAllocator, pool_size: int):
        self._block_size = block_size
        self._create_block = create_block
        self._allocator = allocator
        self._pool_size = pool_size
        assert self._pool_size >= 0

        # Pool ids are simply indices into self._pool.
        self._free_ids: Deque[int] = deque(range(self._pool_size))
        self._pool = [
            self._new_pool_block() for _ in range(self._pool_size)
        ]

    def _new_pool_block(self) -> Block:
        """Builds one empty placeholder block through the factory."""
        return self._create_block(prev_block=None,
                                  token_ids=[],
                                  block_size=self._block_size,
                                  allocator=self._allocator,
                                  block_id=None,
                                  extra_hash=None)

    def increase_pool(self):
        """Doubles the internal pool size.

        A pool that currently holds zero objects grows to one object, since
        doubling zero would add nothing and leave the pool unusable.
        """
        cur_pool_size = self._pool_size
        new_pool_size = max(cur_pool_size * 2, 1)

        # Create every new block before touching the bookkeeping, so a
        # failing factory call leaves the pool in its previous state.
        new_blocks = [
            self._new_pool_block()
            for _ in range(cur_pool_size, new_pool_size)
        ]

        self._pool.extend(new_blocks)
        self._free_ids.extend(range(cur_pool_size, new_pool_size))
        self._pool_size = new_pool_size

    def init_block(self,
                   prev_block: Optional[Block],
                   token_ids: List[int],
                   block_size: int,
                   physical_block_id: Optional[int],
                   extra_hash: Optional[int] = None) -> Block:
        """Takes a free pooled object and re-initializes it in place.

        The pool is grown first when no free id is left. The object's own
        ``__init__`` is re-run with the given values, reusing the allocator
        already stored on the object, and the object's ``pool_id`` is set so
        that ``free_block`` can return it later.

        Returns:
            The re-initialized pooled block.
        """
        if len(self._free_ids) == 0:
            self.increase_pool()
            assert len(self._free_ids) > 0

        pool_id = self._free_ids.popleft()

        block = self._pool[pool_id]
        block.__init__(  # type: ignore[misc]
            prev_block=prev_block,
            token_ids=token_ids,
            block_size=block_size,
            allocator=block._allocator,  # type: ignore[attr-defined]
            block_id=physical_block_id,
            extra_hash=extra_hash)
        block.pool_id = pool_id  # type: ignore[attr-defined]
        return block

    def free_block(self, block: Block) -> None:
        """Returns ``block``'s pool id to the front of the free-id queue.

        Because ``init_block`` pops from the front, the id released last is
        the next one handed out. No check is made that the id was in use.
        """
        self._free_ids.appendleft(block.pool_id)  # type: ignore[attr-defined]

_allocator `instance-attribute` ¶

_allocator = allocator

_block_size `instance-attribute` ¶

_block_size = block_size

_create_block `instance-attribute` ¶

_create_block = create_block

_free_ids `instance-attribute` ¶

_free_ids: Deque[int] = deque(range(_pool_size))

_pool `instance-attribute` ¶

_pool = []

_pool_size `instance-attribute` ¶

_pool_size = pool_size

init ¶

__init__(
    block_size: int,
    create_block: Factory,
    allocator: BlockAllocator,
    pool_size: int,
)

Source code in vllm/core/block/common.py

def __init__(self, block_size: int, create_block: Block.Factory,
             allocator: BlockAllocator, pool_size: int):
    """Store the configuration and pre-build ``pool_size`` empty blocks."""
    self._block_size = block_size
    self._create_block = create_block
    self._allocator = allocator
    self._pool_size = pool_size
    assert self._pool_size >= 0

    # Pool ids are indices into self._pool; initially all of them are free.
    self._pool = []
    self._free_ids: Deque[int] = deque(range(self._pool_size))
    for _ in range(self._pool_size):
        placeholder = self._create_block(prev_block=None,
                                         token_ids=[],
                                         block_size=self._block_size,
                                         allocator=self._allocator,
                                         block_id=None,
                                         extra_hash=None)
        self._pool.append(placeholder)

free_block ¶

free_block(block: Block) -> None

Source code in vllm/core/block/common.py

def free_block(self, block: Block) -> None:
    """Put ``block``'s pool id back at the front of the free-id queue."""
    released_id = block.pool_id  # type: ignore[attr-defined]
    self._free_ids.appendleft(released_id)

increase_pool ¶

increase_pool()

Doubles the internal pool size

Source code in vllm/core/block/common.py

def increase_pool(self):
    """Doubles the internal pool size.

    A pool that currently holds zero objects grows to one object, since
    doubling zero would add nothing and leave the pool unusable.
    """
    cur_pool_size = self._pool_size
    new_pool_size = max(cur_pool_size * 2, 1)

    # Create every new block before touching the bookkeeping, so a failing
    # factory call leaves the pool in its previous state.
    new_blocks = [
        self._create_block(prev_block=None,
                           token_ids=[],
                           block_size=self._block_size,
                           allocator=self._allocator,
                           block_id=None,
                           extra_hash=None)
        for _ in range(cur_pool_size, new_pool_size)
    ]

    self._pool.extend(new_blocks)
    self._free_ids.extend(range(cur_pool_size, new_pool_size))
    self._pool_size = new_pool_size

init_block ¶

init_block(
    prev_block: Optional[Block],
    token_ids: List[int],
    block_size: int,
    physical_block_id: Optional[int],
    extra_hash: Optional[int] = None,
) -> Block

Source code in vllm/core/block/common.py

def init_block(self,
               prev_block: Optional[Block],
               token_ids: List[int],
               block_size: int,
               physical_block_id: Optional[int],
               extra_hash: Optional[int] = None) -> Block:
    """Take a free pooled object, re-run its ``__init__`` and return it."""
    free_ids = self._free_ids
    if len(free_ids) == 0:
        # No free slot left: grow the pool, which must yield free ids.
        self.increase_pool()
        assert len(self._free_ids) > 0

    pool_id = self._free_ids.popleft()
    block = self._pool[pool_id]

    # Re-initialize in place, keeping the allocator stored on the object.
    init_kwargs = dict(
        prev_block=prev_block,
        token_ids=token_ids,
        block_size=block_size,
        allocator=block._allocator,  # type: ignore[attr-defined]
        block_id=physical_block_id,
        extra_hash=extra_hash,
    )
    block.__init__(**init_kwargs)  # type: ignore[misc]

    # Remember the slot so the block can be handed back later.
    block.pool_id = pool_id  # type: ignore[attr-defined]
    return block

CacheMetricData `dataclass` ¶

A utility dataclass to maintain cache metrics. To avoid overflow, we maintain the hit rate at block granularity, so that we keep a single hit rate for `n_completed_block x block_size` queries, and calculate the real-time hit rate as follows. Let $BS$ be the number of queries per block, $nB$ the number of completed blocks, $HR$ the hit rate of the $nB \times BS$ completed queries, $Q$ the current number of queries ($Q < BS$), and $H$ the current number of hits ($H < BS$). Then

$$\text{hit rate} = \frac{HR \cdot nB + \frac{H}{Q} \cdot \frac{Q}{BS}}{nB + \frac{Q}{BS}}$$

Source code in vllm/core/block/common.py

@dataclass
class CacheMetricData:
    """Running cache hit-rate bookkeeping, kept at block granularity.

    To avoid overflow, queries are grouped into blocks of ``block_size``
    queries. Only one averaged hit rate is kept for all completed blocks,
    plus raw counters for the block currently being filled:
    BS = The number of queries per block.
    nB = The number of completed blocks.
    HR = hit rate of (nB x BS) queries.
    Q = current number of queries (< BS).
    H = current number of hits (< BS).
    hit rate = ((HR x nB) + (H / Q) x (Q / BS)) / (nB + Q / BS)
    """
    num_completed_blocks: int = 0
    completed_block_cache_hit_rate: float = 0.0
    num_incompleted_block_queries: int = 0
    num_incompleted_block_hit: int = 0
    block_size: int = 1000

    def query(self, hit: bool):
        """Record one query and whether it was a cache hit."""
        self.num_incompleted_block_queries += 1
        if hit:
            self.num_incompleted_block_hit += 1

        # Nothing more to do until the current block is full.
        if self.num_incompleted_block_queries != self.block_size:
            return

        # Fold the finished block into the running average, then start a
        # new, empty block.
        done_blocks = self.num_completed_blocks
        block_hit_rate = (self.num_incompleted_block_hit /
                          self.num_incompleted_block_queries)
        weighted_sum = (self.completed_block_cache_hit_rate * done_blocks +
                        block_hit_rate)
        self.completed_block_cache_hit_rate = weighted_sum / (done_blocks + 1)
        self.num_incompleted_block_queries = 0
        self.num_incompleted_block_hit = 0
        self.num_completed_blocks = done_blocks + 1

    def get_hit_rate(self):
        """Return the overall hit rate, or 0.0 when nothing was queried."""
        pending_queries = self.num_incompleted_block_queries
        partial_blocks = pending_queries / self.block_size
        total_blocks = self.num_completed_blocks + partial_blocks
        if total_blocks == 0:
            return 0.0

        # Contribution of the completed blocks, weighted by their count.
        completed_part = 0.0
        if self.num_completed_blocks > 0:
            completed_part = (self.completed_block_cache_hit_rate *
                              self.num_completed_blocks)

        # Contribution of the partially filled block, weighted by how full
        # it is.
        pending_part = 0.0
        if pending_queries > 0:
            pending_rate = self.num_incompleted_block_hit / pending_queries
            pending_part = pending_rate * partial_blocks

        return (completed_part + pending_part) / total_blocks

block_size `class-attribute` `instance-attribute` ¶

block_size: int = 1000

completed_block_cache_hit_rate `class-attribute` `instance-attribute` ¶

completed_block_cache_hit_rate: float = 0.0

num_completed_blocks `class-attribute` `instance-attribute` ¶

num_completed_blocks: int = 0

num_incompleted_block_hit `class-attribute` `instance-attribute` ¶

num_incompleted_block_hit: int = 0

num_incompleted_block_queries `class-attribute` `instance-attribute` ¶

num_incompleted_block_queries: int = 0

init ¶

__init__(
    num_completed_blocks: int = 0,
    completed_block_cache_hit_rate: float = 0.0,
    num_incompleted_block_queries: int = 0,
    num_incompleted_block_hit: int = 0,
    block_size: int = 1000,
) -> None

get_hit_rate ¶

get_hit_rate()

Source code in vllm/core/block/common.py

def get_hit_rate(self):
    """Return the overall hit rate, or 0.0 when nothing was queried."""
    pending_queries = self.num_incompleted_block_queries
    partial_blocks = pending_queries / self.block_size
    total_blocks = self.num_completed_blocks + partial_blocks
    if total_blocks == 0:
        return 0.0

    # Contribution of the completed blocks, weighted by their count.
    completed_part = 0.0
    if self.num_completed_blocks > 0:
        completed_part = (self.completed_block_cache_hit_rate *
                          self.num_completed_blocks)

    # Contribution of the partially filled block, weighted by how full it is.
    pending_part = 0.0
    if pending_queries > 0:
        pending_rate = self.num_incompleted_block_hit / pending_queries
        pending_part = pending_rate * partial_blocks

    return (completed_part + pending_part) / total_blocks

query ¶

query(hit: bool)

Source code in vllm/core/block/common.py

def query(self, hit: bool):
    """Record one query and whether it was a cache hit."""
    self.num_incompleted_block_queries += 1
    if hit:
        self.num_incompleted_block_hit += 1

    # Nothing more to do until the current block is full.
    if self.num_incompleted_block_queries != self.block_size:
        return

    # Fold the finished block into the running average, then start a new,
    # empty block.
    done_blocks = self.num_completed_blocks
    block_hit_rate = (self.num_incompleted_block_hit /
                      self.num_incompleted_block_queries)
    weighted_sum = (self.completed_block_cache_hit_rate * done_blocks +
                    block_hit_rate)
    self.completed_block_cache_hit_rate = weighted_sum / (done_blocks + 1)
    self.num_incompleted_block_queries = 0
    self.num_incompleted_block_hit = 0
    self.num_completed_blocks = done_blocks + 1

CopyOnWriteTracker ¶

A class for tracking and managing copy-on-write operations for blocks.

The CopyOnWriteTracker class maintains a mapping of source block indices to their corresponding copy-on-write destination block indices. It works in conjunction with a RefCounter.

Parameters:

Name	Type	Description	Default
`refcounter`	`RefCounter`	The reference counter used to track block reference counts.	required

Source code in vllm/core/block/common.py

class CopyOnWriteTracker:
    """Records pending copy-on-write operations between block ids.

    The tracker keeps a list of (source block id, destination block id)
    pairs and consults a reference counter to decide whether a block is
    shared.

    Args:
        refcounter (RefCounter): The reference counter used to look up block
            reference counts.
    """

    def __init__(self, refcounter: RefCounterProtocol):
        self._refcounter = refcounter
        self._copy_on_writes: List[Tuple[BlockId, BlockId]] = []

    def is_appendable(self, block: Block) -> bool:
        """Tells whether ``block`` can be appended to directly.

        A block without a block id is always appendable. Otherwise it is
        appendable only while its reference count is at most one; a shared
        block needs to be duplicated via copy-on-write instead.
        """
        block_id = block.block_id
        return block_id is None or self._refcounter.get(block_id) <= 1

    def record_cow(self, src_block_id: Optional[BlockId],
                   trg_block_id: Optional[BlockId]) -> None:
        """Remembers one copy-on-write from a source to a target block id.

        Args:
            src_block_id (BlockId): The block id the data is copied from.
            trg_block_id (BlockId): The block id the data is copied to.
        """
        # Both ids are required; None is rejected before anything is stored.
        assert src_block_id is not None
        assert trg_block_id is not None
        pair = (src_block_id, trg_block_id)
        self._copy_on_writes.append(pair)

    def clear_cows(self) -> List[Tuple[BlockId, BlockId]]:
        """Hands back the recorded copy-on-writes and forgets them.

        Returns:
            List[Tuple[BlockId, BlockId]]: The (source, destination) pairs
                recorded since the previous call, in recording order.
        """
        # Swap in a fresh list; the old list object is what gets returned.
        recorded, self._copy_on_writes = self._copy_on_writes, []
        return recorded

_copy_on_writes `instance-attribute` ¶

_copy_on_writes: List[Tuple[BlockId, BlockId]] = []

_refcounter `instance-attribute` ¶

_refcounter = refcounter

init ¶

__init__(refcounter: RefCounterProtocol)

Source code in vllm/core/block/common.py

def __init__(self, refcounter: RefCounterProtocol):
    """Keep ``refcounter`` and start with no recorded copy-on-writes."""
    self._refcounter = refcounter
    self._copy_on_writes: List[Tuple[BlockId, BlockId]] = []

clear_cows ¶

clear_cows() -> List[Tuple[BlockId, BlockId]]

Clears the copy-on-write tracking information and returns the current state.

This method returns a list mapping source block indices to destination block indices for the current copy-on-write operations. It then clears the internal tracking information.

Returns:

Type	Description
`List[Tuple[BlockId, BlockId]]`	List[Tuple[BlockId, BlockId]]: A list mapping source block indices to destination block indices for the current copy-on-write operations.

Source code in vllm/core/block/common.py

def clear_cows(self) -> List[Tuple[BlockId, BlockId]]:
    """Hands back the recorded copy-on-writes and forgets them.

    Returns:
        List[Tuple[BlockId, BlockId]]: The (source, destination) pairs
            recorded since the previous call, in recording order.
    """
    # Swap in a fresh list; the old list object is what gets returned.
    recorded, self._copy_on_writes = self._copy_on_writes, []
    return recorded

is_appendable ¶

is_appendable(block: Block) -> bool

Checks if the block is shared or not. If shared, then it cannot be appended and needs to be duplicated via copy-on-write

Source code in vllm/core/block/common.py

def is_appendable(self, block: Block) -> bool:
    """Tells whether ``block`` can be appended to directly.

    A block without a block id is always appendable. Otherwise it is
    appendable only while its reference count is at most one; a shared
    block needs to be duplicated via copy-on-write instead.
    """
    block_id = block.block_id
    return block_id is None or self._refcounter.get(block_id) <= 1

record_cow ¶

record_cow(
    src_block_id: Optional[BlockId],
    trg_block_id: Optional[BlockId],
) -> None

Records a copy-on-write operation from a source block id to a target block id.

Args:

- `src_block_id` (BlockId): The source block id from which to copy the data.
- `trg_block_id` (BlockId): The target block id to which the data is copied.

Source code in vllm/core/block/common.py

def record_cow(self, src_block_id: Optional[BlockId],
               trg_block_id: Optional[BlockId]) -> None:
    """Remembers one copy-on-write from a source to a target block id.

    Args:
        src_block_id (BlockId): The block id the data is copied from.
        trg_block_id (BlockId): The block id the data is copied to.
    """
    # Both ids are required; None is rejected before anything is stored.
    assert src_block_id is not None
    assert trg_block_id is not None
    pair = (src_block_id, trg_block_id)
    self._copy_on_writes.append(pair)

ReadOnlyRefCounter ¶

Bases: RefCounterProtocol

A read-only view of the RefCounter class.

The ReadOnlyRefCounter class provides a read-only interface to access the reference counts maintained by a RefCounter instance. It does not allow modifications to the reference counts.

Parameters:

Name	Type	Description	Default
`refcounter`	`RefCounter`	The RefCounter instance to create a read-only view for.	required

Source code in vllm/core/block/common.py

class ReadOnlyRefCounter(RefCounterProtocol):
    """A view of a RefCounter that permits lookups only.

    Reads are forwarded to the wrapped RefCounter instance, while both
    mutating operations raise ``ValueError``.

    Args:
        refcounter (RefCounter): The RefCounter instance to create a read-only
            view for.
    """

    def __init__(self, refcounter: RefCounter):
        wrapped = refcounter
        self._refcounter = wrapped

    def incr(self, block_id: BlockId) -> RefCount:
        """Always raises ValueError: incrementing is not permitted."""
        message = "Incr not allowed"
        raise ValueError(message)

    def decr(self, block_id: BlockId) -> RefCount:
        """Always raises ValueError: decrementing is not permitted."""
        message = "Decr not allowed"
        raise ValueError(message)

    def get(self, block_id: BlockId) -> RefCount:
        """Returns the wrapped counter's result for ``block_id``."""
        count = self._refcounter.get(block_id)
        return count

_refcounter `instance-attribute` ¶

_refcounter = refcounter

init ¶

__init__(refcounter: RefCounter)

Source code in vllm/core/block/common.py

def __init__(self, refcounter: RefCounter):
    """Keep a reference to the counter this view reads from."""
    wrapped = refcounter
    self._refcounter = wrapped

decr ¶

decr(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def decr(self, block_id: BlockId) -> RefCount:
    """Always raises ValueError: decrementing is not permitted here."""
    message = "Decr not allowed"
    raise ValueError(message)

get ¶

get(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def get(self, block_id: BlockId) -> RefCount:
    """Return the wrapped counter's result for ``block_id``."""
    count = self._refcounter.get(block_id)
    return count

incr ¶

incr(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def incr(self, block_id: BlockId) -> RefCount:
    """Always raises ValueError: incrementing is not permitted here."""
    message = "Incr not allowed"
    raise ValueError(message)

RefCounter ¶

Bases: RefCounterProtocol

A class for managing reference counts for a set of block indices.

The RefCounter class maintains a dictionary that maps block indices to their corresponding reference counts. It provides methods to increment, decrement, and retrieve the reference count for a given block index.

Parameters:

Name	Type	Description	Default
`all_block_indices`	`Iterable[BlockId]`	An iterable of block indices to initialize the reference counter with.	required

Source code in vllm/core/block/common.py

class RefCounter(RefCounterProtocol):
    """Reference counts for a fixed set of block indices.

    A dictionary maps every known block index to its current reference
    count, starting at zero. Counts can be incremented, decremented and
    looked up; an index that was not supplied at construction fails an
    assertion.

    Args:
        all_block_indices (Iterable[BlockId]): An iterable of block indices
            to initialize the reference counter with.
    """

    def __init__(self, all_block_indices: Iterable[BlockId]):
        # Duplicates collapse to a single entry; every count starts at 0.
        unique_indices = set(all_block_indices)
        self._refcounts: Dict[BlockId, RefCount] = dict.fromkeys(
            unique_indices, 0)

    def incr(self, block_id: BlockId) -> RefCount:
        """Adds one to the count of ``block_id`` and returns the new count."""
        counts = self._refcounts
        assert block_id in counts
        current = counts[block_id]

        assert current >= 0

        updated = current + 1
        counts[block_id] = updated
        return updated

    def decr(self, block_id: BlockId) -> RefCount:
        """Subtracts one from the count of ``block_id`` and returns it.

        The count must be positive before the call.
        """
        counts = self._refcounts
        assert block_id in counts
        current = counts[block_id]

        assert current > 0

        updated = current - 1
        counts[block_id] = updated
        return updated

    def get(self, block_id: BlockId) -> RefCount:
        """Returns the current count of ``block_id``."""
        counts = self._refcounts
        assert block_id in counts
        return counts[block_id]

    def as_readonly(self) -> "ReadOnlyRefCounter":
        """Wraps this counter in a ReadOnlyRefCounter."""
        view = ReadOnlyRefCounter(self)
        return view

_refcounts `instance-attribute` ¶

_refcounts: Dict[BlockId, RefCount] = {
    index: 0 for index in deduped
}

init ¶

__init__(all_block_indices: Iterable[BlockId])

Source code in vllm/core/block/common.py

def __init__(self, all_block_indices: Iterable[BlockId]):
    """Create a zero count for every distinct index given."""
    # Duplicates collapse to a single entry; every count starts at 0.
    unique_indices = set(all_block_indices)
    self._refcounts: Dict[BlockId, RefCount] = dict.fromkeys(
        unique_indices, 0)

as_readonly ¶

as_readonly() -> ReadOnlyRefCounter

Source code in vllm/core/block/common.py

def as_readonly(self) -> "ReadOnlyRefCounter":
    """Wrap this counter in a ReadOnlyRefCounter and return the wrapper."""
    view = ReadOnlyRefCounter(self)
    return view

decr ¶

decr(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def decr(self, block_id: BlockId) -> RefCount:
    """Subtract one from the count of ``block_id`` and return the new count.

    The id must be known and its count must be positive before the call.
    """
    counts = self._refcounts
    assert block_id in counts
    current = counts[block_id]

    assert current > 0

    updated = current - 1
    counts[block_id] = updated
    return updated

get ¶

get(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def get(self, block_id: BlockId) -> RefCount:
    """Return the current count of ``block_id``; the id must be known."""
    counts = self._refcounts
    assert block_id in counts
    return counts[block_id]

incr ¶

incr(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def incr(self, block_id: BlockId) -> RefCount:
    """Add one to the count of ``block_id`` and return the new count.

    The id must be known and its count must not be negative.
    """
    counts = self._refcounts
    assert block_id in counts
    current = counts[block_id]

    assert current >= 0

    updated = current + 1
    counts[block_id] = updated
    return updated

RefCounterProtocol ¶

Bases: Protocol

Source code in vllm/core/block/common.py

class RefCounterProtocol(Protocol):
    """Structural interface of a block reference counter.

    Each method takes a block id and is annotated to return a reference
    count. The bodies here only raise ``NotImplementedError``.
    """

    def incr(self, block_id: BlockId) -> RefCount:
        """Placeholder for incrementing; not implemented here."""
        raise NotImplementedError()

    def decr(self, block_id: BlockId) -> RefCount:
        """Placeholder for decrementing; not implemented here."""
        raise NotImplementedError()

    def get(self, block_id: BlockId) -> RefCount:
        """Placeholder for lookup; not implemented here."""
        raise NotImplementedError()

decr ¶

decr(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def decr(self, block_id: BlockId) -> RefCount:
    """Placeholder for decrementing; not implemented here."""
    raise NotImplementedError()

get ¶

get(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def get(self, block_id: BlockId) -> RefCount:
    """Placeholder for lookup; not implemented here."""
    raise NotImplementedError()

incr ¶

incr(block_id: BlockId) -> RefCount

Source code in vllm/core/block/common.py

def incr(self, block_id: BlockId) -> RefCount:
    """Placeholder for incrementing; not implemented here."""
    raise NotImplementedError()

get_all_blocks_recursively ¶

get_all_blocks_recursively(
    last_block: Block,
) -> List[Block]

Retrieves all the blocks in a sequence starting from the last block.

This function recursively traverses the sequence of blocks in reverse order, starting from the given last block, and returns a list of all the blocks in the sequence.

Parameters:

Name	Type	Description	Default
`last_block`	`Block`	The last block in the sequence.	required

Returns:

Type	Description
`List[Block]`	List[Block]: A list of all the blocks in the sequence, in the order they appear.

Source code in vllm/core/block/common.py

def get_all_blocks_recursively(last_block: Block) -> List[Block]:
    """Retrieves all the blocks in a sequence starting from the last block.

    The chain is followed backwards through each block's ``prev_block``
    attribute until a block whose ``prev_block`` is None is reached. The
    walk is iterative, so the length of the chain is not bounded by the
    interpreter's recursion limit.

    Args:
        last_block (Block): The last block in the sequence.

    Returns:
        List[Block]: A list of all the blocks in the sequence, in the order they
            appear (first block first, ``last_block`` last).
    """
    all_blocks: List[Block] = []

    # Collect from the tail towards the head ...
    block: Optional[Block] = last_block
    while block is not None:
        all_blocks.append(block)
        block = block.prev_block

    # ... then flip the list so that it reads head to tail.
    all_blocks.reverse()
    return all_blocks

vllm.core.block.common

BlockId module-attribute ¶

RefCount module-attribute ¶

BlockList ¶

_block_ids instance-attribute ¶

_blocks instance-attribute ¶

__getitem__ ¶

__init__ ¶

__len__ ¶

__setitem__ ¶

_add_block_id ¶

_update_block_id ¶

append ¶

append_token_ids ¶

ids ¶

list ¶

reset ¶

update ¶

BlockPool ¶

_allocator instance-attribute ¶

_block_size instance-attribute ¶

_create_block instance-attribute ¶

_free_ids instance-attribute ¶

_pool instance-attribute ¶

_pool_size instance-attribute ¶

__init__ ¶

free_block ¶

increase_pool ¶

init_block ¶

CacheMetricData dataclass ¶

block_size class-attribute instance-attribute ¶

completed_block_cache_hit_rate class-attribute instance-attribute ¶

num_completed_blocks class-attribute instance-attribute ¶

num_incompleted_block_hit class-attribute instance-attribute ¶

num_incompleted_block_queries class-attribute instance-attribute ¶

__init__ ¶

get_hit_rate ¶

query ¶

CopyOnWriteTracker ¶

_copy_on_writes instance-attribute ¶

_refcounter instance-attribute ¶

__init__ ¶

clear_cows ¶

is_appendable ¶

record_cow ¶

ReadOnlyRefCounter ¶

_refcounter instance-attribute ¶

__init__ ¶

decr ¶

get ¶

incr ¶

RefCounter ¶

_refcounts instance-attribute ¶

__init__ ¶

as_readonly ¶

decr ¶

get ¶

incr ¶

RefCounterProtocol ¶

decr ¶

get ¶

incr ¶

get_all_blocks_recursively ¶

BlockId `module-attribute` ¶

RefCount `module-attribute` ¶

_block_ids `instance-attribute` ¶

_blocks `instance-attribute` ¶

getitem ¶

init ¶

len ¶

setitem ¶

_allocator `instance-attribute` ¶

_block_size `instance-attribute` ¶

_create_block `instance-attribute` ¶

_free_ids `instance-attribute` ¶

_pool `instance-attribute` ¶

_pool_size `instance-attribute` ¶

init ¶

CacheMetricData `dataclass` ¶

block_size `class-attribute` `instance-attribute` ¶

completed_block_cache_hit_rate `class-attribute` `instance-attribute` ¶

num_completed_blocks `class-attribute` `instance-attribute` ¶

num_incompleted_block_hit `class-attribute` `instance-attribute` ¶

num_incompleted_block_queries `class-attribute` `instance-attribute` ¶

init ¶

_copy_on_writes `instance-attribute` ¶

_refcounter `instance-attribute` ¶

init ¶

_refcounter `instance-attribute` ¶

init ¶

_refcounts `instance-attribute` ¶

init ¶