vllm.core.evictor

BlockMetaData ¶

Data structure for storing key data describe cached block, so that evitor could use to make its decision which one to choose for eviction

Here we use physical block id as the dict key, as there maybe several blocks with the same content hash, but their physical id is unique.

Source code in vllm/core/evictor.py

class BlockMetaData:
    """Data structure for storing key data describe cached block, so that
    evitor could use to make its decision which one to choose for eviction

    Here we use physical block id as the dict key, as there maybe several
    blocks with the same content hash, but their physical id is unique.
    """

    def __init__(self, content_hash: int, num_hashed_tokens: int,
                 last_accessed: float):
        self.content_hash = content_hash
        self.num_hashed_tokens = num_hashed_tokens
        self.last_accessed = last_accessed

content_hash `instance-attribute` ¶

content_hash = content_hash

last_accessed `instance-attribute` ¶

last_accessed = last_accessed

num_hashed_tokens `instance-attribute` ¶

num_hashed_tokens = num_hashed_tokens

init ¶

__init__(
    content_hash: int,
    num_hashed_tokens: int,
    last_accessed: float,
)

Source code in vllm/core/evictor.py

def __init__(self, content_hash: int, num_hashed_tokens: int,
             last_accessed: float):
    self.content_hash = content_hash
    self.num_hashed_tokens = num_hashed_tokens
    self.last_accessed = last_accessed

EvictionPolicy ¶

Bases: Enum

Enum for eviction policy used by make_evictor to instantiate the correct Evictor subclass.

Source code in vllm/core/evictor.py

class EvictionPolicy(enum.Enum):
    """Enum for eviction policy used by make_evictor to instantiate the correct
       Evictor subclass.
    """
    LRU = enum.auto()

LRU `class-attribute` `instance-attribute` ¶

LRU = auto()

Evictor ¶

Bases: ABC

The Evictor subclasses should be used by the BlockAllocator class to handle eviction of freed Blocks.

Source code in vllm/core/evictor.py

class Evictor(ABC):
    """The Evictor subclasses should be used by the BlockAllocator class to
    handle eviction of freed Blocks.
    """

    @abstractmethod
    def __init__(self):
        pass

    @abstractmethod
    def __contains__(self, block_id: int) -> bool:
        pass

    @abstractmethod
    def evict(self) -> Tuple[int, int]:
        """Runs the eviction algorithm and returns the evicted block's
        content hash along with physical block id along with physical block id
        """
        pass

    @abstractmethod
    def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
            last_accessed: float):
        """Adds block to the evictor, making it a candidate for eviction"""
        pass

    @abstractmethod
    def update(self, block_id: int, last_accessed: float):
        """Update corresponding block's access time in metadata"""
        pass

    @abstractmethod
    def remove(self, block_id: int):
        """Remove a given block id from the cache."""
        pass

    @property
    @abstractmethod
    def num_blocks(self) -> int:
        pass

num_blocks `abstractmethod` `property` ¶

num_blocks: int

contains `abstractmethod` ¶

__contains__(block_id: int) -> bool

Source code in vllm/core/evictor.py

@abstractmethod
def __contains__(self, block_id: int) -> bool:
    pass

init `abstractmethod` ¶

__init__()

Source code in vllm/core/evictor.py

@abstractmethod
def __init__(self):
    pass

add `abstractmethod` ¶

add(
    block_id: int,
    content_hash: int,
    num_hashed_tokens: int,
    last_accessed: float,
)

Adds block to the evictor, making it a candidate for eviction

Source code in vllm/core/evictor.py

@abstractmethod
def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
        last_accessed: float):
    """Adds block to the evictor, making it a candidate for eviction"""
    pass

evict `abstractmethod` ¶

evict() -> Tuple[int, int]

Runs the eviction algorithm and returns the evicted block's content hash along with physical block id along with physical block id

Source code in vllm/core/evictor.py

@abstractmethod
def evict(self) -> Tuple[int, int]:
    """Runs the eviction algorithm and returns the evicted block's
    content hash along with physical block id along with physical block id
    """
    pass

remove `abstractmethod` ¶

remove(block_id: int)

Remove a given block id from the cache.

Source code in vllm/core/evictor.py

@abstractmethod
def remove(self, block_id: int):
    """Remove a given block id from the cache."""
    pass

update `abstractmethod` ¶

update(block_id: int, last_accessed: float)

Update corresponding block's access time in metadata

Source code in vllm/core/evictor.py

@abstractmethod
def update(self, block_id: int, last_accessed: float):
    """Update corresponding block's access time in metadata"""
    pass

LRUEvictor ¶

Bases: Evictor

Evicts in a least-recently-used order using the last_accessed timestamp that's recorded in the Block. If there are multiple blocks with the same last_accessed time, then the one with the largest num_hashed_tokens will be evicted. If two blocks each have the lowest last_accessed time and highest num_hashed_tokens value, then one will be chose arbitrarily

Source code in vllm/core/evictor.py

class LRUEvictor(Evictor):
    """Evicts in a least-recently-used order using the last_accessed timestamp
    that's recorded in the Block. If there are multiple blocks with
    the same last_accessed time, then the one with the largest num_hashed_tokens
    will be evicted. If two blocks each have the lowest last_accessed time and
    highest num_hashed_tokens value, then one will be chose arbitrarily
    """

    # CLEANUP_THRESHOLD determines the maximum allowable size of the priority
    # queue relative to the free table size. When this threshold is exceeded,
    # a cleanup operation is triggered to reduce memory usage.
    CLEANUP_THRESHOLD = 50

    def __init__(self):
        self.free_table: Dict[int, BlockMetaData] = {}
        self.priority_queue = []

    def __contains__(self, block_id: int) -> bool:
        return block_id in self.free_table

    def evict(self) -> Tuple[int, int]:
        if len(self.free_table) == 0:
            raise ValueError("No usable cache memory left")

        while self.priority_queue:
            # We do not remove outdated entries from the priority queue at the
            # time of updating the last_accessed timestamp. Instead, outdated
            # entries are filtered out here during eviction. Outdated entries
            # would either not in the free table, or have older last accessed
            # time.
            last_accessed, _, block_id, content_hash = heapq.heappop(
                self.priority_queue)
            if (block_id in self.free_table and
                    self.free_table[block_id].last_accessed == last_accessed):
                self.free_table.pop(block_id)
                return block_id, content_hash

        raise ValueError("No usable cache memory left")

    def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
            last_accessed: float):
        self.free_table[block_id] = BlockMetaData(content_hash,
                                                  num_hashed_tokens,
                                                  last_accessed)
        heapq.heappush(
            self.priority_queue,
            (last_accessed, -num_hashed_tokens, block_id, content_hash))
        self._cleanup_if_necessary()

    def update(self, block_id: int, last_accessed: float):
        self.free_table[block_id].last_accessed = last_accessed

    def _cleanup_if_necessary(self):
        if len(self.priority_queue) > LRUEvictor.CLEANUP_THRESHOLD * len(
                self.free_table):
            self._cleanup()

    def _cleanup(self):
        new_priority_queue: List[Tuple[float, int, int, int]] = []

        for block_id, block in self.free_table.items():
            new_priority_queue.append(
                (block.last_accessed, -block.num_hashed_tokens, block_id,
                 block.content_hash))
        heapq.heapify(new_priority_queue)

        self.priority_queue = new_priority_queue

    def remove(self, block_id: int):
        if block_id not in self.free_table:
            raise ValueError(
                "Attempting to remove block that's not in the evictor")
        self.free_table.pop(block_id)

    @property
    def num_blocks(self) -> int:
        return len(self.free_table)

CLEANUP_THRESHOLD `class-attribute` `instance-attribute` ¶

CLEANUP_THRESHOLD = 50

free_table `instance-attribute` ¶

free_table: Dict[int, BlockMetaData] = {}

num_blocks `property` ¶

num_blocks: int

priority_queue `instance-attribute` ¶

priority_queue = []

contains ¶

__contains__(block_id: int) -> bool

Source code in vllm/core/evictor.py

def __contains__(self, block_id: int) -> bool:
    return block_id in self.free_table

init ¶

__init__()

Source code in vllm/core/evictor.py

def __init__(self):
    self.free_table: Dict[int, BlockMetaData] = {}
    self.priority_queue = []

_cleanup ¶

_cleanup()

Source code in vllm/core/evictor.py

def _cleanup(self):
    new_priority_queue: List[Tuple[float, int, int, int]] = []

    for block_id, block in self.free_table.items():
        new_priority_queue.append(
            (block.last_accessed, -block.num_hashed_tokens, block_id,
             block.content_hash))
    heapq.heapify(new_priority_queue)

    self.priority_queue = new_priority_queue

_cleanup_if_necessary ¶

_cleanup_if_necessary()

Source code in vllm/core/evictor.py

def _cleanup_if_necessary(self):
    if len(self.priority_queue) > LRUEvictor.CLEANUP_THRESHOLD * len(
            self.free_table):
        self._cleanup()

add ¶

add(
    block_id: int,
    content_hash: int,
    num_hashed_tokens: int,
    last_accessed: float,
)

Source code in vllm/core/evictor.py

def add(self, block_id: int, content_hash: int, num_hashed_tokens: int,
        last_accessed: float):
    self.free_table[block_id] = BlockMetaData(content_hash,
                                              num_hashed_tokens,
                                              last_accessed)
    heapq.heappush(
        self.priority_queue,
        (last_accessed, -num_hashed_tokens, block_id, content_hash))
    self._cleanup_if_necessary()

evict ¶

evict() -> Tuple[int, int]

Source code in vllm/core/evictor.py

def evict(self) -> Tuple[int, int]:
    if len(self.free_table) == 0:
        raise ValueError("No usable cache memory left")

    while self.priority_queue:
        # We do not remove outdated entries from the priority queue at the
        # time of updating the last_accessed timestamp. Instead, outdated
        # entries are filtered out here during eviction. Outdated entries
        # would either not in the free table, or have older last accessed
        # time.
        last_accessed, _, block_id, content_hash = heapq.heappop(
            self.priority_queue)
        if (block_id in self.free_table and
                self.free_table[block_id].last_accessed == last_accessed):
            self.free_table.pop(block_id)
            return block_id, content_hash

    raise ValueError("No usable cache memory left")

remove ¶

remove(block_id: int)

Source code in vllm/core/evictor.py

def remove(self, block_id: int):
    if block_id not in self.free_table:
        raise ValueError(
            "Attempting to remove block that's not in the evictor")
    self.free_table.pop(block_id)

update ¶

update(block_id: int, last_accessed: float)

Source code in vllm/core/evictor.py

def update(self, block_id: int, last_accessed: float):
    self.free_table[block_id].last_accessed = last_accessed

make_evictor ¶

make_evictor(eviction_policy: EvictionPolicy) -> Evictor

Source code in vllm/core/evictor.py

def make_evictor(eviction_policy: EvictionPolicy) -> Evictor:
    if eviction_policy == EvictionPolicy.LRU:
        return LRUEvictor()
    else:
        raise ValueError(f"Unknown cache eviction policy: {eviction_policy}")

vllm.core.evictor

BlockMetaData ¶

content_hash instance-attribute ¶

last_accessed instance-attribute ¶

num_hashed_tokens instance-attribute ¶

__init__ ¶

EvictionPolicy ¶

LRU class-attribute instance-attribute ¶

Evictor ¶

num_blocks abstractmethod property ¶

__contains__ abstractmethod ¶

__init__ abstractmethod ¶

add abstractmethod ¶

evict abstractmethod ¶

remove abstractmethod ¶

update abstractmethod ¶

LRUEvictor ¶

CLEANUP_THRESHOLD class-attribute instance-attribute ¶

free_table instance-attribute ¶

num_blocks property ¶

priority_queue instance-attribute ¶

__contains__ ¶

__init__ ¶

_cleanup ¶

_cleanup_if_necessary ¶

add ¶

evict ¶

remove ¶

update ¶

make_evictor ¶

content_hash `instance-attribute` ¶

last_accessed `instance-attribute` ¶

num_hashed_tokens `instance-attribute` ¶

init ¶

LRU `class-attribute` `instance-attribute` ¶

num_blocks `abstractmethod` `property` ¶

contains `abstractmethod` ¶

init `abstractmethod` ¶

add `abstractmethod` ¶

evict `abstractmethod` ¶

remove `abstractmethod` ¶

update `abstractmethod` ¶

CLEANUP_THRESHOLD `class-attribute` `instance-attribute` ¶

free_table `instance-attribute` ¶

num_blocks `property` ¶

priority_queue `instance-attribute` ¶

contains ¶

init ¶