vllm.core.block.naive_block

Refcount `module-attribute` ¶

Refcount = int

NaiveBlock ¶

Bases: Block

An implementation of the Block class that does not support prefix caching.

The NaiveBlock class represents a block of token IDs with a fixed size. It provides methods for appending token IDs to the block and manages copy-on -write operations when necessary.

Parameters:

Name	Type	Description	Default
`prev_block`	`Block`	The previous block in the sequence.	required
`token_ids`	`List[int]`	The initial token IDs to be stored in the block.	required
`block_size`	`int`	The maximum number of token IDs that can be stored in the block.	required
`allocator`	`BlockAllocator`	The block allocator associated with this block.	required
`block_id`	`Optional[int]`	The physical block index of this block. Defaults to None, which means no allocation has been made.	`None`
`_cow_target`	`Optional[Block]`	The copy-on-write target block. If not provided, it defaults to self.	`None`

Source code in vllm/core/block/naive_block.py

class NaiveBlock(Block):
    """An implementation of the Block class that does not support prefix
    caching.

    The NaiveBlock class represents a block of token IDs with a fixed size. It
    provides methods for appending token IDs to the block and manages copy-on
    -write operations when necessary.

    Args:
        prev_block (Block): The previous block in the sequence.
        token_ids (List[int]): The initial token IDs to be stored in the block.
        block_size (int): The maximum number of token IDs that can be stored in
            the block.
        allocator (BlockAllocator): The block allocator associated with this
            block.
        block_id (Optional[int], optional): The physical block index
            of this block. Defaults to None, which means no allocation has been
            made.
        _cow_target (Optional[Block], optional): The copy-on-write target block.
            If not provided, it defaults to self.
    """

    def __init__(self,
                 prev_block: Optional[Block],
                 token_ids: List[int],
                 block_size: int,
                 allocator: BlockAllocator,
                 block_id: Optional[int] = None,
                 _cow_target: Optional[Block] = None,
                 extra_hash: Optional[int] = None):
        self._token_ids: List[int] = []
        self._block_size = block_size
        self._prev_block = prev_block
        self._block_id = block_id
        self._allocator = allocator
        self._cow_target = _cow_target if _cow_target is not None else self

        self._append_token_ids_no_cow(token_ids)

    def append_token_ids(self, token_ids: List[int]) -> None:
        """Appends the given token IDs to the block and performs a 
        copy-on-write if necessary.

        Args:
            token_ids (Optional[List[int]]): The token IDs to be appended 
                to the block.
        """
        self._append_token_ids_no_cow(token_ids)

        if self._block_id is not None:
            self._block_id = (self._allocator.cow_block_if_not_appendable(
                self._cow_target))

    def _append_token_ids_no_cow(self, token_ids: List[int]) -> None:
        """Appends the given token IDs to the block

        Args:
            token_ids (List[int]): The token IDs to be appended to the block.
        """
        if len(token_ids) == 0:
            return

        assert len(token_ids) <= self.num_empty_slots

        self._token_ids.extend(token_ids)

    @property
    def computed(self) -> bool:
        raise NotImplementedError

    @computed.setter
    def computed(self, value) -> None:
        raise NotImplementedError

    @property
    def last_accessed(self) -> float:
        raise NotImplementedError

    @last_accessed.setter
    def last_accessed(self, last_accessed_ts: float):
        raise NotImplementedError

    @property
    def block_id(self) -> Optional[int]:
        return self._block_id

    @block_id.setter
    def block_id(self, value: Optional[int]) -> None:
        self._block_id = value

    @property
    def is_full(self) -> bool:
        return self.num_empty_slots == 0

    @property
    def num_empty_slots(self) -> int:
        return self._block_size - len(self.token_ids)

    @property
    def token_ids(self) -> List[int]:
        return self._token_ids

    @property
    def num_tokens_total(self) -> int:
        raise NotImplementedError(
            "num_tokens_total is not used for naive block")

    @property
    def block_size(self) -> int:
        return self._block_size

    @property
    def prev_block(self) -> Optional["Block"]:
        return self._prev_block

    @property
    def extra_hash(self):
        return None

    @property
    def content_hash(self) -> Optional[int]:
        return None

_allocator `instance-attribute` ¶

_allocator = allocator

_block_id `instance-attribute` ¶

_block_id = block_id

_block_size `instance-attribute` ¶

_block_size = block_size

_cow_target `instance-attribute` ¶

_cow_target = (
    _cow_target if _cow_target is not None else self
)

_prev_block `instance-attribute` ¶

_prev_block = prev_block

_token_ids `instance-attribute` ¶

_token_ids: List[int] = []

block_id `property` `writable` ¶

block_id: Optional[int]

block_size `property` ¶

block_size: int

computed `property` `writable` ¶

computed: bool

content_hash `property` ¶

content_hash: Optional[int]

extra_hash `property` ¶

extra_hash

is_full `property` ¶

is_full: bool

last_accessed `property` `writable` ¶

last_accessed: float

num_empty_slots `property` ¶

num_empty_slots: int

num_tokens_total `property` ¶

num_tokens_total: int

prev_block `property` ¶

prev_block: Optional[Block]

token_ids `property` ¶

token_ids: List[int]

init ¶

__init__(
    prev_block: Optional[Block],
    token_ids: List[int],
    block_size: int,
    allocator: BlockAllocator,
    block_id: Optional[int] = None,
    _cow_target: Optional[Block] = None,
    extra_hash: Optional[int] = None,
)

Source code in vllm/core/block/naive_block.py

def __init__(self,
             prev_block: Optional[Block],
             token_ids: List[int],
             block_size: int,
             allocator: BlockAllocator,
             block_id: Optional[int] = None,
             _cow_target: Optional[Block] = None,
             extra_hash: Optional[int] = None):
    self._token_ids: List[int] = []
    self._block_size = block_size
    self._prev_block = prev_block
    self._block_id = block_id
    self._allocator = allocator
    self._cow_target = _cow_target if _cow_target is not None else self

    self._append_token_ids_no_cow(token_ids)

_append_token_ids_no_cow ¶

_append_token_ids_no_cow(token_ids: List[int]) -> None

Appends the given token IDs to the block

Parameters:

Name	Type	Description	Default
`token_ids`	`List[int]`	The token IDs to be appended to the block.	required

Source code in vllm/core/block/naive_block.py

def _append_token_ids_no_cow(self, token_ids: List[int]) -> None:
    """Appends the given token IDs to the block

    Args:
        token_ids (List[int]): The token IDs to be appended to the block.
    """
    if len(token_ids) == 0:
        return

    assert len(token_ids) <= self.num_empty_slots

    self._token_ids.extend(token_ids)

append_token_ids ¶

append_token_ids(token_ids: List[int]) -> None

Appends the given token IDs to the block and performs a copy-on-write if necessary.

Parameters:

Name	Type	Description	Default
`token_ids`	`Optional[List[int]]`	The token IDs to be appended to the block.	required

Source code in vllm/core/block/naive_block.py

def append_token_ids(self, token_ids: List[int]) -> None:
    """Appends the given token IDs to the block and performs a 
    copy-on-write if necessary.

    Args:
        token_ids (Optional[List[int]]): The token IDs to be appended 
            to the block.
    """
    self._append_token_ids_no_cow(token_ids)

    if self._block_id is not None:
        self._block_id = (self._allocator.cow_block_if_not_appendable(
            self._cow_target))

NaiveBlockAllocator ¶

Bases: BlockAllocator

A simple block allocator that manages blocks of memory without prefix caching.

Parameters:

Name	Type	Description	Default
`create_block`	`Factory`	A factory function for creating new blocks. This is used when a NaiveBlockAllocator is composed within a prefix caching allocator -- the naive block allocator must construct prefix caching blocks (but shouldn't know anything else about them).	required
`num_blocks`	`int`	The total number of blocks to manage.	required
`block_size`	`int`	The size of each block in tokens.	required
`block_ids`	`Optional[Iterable[int]]`	An optional iterable of block IDs. If not provided, block IDs will be assigned sequentially from 0 to num_blocks - 1.	`None`

Source code in vllm/core/block/naive_block.py

class NaiveBlockAllocator(BlockAllocator):
    """A simple block allocator that manages blocks of memory without prefix
    caching.

    Args:
        create_block (Block.Factory): A factory function for creating new
            blocks. This is used when a NaiveBlockAllocator is composed within
            a prefix caching allocator -- the naive block allocator must
            construct prefix caching blocks (but shouldn't know anything else
            about them).
        num_blocks (int): The total number of blocks to manage.
        block_size (int): The size of each block in tokens.
        block_ids (Optional[Iterable[int]], optional): An optional iterable of
            block IDs. If not provided, block IDs will be assigned sequentially
            from 0 to num_blocks - 1.
    """

    def __init__(
        self,
        create_block: Block.Factory,
        num_blocks: int,
        block_size: int,
        block_ids: Optional[Iterable[int]] = None,
        block_pool: Optional[BlockPool] = None,
    ):
        if block_ids is None:
            block_ids = range(num_blocks)

        self._free_block_indices: Deque[BlockId] = deque(block_ids)
        self._all_block_indices = frozenset(block_ids)
        assert len(self._all_block_indices) == num_blocks

        self._refcounter = RefCounter(
            all_block_indices=self._free_block_indices)
        self._block_size = block_size

        self._cow_tracker = CopyOnWriteTracker(
            refcounter=self._refcounter.as_readonly())

        if block_pool is None:
            extra_factor = 4
            # Pre-allocate "num_blocks * extra_factor" block objects.
            # The "* extra_factor" is a buffer to allow more block objects
            # than physical blocks
            self._block_pool = BlockPool(self._block_size, create_block, self,
                                         num_blocks * extra_factor)
        else:
            # In this case, the block pool is provided by the caller,
            # which means that there is most likely a need to share
            # a block pool between allocators
            self._block_pool = block_pool

    def allocate_immutable_block(self,
                                 prev_block: Optional[Block],
                                 token_ids: List[int],
                                 extra_hash: Optional[int] = None,
                                 device: Optional[Device] = None) -> Block:
        """Allocates a new immutable block with the given token IDs, linked to
        the previous block.

        Args:
            prev_block (Optional[Block]): The previous block in the sequence. If
                None, then the block to be allocated is the first block in the
                sequence.
            token_ids (List[int]): The token IDs to be stored in the new block.

        Returns:
            Block: The newly allocated immutable block.
        """
        assert device is None
        block = self.allocate_mutable_block(prev_block=prev_block)
        block.append_token_ids(token_ids)
        return block

    def allocate_immutable_blocks(
            self,
            prev_block: Optional[Block],
            block_token_ids: List[List[int]],
            extra_hash: Optional[int] = None,
            device: Optional[Device] = None) -> List[Block]:
        assert device is None
        num_blocks = len(block_token_ids)

        block_ids = []
        for i in range(num_blocks):
            block_ids.append(self._allocate_block_id())

        blocks = []
        for i in range(num_blocks):
            prev_block = self._block_pool.init_block(
                prev_block=prev_block,
                token_ids=block_token_ids[i],
                block_size=self._block_size,
                physical_block_id=block_ids[i])
            blocks.append(prev_block)

        return blocks

    def allocate_mutable_block(self,
                               prev_block: Optional[Block],
                               extra_hash: Optional[int] = None,
                               device: Optional[Device] = None) -> Block:
        """Allocates a new mutable block, linked to the previous block.

        Args:
            prev_block (Optional[Block]): The previous block in the sequence. If
                None, then the block to be allocated is the first block in the
                sequence.

        Returns:
            Block: The newly allocated mutable block.
        """
        assert device is None
        block_id = self._allocate_block_id()
        block = self._block_pool.init_block(prev_block=prev_block,
                                            token_ids=[],
                                            block_size=self._block_size,
                                            physical_block_id=block_id)
        return block

    def _allocate_block_id(self) -> BlockId:
        if not self._free_block_indices:
            raise BlockAllocator.NoFreeBlocksError()

        block_id = self._free_block_indices.popleft()
        self._refcounter.incr(block_id)
        return block_id

    def _free_block_id(self, block: Union[Block, BlockId]) -> None:
        if isinstance(block, Block):
            block_id = block.block_id
            block.block_id = None
        else:
            block_id = block
        assert block_id is not None

        refcount = self._refcounter.decr(block_id)
        if refcount == 0:
            self._free_block_indices.appendleft(block_id)

    def free(self, block: Block, keep_block_object: bool = False) -> None:
        # Release the physical block id
        self._free_block_id(block)

        # Release the block object
        if not keep_block_object:
            self._block_pool.free_block(block)

    def free_block_id(self, block_id: BlockId) -> None:
        self._free_block_id(block_id)

    def fork(self, last_block: Block) -> List[Block]:
        """Creates a new sequence of blocks that shares the same underlying
        memory as the original sequence.

        Args:
            last_block (Block): The last block in the original sequence.

        Returns:
            List[Block]: The new sequence of blocks that shares the same memory
                as the original sequence.
        """
        source_blocks = get_all_blocks_recursively(last_block)

        forked_blocks: List[Block] = []
        prev_block = None
        for block in source_blocks:

            # Increment refcount for each block.
            assert block.block_id is not None
            refcount = self._refcounter.incr(block.block_id)
            assert refcount != 1, "can't fork free'd block"

            forked_block = self._block_pool.init_block(
                prev_block=prev_block,
                token_ids=block.token_ids,
                block_size=self._block_size,
                physical_block_id=block.block_id)

            forked_blocks.append(forked_block)
            prev_block = forked_blocks[-1]

        return forked_blocks

    def get_num_free_blocks(self) -> int:
        return len(self._free_block_indices)

    def get_num_total_blocks(self) -> int:
        return len(self._all_block_indices)

    def get_physical_block_id(self, absolute_id: int) -> int:
        """Returns the zero-offset block id on certain block allocator
        given the absolute block id.

        Args:
            absolute_id (int): The absolute block id for the block 
            in whole allocator.

        Returns:
            int: The zero-offset block id on certain device.
        """
        return sorted(self._all_block_indices).index(absolute_id)

    @property
    def refcounter(self):
        return self._refcounter

    @property
    def all_block_ids(self) -> FrozenSet[int]:
        return self._all_block_indices

    def cow_block_if_not_appendable(self, block: Block) -> BlockId:
        """Performs a copy-on-write operation on the given block if it is not
        appendable.

        Args:
            block (Block): The block to check for copy-on-write.

        Returns:
            BlockId: The block index of the new block if a copy-on-write 
                operation was performed, or the original block index if
                no copy-on-write was necessary.
        """
        src_block_id = block.block_id
        assert src_block_id is not None

        if self._cow_tracker.is_appendable(block):
            return src_block_id

        self._free_block_id(block)
        trg_block_id = self._allocate_block_id()

        self._cow_tracker.record_cow(src_block_id, trg_block_id)

        return trg_block_id

    def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
        """Returns the copy-on-write source->destination mapping and clears it.

        Returns:
            List[Tuple[BlockId, BlockId]]: A list mapping source
                block indices to destination block indices.
        """
        return self._cow_tracker.clear_cows()

    def mark_blocks_as_accessed(self, block_ids: List[int],
                                now: float) -> None:
        """Mark blocks as accessed, used in prefix caching.

        Since the naive allocator does not implement prefix caching, we do
        nothing.
        """
        pass

    def mark_blocks_as_computed(self, block_ids: List[int]) -> None:
        """Mark blocks as computed, used in prefix caching.

        Since the naive allocator does not implement prefix caching, we do
        nothing.
        """
        pass

    def get_common_computed_block_ids(
            self, computed_seq_block_ids: List[List[int]]) -> List[int]:
        """Determine blocks that can be skipped in prefill.

        Since the naive allocator does not support prefix caching, always return
        an empty list.
        """
        return []

    def promote_to_immutable_block(self, block: Block) -> BlockId:
        raise NotImplementedError("There is no promotion for naive blocks")

    def get_num_full_blocks_touched(self, blocks: List[Block]) -> int:
        """Returns the number of full blocks that will be touched by
        swapping in/out.

        Args:
            blocks: List of blocks to be swapped.
        Returns:
            int: the number of full blocks that will be touched by
                swapping in/out the given blocks. Non full blocks are ignored
                when deciding the number of blocks to touch.
        """
        # NOTE: for naive block, we use set to eliminate common blocks among
        # seqs, also we compare the empty slots in the mutable blocks with
        # lookahead slots to get the number of unique new block that are
        # needed.
        old_block_set = set()
        for block in blocks:
            if block.is_full:
                old_block_set.add(block)
        return len(old_block_set)

    def swap_out(self, blocks: List[Block]) -> None:
        for block in blocks:
            self._free_block_id(block)

    def swap_in(self, blocks: List[Block]) -> None:
        for block in blocks:
            # Here we allocate either immutable or mutable block and then
            # extract its block_id. Note that the block object is released
            # and the block_id is assigned to "block" to allow reusing the
            # existing "block" object
            if block.is_full:
                tmp_block = self.allocate_immutable_block(
                    prev_block=block.prev_block, token_ids=block.token_ids)
            else:
                tmp_block = self.allocate_mutable_block(
                    prev_block=block.prev_block)
                tmp_block.append_token_ids(block.token_ids)

            block_id = tmp_block.block_id
            tmp_block.block_id = None
            self._block_pool.free_block(tmp_block)

            block.block_id = block_id  # Assign block_id

    def get_prefix_cache_hit_rate(self) -> float:
        return -1

    def reset_prefix_cache(self) -> bool:
        """No prefix cache for naive block allocator."""
        return True

    def find_cached_blocks_prefix(self, block_hashes: List[int]) -> List[int]:
        # Not applicable for naive block allocator.
        return []

_all_block_indices `instance-attribute` ¶

_all_block_indices = frozenset(block_ids)

_block_pool `instance-attribute` ¶

_block_pool = BlockPool(
    _block_size,
    create_block,
    self,
    num_blocks * extra_factor,
)

_block_size `instance-attribute` ¶

_block_size = block_size

_cow_tracker `instance-attribute` ¶

_cow_tracker = CopyOnWriteTracker(refcounter=as_readonly())

_free_block_indices `instance-attribute` ¶

_free_block_indices: Deque[BlockId] = deque(block_ids)

_refcounter `instance-attribute` ¶

_refcounter = RefCounter(
    all_block_indices=_free_block_indices
)

all_block_ids `property` ¶

all_block_ids: FrozenSet[int]

refcounter `property` ¶

refcounter

init ¶

__init__(
    create_block: Factory,
    num_blocks: int,
    block_size: int,
    block_ids: Optional[Iterable[int]] = None,
    block_pool: Optional[BlockPool] = None,
)

Source code in vllm/core/block/naive_block.py

def __init__(
    self,
    create_block: Block.Factory,
    num_blocks: int,
    block_size: int,
    block_ids: Optional[Iterable[int]] = None,
    block_pool: Optional[BlockPool] = None,
):
    if block_ids is None:
        block_ids = range(num_blocks)

    self._free_block_indices: Deque[BlockId] = deque(block_ids)
    self._all_block_indices = frozenset(block_ids)
    assert len(self._all_block_indices) == num_blocks

    self._refcounter = RefCounter(
        all_block_indices=self._free_block_indices)
    self._block_size = block_size

    self._cow_tracker = CopyOnWriteTracker(
        refcounter=self._refcounter.as_readonly())

    if block_pool is None:
        extra_factor = 4
        # Pre-allocate "num_blocks * extra_factor" block objects.
        # The "* extra_factor" is a buffer to allow more block objects
        # than physical blocks
        self._block_pool = BlockPool(self._block_size, create_block, self,
                                     num_blocks * extra_factor)
    else:
        # In this case, the block pool is provided by the caller,
        # which means that there is most likely a need to share
        # a block pool between allocators
        self._block_pool = block_pool

_allocate_block_id ¶

_allocate_block_id() -> BlockId

Source code in vllm/core/block/naive_block.py

def _allocate_block_id(self) -> BlockId:
    if not self._free_block_indices:
        raise BlockAllocator.NoFreeBlocksError()

    block_id = self._free_block_indices.popleft()
    self._refcounter.incr(block_id)
    return block_id

_free_block_id ¶

_free_block_id(block: Union[Block, BlockId]) -> None

Source code in vllm/core/block/naive_block.py

def _free_block_id(self, block: Union[Block, BlockId]) -> None:
    if isinstance(block, Block):
        block_id = block.block_id
        block.block_id = None
    else:
        block_id = block
    assert block_id is not None

    refcount = self._refcounter.decr(block_id)
    if refcount == 0:
        self._free_block_indices.appendleft(block_id)

allocate_immutable_block ¶

allocate_immutable_block(
    prev_block: Optional[Block],
    token_ids: List[int],
    extra_hash: Optional[int] = None,
    device: Optional[Device] = None,
) -> Block

Allocates a new immutable block with the given token IDs, linked to the previous block.

Parameters:

Name	Type	Description	Default
`prev_block`	`Optional[Block]`	The previous block in the sequence. If None, then the block to be allocated is the first block in the sequence.	required
`token_ids`	`List[int]`	The token IDs to be stored in the new block.	required

Returns:

Name	Type	Description
`Block`	`Block`	The newly allocated immutable block.

Source code in vllm/core/block/naive_block.py

def allocate_immutable_block(self,
                             prev_block: Optional[Block],
                             token_ids: List[int],
                             extra_hash: Optional[int] = None,
                             device: Optional[Device] = None) -> Block:
    """Allocates a new immutable block with the given token IDs, linked to
    the previous block.

    Args:
        prev_block (Optional[Block]): The previous block in the sequence. If
            None, then the block to be allocated is the first block in the
            sequence.
        token_ids (List[int]): The token IDs to be stored in the new block.

    Returns:
        Block: The newly allocated immutable block.
    """
    assert device is None
    block = self.allocate_mutable_block(prev_block=prev_block)
    block.append_token_ids(token_ids)
    return block

allocate_immutable_blocks ¶

allocate_immutable_blocks(
    prev_block: Optional[Block],
    block_token_ids: List[List[int]],
    extra_hash: Optional[int] = None,
    device: Optional[Device] = None,
) -> List[Block]

Source code in vllm/core/block/naive_block.py

def allocate_immutable_blocks(
        self,
        prev_block: Optional[Block],
        block_token_ids: List[List[int]],
        extra_hash: Optional[int] = None,
        device: Optional[Device] = None) -> List[Block]:
    assert device is None
    num_blocks = len(block_token_ids)

    block_ids = []
    for i in range(num_blocks):
        block_ids.append(self._allocate_block_id())

    blocks = []
    for i in range(num_blocks):
        prev_block = self._block_pool.init_block(
            prev_block=prev_block,
            token_ids=block_token_ids[i],
            block_size=self._block_size,
            physical_block_id=block_ids[i])
        blocks.append(prev_block)

    return blocks

allocate_mutable_block ¶

allocate_mutable_block(
    prev_block: Optional[Block],
    extra_hash: Optional[int] = None,
    device: Optional[Device] = None,
) -> Block

Allocates a new mutable block, linked to the previous block.

Parameters:

Name	Type	Description	Default
`prev_block`	`Optional[Block]`	The previous block in the sequence. If None, then the block to be allocated is the first block in the sequence.	required

Returns:

Name	Type	Description
`Block`	`Block`	The newly allocated mutable block.

Source code in vllm/core/block/naive_block.py

def allocate_mutable_block(self,
                           prev_block: Optional[Block],
                           extra_hash: Optional[int] = None,
                           device: Optional[Device] = None) -> Block:
    """Allocates a new mutable block, linked to the previous block.

    Args:
        prev_block (Optional[Block]): The previous block in the sequence. If
            None, then the block to be allocated is the first block in the
            sequence.

    Returns:
        Block: The newly allocated mutable block.
    """
    assert device is None
    block_id = self._allocate_block_id()
    block = self._block_pool.init_block(prev_block=prev_block,
                                        token_ids=[],
                                        block_size=self._block_size,
                                        physical_block_id=block_id)
    return block

clear_copy_on_writes ¶

clear_copy_on_writes() -> List[Tuple[BlockId, BlockId]]

Returns the copy-on-write source->destination mapping and clears it.

Returns:

Type	Description
`List[Tuple[BlockId, BlockId]]`	List[Tuple[BlockId, BlockId]]: A list mapping source block indices to destination block indices.

Source code in vllm/core/block/naive_block.py

def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
    """Returns the copy-on-write source->destination mapping and clears it.

    Returns:
        List[Tuple[BlockId, BlockId]]: A list mapping source
            block indices to destination block indices.
    """
    return self._cow_tracker.clear_cows()

cow_block_if_not_appendable ¶

cow_block_if_not_appendable(block: Block) -> BlockId

Performs a copy-on-write operation on the given block if it is not appendable.

Parameters:

Name	Type	Description	Default
`block`	`Block`	The block to check for copy-on-write.	required

Returns:

Name	Type	Description
`BlockId`	`BlockId`	The block index of the new block if a copy-on-write operation was performed, or the original block index if no copy-on-write was necessary.

Source code in vllm/core/block/naive_block.py

def cow_block_if_not_appendable(self, block: Block) -> BlockId:
    """Performs a copy-on-write operation on the given block if it is not
    appendable.

    Args:
        block (Block): The block to check for copy-on-write.

    Returns:
        BlockId: The block index of the new block if a copy-on-write 
            operation was performed, or the original block index if
            no copy-on-write was necessary.
    """
    src_block_id = block.block_id
    assert src_block_id is not None

    if self._cow_tracker.is_appendable(block):
        return src_block_id

    self._free_block_id(block)
    trg_block_id = self._allocate_block_id()

    self._cow_tracker.record_cow(src_block_id, trg_block_id)

    return trg_block_id

find_cached_blocks_prefix ¶

find_cached_blocks_prefix(
    block_hashes: List[int],
) -> List[int]

Source code in vllm/core/block/naive_block.py

def find_cached_blocks_prefix(self, block_hashes: List[int]) -> List[int]:
    # Not applicable for naive block allocator.
    return []

fork ¶

fork(last_block: Block) -> List[Block]

Creates a new sequence of blocks that shares the same underlying memory as the original sequence.

Parameters:

Name	Type	Description	Default
`last_block`	`Block`	The last block in the original sequence.	required

Returns:

Type	Description
`List[Block]`	List[Block]: The new sequence of blocks that shares the same memory as the original sequence.

Source code in vllm/core/block/naive_block.py

def fork(self, last_block: Block) -> List[Block]:
    """Creates a new sequence of blocks that shares the same underlying
    memory as the original sequence.

    Args:
        last_block (Block): The last block in the original sequence.

    Returns:
        List[Block]: The new sequence of blocks that shares the same memory
            as the original sequence.
    """
    source_blocks = get_all_blocks_recursively(last_block)

    forked_blocks: List[Block] = []
    prev_block = None
    for block in source_blocks:

        # Increment refcount for each block.
        assert block.block_id is not None
        refcount = self._refcounter.incr(block.block_id)
        assert refcount != 1, "can't fork free'd block"

        forked_block = self._block_pool.init_block(
            prev_block=prev_block,
            token_ids=block.token_ids,
            block_size=self._block_size,
            physical_block_id=block.block_id)

        forked_blocks.append(forked_block)
        prev_block = forked_blocks[-1]

    return forked_blocks

free ¶

free(block: Block, keep_block_object: bool = False) -> None

Source code in vllm/core/block/naive_block.py

def free(self, block: Block, keep_block_object: bool = False) -> None:
    # Release the physical block id
    self._free_block_id(block)

    # Release the block object
    if not keep_block_object:
        self._block_pool.free_block(block)

free_block_id ¶

free_block_id(block_id: BlockId) -> None

Source code in vllm/core/block/naive_block.py

def free_block_id(self, block_id: BlockId) -> None:
    self._free_block_id(block_id)

get_common_computed_block_ids ¶

get_common_computed_block_ids(
    computed_seq_block_ids: List[List[int]],
) -> List[int]

Determine blocks that can be skipped in prefill.

Since the naive allocator does not support prefix caching, always return an empty list.

Source code in vllm/core/block/naive_block.py

def get_common_computed_block_ids(
        self, computed_seq_block_ids: List[List[int]]) -> List[int]:
    """Determine blocks that can be skipped in prefill.

    Since the naive allocator does not support prefix caching, always return
    an empty list.
    """
    return []

get_num_free_blocks ¶

get_num_free_blocks() -> int

Source code in vllm/core/block/naive_block.py

def get_num_free_blocks(self) -> int:
    return len(self._free_block_indices)

get_num_full_blocks_touched ¶

get_num_full_blocks_touched(blocks: List[Block]) -> int

Returns the number of full blocks that will be touched by swapping in/out.

Parameters:

Name	Type	Description	Default
`blocks`	`List[Block]`	List of blocks to be swapped.	required

Returns: int: the number of full blocks that will be touched by swapping in/out the given blocks. Non full blocks are ignored when deciding the number of blocks to touch.

Source code in vllm/core/block/naive_block.py

def get_num_full_blocks_touched(self, blocks: List[Block]) -> int:
    """Returns the number of full blocks that will be touched by
    swapping in/out.

    Args:
        blocks: List of blocks to be swapped.
    Returns:
        int: the number of full blocks that will be touched by
            swapping in/out the given blocks. Non full blocks are ignored
            when deciding the number of blocks to touch.
    """
    # NOTE: for naive block, we use set to eliminate common blocks among
    # seqs, also we compare the empty slots in the mutable blocks with
    # lookahead slots to get the number of unique new block that are
    # needed.
    old_block_set = set()
    for block in blocks:
        if block.is_full:
            old_block_set.add(block)
    return len(old_block_set)

get_num_total_blocks ¶

get_num_total_blocks() -> int

Source code in vllm/core/block/naive_block.py

def get_num_total_blocks(self) -> int:
    return len(self._all_block_indices)

get_physical_block_id ¶

get_physical_block_id(absolute_id: int) -> int

Returns the zero-offset block id on certain block allocator given the absolute block id.

Parameters:

Name	Type	Description	Default
`absolute_id`	`int`	The absolute block id for the block	required

Returns:

Name	Type	Description
`int`	`int`	The zero-offset block id on certain device.

Source code in vllm/core/block/naive_block.py

def get_physical_block_id(self, absolute_id: int) -> int:
    """Returns the zero-offset block id on certain block allocator
    given the absolute block id.

    Args:
        absolute_id (int): The absolute block id for the block 
        in whole allocator.

    Returns:
        int: The zero-offset block id on certain device.
    """
    return sorted(self._all_block_indices).index(absolute_id)

get_prefix_cache_hit_rate ¶

get_prefix_cache_hit_rate() -> float

Source code in vllm/core/block/naive_block.py

def get_prefix_cache_hit_rate(self) -> float:
    return -1

mark_blocks_as_accessed ¶

mark_blocks_as_accessed(
    block_ids: List[int], now: float
) -> None

Mark blocks as accessed, used in prefix caching.

Since the naive allocator does not implement prefix caching, we do nothing.

Source code in vllm/core/block/naive_block.py

def mark_blocks_as_accessed(self, block_ids: List[int],
                            now: float) -> None:
    """Mark blocks as accessed, used in prefix caching.

    Since the naive allocator does not implement prefix caching, we do
    nothing.
    """
    pass

mark_blocks_as_computed ¶

mark_blocks_as_computed(block_ids: List[int]) -> None

Mark blocks as computed, used in prefix caching.

Since the naive allocator does not implement prefix caching, we do nothing.

Source code in vllm/core/block/naive_block.py

def mark_blocks_as_computed(self, block_ids: List[int]) -> None:
    """Mark blocks as computed, used in prefix caching.

    Since the naive allocator does not implement prefix caching, we do
    nothing.
    """
    pass

promote_to_immutable_block ¶

promote_to_immutable_block(block: Block) -> BlockId

Source code in vllm/core/block/naive_block.py

def promote_to_immutable_block(self, block: Block) -> BlockId:
    raise NotImplementedError("There is no promotion for naive blocks")

reset_prefix_cache ¶

reset_prefix_cache() -> bool

No prefix cache for naive block allocator.

Source code in vllm/core/block/naive_block.py

def reset_prefix_cache(self) -> bool:
    """No prefix cache for naive block allocator."""
    return True

swap_in ¶

swap_in(blocks: List[Block]) -> None

Source code in vllm/core/block/naive_block.py

def swap_in(self, blocks: List[Block]) -> None:
    for block in blocks:
        # Here we allocate either immutable or mutable block and then
        # extract its block_id. Note that the block object is released
        # and the block_id is assigned to "block" to allow reusing the
        # existing "block" object
        if block.is_full:
            tmp_block = self.allocate_immutable_block(
                prev_block=block.prev_block, token_ids=block.token_ids)
        else:
            tmp_block = self.allocate_mutable_block(
                prev_block=block.prev_block)
            tmp_block.append_token_ids(block.token_ids)

        block_id = tmp_block.block_id
        tmp_block.block_id = None
        self._block_pool.free_block(tmp_block)

        block.block_id = block_id  # Assign block_id

swap_out ¶

swap_out(blocks: List[Block]) -> None

Source code in vllm/core/block/naive_block.py

def swap_out(self, blocks: List[Block]) -> None:
    for block in blocks:
        self._free_block_id(block)

vllm.core.block.naive_block

Refcount module-attribute ¶

NaiveBlock ¶

_allocator instance-attribute ¶

_block_id instance-attribute ¶

_block_size instance-attribute ¶

_cow_target instance-attribute ¶

_prev_block instance-attribute ¶

_token_ids instance-attribute ¶

block_id property writable ¶

block_size property ¶

computed property writable ¶

content_hash property ¶

extra_hash property ¶

is_full property ¶

last_accessed property writable ¶

num_empty_slots property ¶

num_tokens_total property ¶

prev_block property ¶

token_ids property ¶

__init__ ¶

_append_token_ids_no_cow ¶

append_token_ids ¶

NaiveBlockAllocator ¶

_all_block_indices instance-attribute ¶

_block_pool instance-attribute ¶

_block_size instance-attribute ¶

_cow_tracker instance-attribute ¶

_free_block_indices instance-attribute ¶

_refcounter instance-attribute ¶

all_block_ids property ¶

refcounter property ¶

__init__ ¶

_allocate_block_id ¶

_free_block_id ¶

allocate_immutable_block ¶

allocate_immutable_blocks ¶

allocate_mutable_block ¶

clear_copy_on_writes ¶

cow_block_if_not_appendable ¶

find_cached_blocks_prefix ¶

fork ¶

free ¶

free_block_id ¶

get_common_computed_block_ids ¶

get_num_free_blocks ¶

get_num_full_blocks_touched ¶

get_num_total_blocks ¶

get_physical_block_id ¶

get_prefix_cache_hit_rate ¶

mark_blocks_as_accessed ¶

mark_blocks_as_computed ¶

promote_to_immutable_block ¶

reset_prefix_cache ¶

swap_in ¶

swap_out ¶

Refcount `module-attribute` ¶

_allocator `instance-attribute` ¶

_block_id `instance-attribute` ¶

_block_size `instance-attribute` ¶

_cow_target `instance-attribute` ¶

_prev_block `instance-attribute` ¶

_token_ids `instance-attribute` ¶

block_id `property` `writable` ¶

block_size `property` ¶

computed `property` `writable` ¶

content_hash `property` ¶

extra_hash `property` ¶

is_full `property` ¶

last_accessed `property` `writable` ¶

num_empty_slots `property` ¶

num_tokens_total `property` ¶

prev_block `property` ¶

token_ids `property` ¶

init ¶

_all_block_indices `instance-attribute` ¶

_block_pool `instance-attribute` ¶

_block_size `instance-attribute` ¶

_cow_tracker `instance-attribute` ¶

_free_block_indices `instance-attribute` ¶

_refcounter `instance-attribute` ¶

all_block_ids `property` ¶

refcounter `property` ¶

init ¶