# `vllm.v1.sample.logits_processor.interface`

### `AddedRequest` (module attribute)

# Metadata for a request newly added to the persistent batch:
# (batch index, sampling params, <tok ids>, <tok ids>).
# NOTE(review): the two list[int] elements appear to be the prompt token ids
# and the running output token ids — per BatchUpdate's docstring, the output
# list is a live reference that keeps reflecting newly generated tokens.
# Confirm the prompt/output ordering against the call sites.
AddedRequest = tuple[
    int, SamplingParams, list[int], list[int]
]

### `MovedRequest` (module attribute)

MovedRequest = tuple[int, int, MoveDirectionality]

### `RemovedRequest` (module attribute)

RemovedRequest = int

### `BatchUpdate` (dataclass)

Persistent batch state change info for logitsprocs

Source code in vllm/v1/sample/logits_processor/interface.py
# frozen=True: a BatchUpdate is an immutable snapshot of one batch change.
@dataclass(frozen=True)
class BatchUpdate:
    """Persistent batch state change info for logitsprocs"""
    batch_size: int  # Current num reqs in batch

    # Metadata for requests added to, removed from, and moved
    # within the persistent batch.
    #
    # Key assumption: the `output_tok_ids` list (which is an element of each
    # tuple in `added`) is a reference to the request's running output tokens
    # list; via this reference, the logits processors always see the latest
    # list of generated output tokens
    removed: Sequence[RemovedRequest]  # ints identifying removed requests
    moved: Sequence[MovedRequest]  # (from, to, directionality) relocations
    added: Sequence[AddedRequest]  # (index, params, tok id lists) per new req

### Instance attributes

- `batch_size: int`
- `removed: Sequence[RemovedRequest]`
- `moved: Sequence[MovedRequest]`
- `added: Sequence[AddedRequest]`

__init__

__init__(
    batch_size: int,
    removed: Sequence[RemovedRequest],
    moved: Sequence[MovedRequest],
    added: Sequence[AddedRequest],
) -> None

### `LogitsProcessor`

Bases: ABC

Source code in vllm/v1/sample/logits_processor/interface.py
class LogitsProcessor(ABC):
    """Abstract contract for v1 sampler logits processors.

    A concrete processor is constructed from the engine configuration,
    kept in sync with the persistent batch via ``update_state``, and
    invoked on the logits tensor via ``apply``.
    """

    @abstractmethod
    def __init__(
        self,
        vllm_config: "VllmConfig",
        device: torch.device,
        is_pin_memory: bool,
    ) -> None:
        raise NotImplementedError

    @abstractmethod
    def update_state(self, batch_update: Optional["BatchUpdate"]) -> None:
        """Called when there are new output tokens, prior
        to each forward pass.

        Args:
            batch_update: non-None iff there have been
                changes to the batch makeup.
        """
        raise NotImplementedError

    @abstractmethod
    def apply(self, logits: torch.Tensor) -> torch.Tensor:
        """Apply this processor to ``logits`` and return the result."""
        raise NotImplementedError

    @abstractmethod
    def is_argmax_invariant(self) -> bool:
        """True if logits processor has no impact on the
        argmax computation in greedy sampling.
        NOTE: may or may not have the same value for all
        instances of a given LogitsProcessor subclass,
        depending on subclass implementation.
        """
        raise NotImplementedError

### `__init__` (abstract method)

__init__(
    vllm_config: VllmConfig,
    device: device,
    is_pin_memory: bool,
) -> None
Source code in vllm/v1/sample/logits_processor/interface.py
@abstractmethod
def __init__(self, vllm_config: "VllmConfig", device: torch.device,
             is_pin_memory: bool) -> None:
    # Abstract: each concrete logits processor defines its own construction
    # from the engine config, the target device, and the pin-memory flag.
    raise NotImplementedError

### `apply` (abstract method)

apply(logits: Tensor) -> Tensor
Source code in vllm/v1/sample/logits_processor/interface.py
@abstractmethod
def apply(self, logits: torch.Tensor) -> torch.Tensor:
    # Abstract: subclasses take the logits tensor and return the (possibly
    # modified) logits to be used for sampling.
    raise NotImplementedError

### `is_argmax_invariant` (abstract method)

is_argmax_invariant() -> bool

True if logits processor has no impact on the argmax computation in greedy sampling. NOTE: may or may not have the same value for all instances of a given LogitsProcessor subclass, depending on subclass implementation.

Source code in vllm/v1/sample/logits_processor/interface.py
@abstractmethod
def is_argmax_invariant(self) -> bool:
    """True if logits processor has no impact on the
    argmax computation in greedy sampling.
    NOTE: may or may not have the same value for all
    instances of a given LogitsProcessor subclass,
    depending on subclass implementation.
    """
    # NOTE(review): presumably used by the sampler to skip argmax-invariant
    # processors under pure greedy sampling — confirm at the call site.
    raise NotImplementedError

### `update_state` (abstract method)

update_state(batch_update: Optional[BatchUpdate]) -> None

Called when there are new output tokens, prior to each forward pass.

Source code in vllm/v1/sample/logits_processor/interface.py
@abstractmethod
def update_state(
    self,
    batch_update: Optional["BatchUpdate"],
) -> None:
    """Called when there are new output tokens, prior
    to each forward pass.

    Args:
        batch_update: non-None iff there have been
            changes to the batch makeup.
    """
    raise NotImplementedError

### `MoveDirectionality`

Bases: Enum

Source code in vllm/v1/sample/logits_processor/interface.py
class MoveDirectionality(Enum):
    """How a request relocation within the persistent batch is applied."""

    # One-way move: the request at i1 lands at i2.
    # (Explicit values match what auto() would assign, starting at 1.)
    UNIDIRECTIONAL = 1
    # Two-way swap: the requests at i1 and i2 trade places.
    SWAP = 2

### `SWAP` (class attribute)

SWAP = auto()

### `UNIDIRECTIONAL` (class attribute)

UNIDIRECTIONAL = auto()