# `vllm.v1.sample.logits_processor.interface`

### `AddedRequest` (module attribute)

# Metadata for a request newly added to the persistent batch:
# (batch index, sampling params, <tok ids>, <tok ids>).
# NOTE(review): the two list[int] elements appear to be the prompt token ids
# and the running output token ids — per BatchUpdate's docstring, the output
# list is a live reference that keeps reflecting newly generated tokens.
# Confirm the prompt/output ordering against the call sites.
AddedRequest = tuple[
    int, SamplingParams, list[int], list[int]
]

### `MovedRequest` (module attribute)

MovedRequest = tuple[int, int, MoveDirectionality]

### `RemovedRequest` (module attribute)

RemovedRequest = int

### `BatchUpdate` (dataclass)

Persistent batch state change info for logitsprocs

Source code in vllm/v1/sample/logits_processor/interface.py
# frozen=True: a BatchUpdate is an immutable snapshot of one batch change.
@dataclass(frozen=True)
class BatchUpdate:
    """Persistent batch state change info for logitsprocs"""
    batch_size: int  # Current num reqs in batch

    # Metadata for requests added to, removed from, and moved
    # within the persistent batch.
    #
    # Key assumption: the `output_tok_ids` list (which is an element of each
    # tuple in `added`) is a reference to the request's running output tokens
    # list; via this reference, the logits processors always see the latest
    # list of generated output tokens
    removed: Sequence[RemovedRequest]  # ints identifying removed requests
    moved: Sequence[MovedRequest]  # (from, to, directionality) relocations
    added: Sequence[AddedRequest]  # (index, params, tok id lists) per new req

### Instance attributes

- `batch_size: int`
- `removed: Sequence[RemovedRequest]`
- `moved: Sequence[MovedRequest]`
- `added: Sequence[AddedRequest]`

__init__

__init__(
    batch_size: int,
    removed: Sequence[RemovedRequest],
    moved: Sequence[MovedRequest],
    added: Sequence[AddedRequest],
) -> None

### `LogitsProcessor`

Bases: ABC

Source code in vllm/v1/sample/logits_processor/interface.py
class LogitsProcessor(ABC):
    """Abstract contract for v1 sampler logits processors.

    A concrete processor is constructed from the engine configuration,
    kept in sync with the persistent batch via ``update_state``, and
    invoked on the logits tensor via ``apply``.
    """

    @abstractmethod
    def __init__(
        self,
        vllm_config: "VllmConfig",
        device: torch.device,
        is_pin_memory: bool,
    ) -> None:
        raise NotImplementedError

    @abstractmethod
    def update_state(self, batch_update: Optional["BatchUpdate"]) -> None:
        """Called when there are new output tokens, prior
        to each forward pass.

        Args:
            batch_update: non-None iff there have been
                changes to the batch makeup.
        """
        raise NotImplementedError

    @abstractmethod
    def apply(self, logits: torch.Tensor) -> torch.Tensor:
        """Apply this processor to ``logits`` and return the result."""
        raise NotImplementedError

    @abstractmethod
    def is_argmax_invariant(self) -> bool:
        """True if logits processor has no impact on the
        argmax computation in greedy sampling.
        NOTE: may or may not have the same value for all
        instances of a given LogitsProcessor subclass,
        depending on subclass implementation.
        """
        raise NotImplementedError

### `__init__` (abstract method)

__init__(
    vllm_config: VllmConfig,
    device: device,
    is_pin_memory: bool,
) -> None
Source code in vllm/v1/sample/logits_processor/interface.py
@abstractmethod
def __init__(self, vllm_config: "VllmConfig", device: torch.device,
             is_pin_memory: bool) -> None:
    # Abstract: each concrete logits processor defines its own construction
    # from the engine config, the target device, and the pin-memory flag.
    raise NotImplementedError

### `apply` (abstract method)

apply(logits: Tensor) -> Tensor
Source code in vllm/v1/sample/logits_processor/interface.py
@abstractmethod
def apply(self, logits: torch.Tensor) -> torch.Tensor:
    # Abstract: subclasses take the logits tensor and return the (possibly
    # modified) logits to be used for sampling.
    raise NotImplementedError

### `is_argmax_invariant` (abstract method)

is_argmax_invariant() -> bool

True if logits processor has no impact on the argmax computation in greedy sampling. NOTE: may or may not have the same value for all instances of a given LogitsProcessor subclass, depending on subclass implementation.

Source code in vllm/v1/sample/logits_processor/interface.py
@abstractmethod
def is_argmax_invariant(self) -> bool:
    """True if logits processor has no impact on the
    argmax computation in greedy sampling.
    NOTE: may or may not have the same value for all
    instances of a given LogitsProcessor subclass,
    depending on subclass implementation.
    """
    # NOTE(review): presumably used by the sampler to skip argmax-invariant
    # processors under pure greedy sampling — confirm at the call site.
    raise NotImplementedError

### `update_state` (abstract method)

update_state(batch_update: Optional[BatchUpdate]) -> None

Called when there are new output tokens, prior to each forward pass.

Source code in vllm/v1/sample/logits_processor/interface.py
@abstractmethod
def update_state(
    self,
    batch_update: Optional["BatchUpdate"],
) -> None:
    """Called when there are new output tokens, prior
    to each forward pass.

    Args:
        batch_update: non-None iff there have been
            changes to the batch makeup.
    """
    raise NotImplementedError

### `MoveDirectionality`

Bases: Enum

Source code in vllm/v1/sample/logits_processor/interface.py
class MoveDirectionality(Enum):
    """How a request relocation within the persistent batch is applied."""

    # One-way move: the request at i1 lands at i2.
    # (Explicit values match what auto() would assign, starting at 1.)
    UNIDIRECTIONAL = 1
    # Two-way swap: the requests at i1 and i2 trade places.
    SWAP = 2

### `SWAP` (class attribute)

SWAP = auto()

### `UNIDIRECTIONAL` (class attribute)

UNIDIRECTIONAL = auto()