vllm.model_executor.pooling_metadata

PoolingMetadata ¶

Metadata for pooling operations in the Pooler layer.

This class holds the necessary information for pooling operations, providing context for how to perform pooling and other related operations.

Attributes:

Name	Type	Description
`seq_groups`		List of (seq_ids, pooling_params).
`seq_data`		A mapping of sequence ID to additional sequence data.
`prompt_lens`		List of the lengths of each prompt.

Source code in vllm/model_executor/pooling_metadata.py

class PoolingMetadata:
    """Bookkeeping handed to the Pooler layer for a batch of sequences.

    Bundles the sequence groups, the per-sequence data and the prompt
    lengths that pooling needs, together with an optional pooling cursor.

    Attributes:
        seq_groups: List of (seq_ids, pooling_params).
        seq_data: A mapping of sequence ID to additional sequence data.
        prompt_lens: List of the lengths of each prompt.
        pooling_cursor: Optional cursor; ``None`` unless passed to the
            constructor or set by ``build_pooling_cursor``.
    """

    def __init__(
        self,
        seq_groups: list[tuple[list[int], PoolingParams]],
        seq_data: dict[int, Any],  # Specific data related to sequences
        prompt_lens: list[int],
        pooling_cursor: Optional[PoolingCursor] = None,
    ) -> None:
        """Keep references to the given containers; nothing is copied."""
        self.seq_groups = seq_groups
        self.seq_data = seq_data
        self.prompt_lens = prompt_lens
        self.pooling_cursor: Optional[PoolingCursor] = pooling_cursor

    def __repr__(self) -> str:
        """Render seq_groups, seq_data and prompt_lens.

        The pooling cursor is not part of the rendered text.
        """
        rendered_fields = ", ".join((
            f"seq_groups={self.seq_groups}",
            f"seq_data={self.seq_data}",
            f"prompt_lens={self.prompt_lens}",
        ))
        return f"PoolingMetadata({rendered_fields})"

    def __getitem__(self, indices: slice):
        """Return a new PoolingMetadata restricted to ``indices``.

        ``seq_groups`` and ``prompt_lens`` are sliced directly, while
        ``seq_data`` is sliced by the position of its items in the dict's
        iteration order. The cursor is sliced too when one is present.
        """
        picked_groups = self.seq_groups[indices]
        picked_data = dict(list(self.seq_data.items())[indices])
        picked_lens = self.prompt_lens[indices]
        if self.pooling_cursor is None:
            picked_cursor = None
        else:
            picked_cursor = self.pooling_cursor[indices]
        return PoolingMetadata(
            seq_groups=picked_groups,
            seq_data=picked_data,
            prompt_lens=picked_lens,
            pooling_cursor=picked_cursor,
        )

    def build_pooling_cursor(self, num_scheduled_tokens: list[int],
                             device: torch.device):
        """Create the pooling cursor and store it on ``self.pooling_cursor``.

        Args:
            num_scheduled_tokens: Forwarded unchanged to the cursor builder.
            device: Forwarded to the cursor builder as ``device``.
        """
        # The prompt lengths are materialised as a CPU tensor no matter
        # which ``device`` was requested for the cursor.
        cpu_prompt_lens = torch.tensor(self.prompt_lens, device="cpu")
        # Inside a method a bare name is not looked up in the class body, so
        # this is not a recursive call; presumably it is the helper of the
        # same name available at module level.
        cursor = build_pooling_cursor(num_scheduled_tokens,
                                      cpu_prompt_lens,
                                      device=device)
        self.pooling_cursor = cursor

pooling_cursor `instance-attribute` ¶

pooling_cursor: Optional[PoolingCursor] = pooling_cursor

prompt_lens `instance-attribute` ¶

prompt_lens = prompt_lens

seq_data `instance-attribute` ¶

seq_data = seq_data

seq_groups `instance-attribute` ¶

seq_groups = seq_groups

__getitem__ ¶

__getitem__(indices: slice)

Source code in vllm/model_executor/pooling_metadata.py

def __getitem__(self, indices: slice):
    """Return a new PoolingMetadata restricted to ``indices``.

    ``seq_groups`` and ``prompt_lens`` are sliced directly, while
    ``seq_data`` is sliced by the position of its items in the dict's
    iteration order. The cursor is sliced too when one is present.
    """
    picked_groups = self.seq_groups[indices]
    picked_data = dict(list(self.seq_data.items())[indices])
    picked_lens = self.prompt_lens[indices]
    if self.pooling_cursor is None:
        picked_cursor = None
    else:
        picked_cursor = self.pooling_cursor[indices]
    return PoolingMetadata(
        seq_groups=picked_groups,
        seq_data=picked_data,
        prompt_lens=picked_lens,
        pooling_cursor=picked_cursor,
    )

__init__ ¶

__init__(
    seq_groups: list[tuple[list[int], PoolingParams]],
    seq_data: dict[int, Any],
    prompt_lens: list[int],
    pooling_cursor: Optional[PoolingCursor] = None,
) -> None

Source code in vllm/model_executor/pooling_metadata.py

def __init__(
    self,
    seq_groups: list[tuple[list[int], PoolingParams]],
    seq_data: dict[int, Any],  # Specific data related to sequences
    prompt_lens: list[int],
    pooling_cursor: Optional[PoolingCursor] = None,
) -> None:
    """Keep references to the given containers; nothing is copied.

    Args:
        seq_groups: List of (seq_ids, pooling_params).
        seq_data: A mapping of sequence ID to additional sequence data.
        prompt_lens: List of the lengths of each prompt.
        pooling_cursor: Optional cursor; defaults to ``None``.
    """
    self.seq_groups = seq_groups
    self.seq_data = seq_data
    self.prompt_lens = prompt_lens
    self.pooling_cursor: Optional[PoolingCursor] = pooling_cursor

__repr__ ¶

__repr__() -> str

Source code in vllm/model_executor/pooling_metadata.py

def __repr__(self) -> str:
    """Render seq_groups, seq_data and prompt_lens.

    The pooling cursor is not part of the rendered text.
    """
    rendered_fields = ", ".join((
        f"seq_groups={self.seq_groups}",
        f"seq_data={self.seq_data}",
        f"prompt_lens={self.prompt_lens}",
    ))
    return f"PoolingMetadata({rendered_fields})"

build_pooling_cursor ¶

build_pooling_cursor(
    num_scheduled_tokens: list[int], device: device
)

Source code in vllm/model_executor/pooling_metadata.py

def build_pooling_cursor(self, num_scheduled_tokens: list[int],
                         device: torch.device):
    """Create the pooling cursor and store it on ``self.pooling_cursor``.

    Args:
        num_scheduled_tokens: Forwarded unchanged to the cursor builder.
        device: Forwarded to the cursor builder as ``device``.
    """
    # The prompt lengths are materialised as a CPU tensor no matter which
    # ``device`` was requested for the cursor.
    cpu_prompt_lens = torch.tensor(self.prompt_lens, device="cpu")
    # Inside a method a bare name is not looked up in the class body, so this
    # is not a recursive call; presumably it is the helper of the same name
    # available at module level.
    cursor = build_pooling_cursor(num_scheduled_tokens,
                                  cpu_prompt_lens,
                                  device=device)
    self.pooling_cursor = cursor

PoolingTensors `dataclass` ¶

Tensors for pooling.

Source code in vllm/model_executor/pooling_metadata.py

@dataclass
class PoolingTensors:
    """Tensor-form inputs for pooling."""

    # Prompt lengths as a tensor; from_pooling_metadata builds it as int64.
    prompt_lens: torch.Tensor

    @classmethod
    def from_pooling_metadata(
        cls,
        pooling_metadata: "PoolingMetadata",
        device: torch.device,
    ) -> "PoolingTensors":
        """
        Build a PoolingTensors out of a PoolingMetadata.

        Args:
            pooling_metadata: PoolingMetadata instance to convert.
            device: Device to store the tensors.

        Returns:
            A new instance whose ``prompt_lens`` has been moved to ``device``.
        """
        use_pinned = is_pin_memory_available()

        # Stage the lengths on the CPU first (pinned when the helper above
        # says so), then request a non-blocking move to the target device.
        host_lens = torch.tensor(
            pooling_metadata.prompt_lens,
            device="cpu",
            dtype=torch.long,
            pin_memory=use_pinned,
        )
        device_lens = host_lens.to(device=device, non_blocking=True)

        return cls(prompt_lens=device_lens)

prompt_lens `instance-attribute` ¶

prompt_lens: Tensor

__init__ ¶

__init__(prompt_lens: Tensor) -> None

from_pooling_metadata `classmethod` ¶

from_pooling_metadata(
    pooling_metadata: PoolingMetadata, device: device
) -> PoolingTensors

Create PoolingTensors from PoolingMetadata.

Parameters:

Name	Type	Description	Default
`pooling_metadata`	`PoolingMetadata`	PoolingMetadata instance to convert.	required
`device`	`device`	Device to store the tensors.	required

Source code in vllm/model_executor/pooling_metadata.py

@classmethod
def from_pooling_metadata(
    cls,
    pooling_metadata: "PoolingMetadata",
    device: torch.device,
) -> "PoolingTensors":
    """
    Build a PoolingTensors out of a PoolingMetadata.

    Args:
        pooling_metadata: PoolingMetadata instance to convert.
        device: Device to store the tensors.

    Returns:
        A new instance whose ``prompt_lens`` has been moved to ``device``.
    """
    use_pinned = is_pin_memory_available()

    # Stage the lengths on the CPU first (pinned when the helper above says
    # so), then request a non-blocking move to the target device.
    host_lens = torch.tensor(
        pooling_metadata.prompt_lens,
        device="cpu",
        dtype=torch.long,
        pin_memory=use_pinned,
    )
    device_lens = host_lens.to(device=device, non_blocking=True)

    return cls(prompt_lens=device_lens)

vllm.model_executor.pooling_metadata

PoolingMetadata ¶

pooling_cursor instance-attribute ¶

prompt_lens instance-attribute ¶

seq_data instance-attribute ¶

seq_groups instance-attribute ¶

__getitem__ ¶

__init__ ¶

__repr__ ¶

build_pooling_cursor ¶

PoolingTensors dataclass ¶

prompt_lens instance-attribute ¶

__init__ ¶

from_pooling_metadata classmethod ¶

pooling_cursor `instance-attribute` ¶

prompt_lens `instance-attribute` ¶

seq_data `instance-attribute` ¶

seq_groups `instance-attribute` ¶

__getitem__ ¶

__init__ ¶

__repr__ ¶

PoolingTensors `dataclass` ¶

prompt_lens `instance-attribute` ¶

__init__ ¶

from_pooling_metadata `classmethod` ¶