vllm.v1.engine.mm_input_cache

MultiModalInputCacheClient

Used by P0 (the frontend process) to check whether multi-modal kwargs are already cached in P1 (the engine-core process), so that already-transferred items can be replaced with None placeholders instead of being resent.

Source code in vllm/v1/engine/mm_input_cache.py
class MultiModalInputCacheClient:
    """Used by P0 to check whether multi-modal kwargs are cached in P1."""

    def __init__(self, model_config: "ModelConfig",
                 mm_registry: MultiModalRegistry) -> None:
        super().__init__()

        self.enabled = mm_registry.enable_mm_input_cache(model_config)
        self.mm_cache = MultiModalCache.get_lru_cache(
            model_config.get_mm_input_cache_gb(),
            MultiModalCacheItemMetadata,
        )

    def get_and_update(
        self,
        mm_kwargs: Sequence[MultiModalKwargsItem],
        mm_hashes: list[str],
    ) -> list[Optional[MultiModalKwargsItem]]:
        if not self.enabled:
            return list(mm_kwargs)

        assert len(mm_kwargs) == len(mm_hashes)

        out_mm_items = list[Optional[MultiModalKwargsItem]]()
        for mm_item, mm_hash in zip(mm_kwargs, mm_hashes):
            if self.mm_cache.get(mm_hash) is not None:
                out_mm_items.append(None)
            else:
                self.mm_cache[mm_hash] = \
                    MultiModalCacheItemMetadata.wraps(mm_item)
                out_mm_items.append(mm_item)

        return out_mm_items

    def reset(self) -> None:
        self.mm_cache.clear()
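
To make the placeholder protocol concrete, here is a minimal, self-contained sketch of the client-side contract. A plain set stands in for the size-bounded LRU cache, and strings stand in for MultiModalKwargsItem; all names below are hypothetical and not part of the vLLM API.

from collections.abc import Sequence
from typing import Optional


class ToyClientCache:
    """Models MultiModalInputCacheClient.get_and_update: remember which
    hashes have already been sent, and replace those items with None."""

    def __init__(self) -> None:
        # Stands in for the LRU cache of MultiModalCacheItemMetadata.
        self.seen: set[str] = set()

    def get_and_update(
        self,
        mm_items: Sequence[str],
        mm_hashes: list[str],
    ) -> list[Optional[str]]:
        out: list[Optional[str]] = []
        for item, mm_hash in zip(mm_items, mm_hashes):
            if mm_hash in self.seen:
                out.append(None)        # P1 should already hold this item
            else:
                self.seen.add(mm_hash)
                out.append(item)        # first sighting: send the payload
        return out


client = ToyClientCache()
print(client.get_and_update(["img_a", "img_b"], ["h1", "h2"]))  # ['img_a', 'img_b']
print(client.get_and_update(["img_a"], ["h1"]))                 # [None]

Note that the real client caches only MultiModalCacheItemMetadata, which records each item's size, rather than the item itself; since both sides build their LRU caches with the same get_mm_input_cache_gb() budget, this lets P0 approximate P1's eviction behavior without holding the tensors themselves.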

enabled instance-attribute

enabled = enable_mm_input_cache(model_config)

mm_cache instance-attribute

mm_cache = get_lru_cache(
    get_mm_input_cache_gb(), MultiModalCacheItemMetadata
)

__init__

__init__(
    model_config: ModelConfig,
    mm_registry: MultiModalRegistry,
) -> None
Source code in vllm/v1/engine/mm_input_cache.py
def __init__(self, model_config: "ModelConfig",
             mm_registry: MultiModalRegistry) -> None:
    super().__init__()

    self.enabled = mm_registry.enable_mm_input_cache(model_config)
    self.mm_cache = MultiModalCache.get_lru_cache(
        model_config.get_mm_input_cache_gb(),
        MultiModalCacheItemMetadata,
    )

get_and_update

get_and_update(
    mm_kwargs: Sequence[MultiModalKwargsItem],
    mm_hashes: list[str],
) -> list[Optional[MultiModalKwargsItem]]
Source code in vllm/v1/engine/mm_input_cache.py
def get_and_update(
    self,
    mm_kwargs: Sequence[MultiModalKwargsItem],
    mm_hashes: list[str],
) -> list[Optional[MultiModalKwargsItem]]:
    if not self.enabled:
        return list(mm_kwargs)

    assert len(mm_kwargs) == len(mm_hashes)

    out_mm_items = list[Optional[MultiModalKwargsItem]]()
    for mm_item, mm_hash in zip(mm_kwargs, mm_hashes):
        if self.mm_cache.get(mm_hash) is not None:
            out_mm_items.append(None)
        else:
            self.mm_cache[mm_hash] = \
                MultiModalCacheItemMetadata.wraps(mm_item)
            out_mm_items.append(mm_item)

    return out_mm_items
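
A None entry in the returned list means the item with the corresponding hash is expected to be already cached in P1, so only the hash needs to be transferred for that item.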

reset

reset() -> None
Source code in vllm/v1/engine/mm_input_cache.py
def reset(self) -> None:
    self.mm_cache.clear()

MultiModalInputCacheServer

Used by P1 (the engine-core process) to restore multi-modal kwargs from its cache, so that P0 (the frontend process) does not have to resend items it has already transferred.

Source code in vllm/v1/engine/mm_input_cache.py
class MultiModalInputCacheServer:
    """Used by P1 to avoid requiring past multi-modal kwargs from P0."""

    def __init__(self, model_config: "ModelConfig",
                 mm_registry: MultiModalRegistry) -> None:
        super().__init__()

        self.enabled = mm_registry.enable_mm_input_cache(model_config)
        self.mm_cache = MultiModalCache.get_lru_cache(
            model_config.get_mm_input_cache_gb(),
            MultiModalKwargsItem,
        )

    def get_and_update(
        self,
        mm_kwargs: Sequence[Optional[MultiModalKwargsItem]],
        mm_hashes: list[str],
    ) -> list[MultiModalKwargsItem]:
        if not self.enabled:
            mm_kwargs_lst = list(mm_kwargs)
            assert is_list_of(mm_kwargs_lst, MultiModalKwargsItem)
            return mm_kwargs_lst

        assert len(mm_kwargs) == len(mm_hashes)

        out_mm_items = list[MultiModalKwargsItem]()
        for mm_item, mm_hash in zip(mm_kwargs, mm_hashes):
            if mm_item is None:
                out_mm_items.append(self.mm_cache[mm_hash])
            else:
                self.mm_cache[mm_hash] = mm_item
                out_mm_items.append(mm_item)

        return out_mm_items

    def reset(self) -> None:
        self.mm_cache.clear()
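
The server is the mirror image of the client: a None placeholder is resolved from the cache, while a full item both refreshes the cache and passes through. Continuing the toy sketch above (again with hypothetical names, strings standing in for MultiModalKwargsItem), a full P0-to-P1 round trip looks like:

from collections.abc import Sequence
from typing import Optional


class ToyServerCache:
    """Models MultiModalInputCacheServer.get_and_update: store full items
    keyed by hash, and resolve None placeholders from that store."""

    def __init__(self) -> None:
        # Stands in for the LRU cache of full MultiModalKwargsItem objects.
        self.store: dict[str, str] = {}

    def get_and_update(
        self,
        mm_items: Sequence[Optional[str]],
        mm_hashes: list[str],
    ) -> list[str]:
        out: list[str] = []
        for item, mm_hash in zip(mm_items, mm_hashes):
            if item is None:
                out.append(self.store[mm_hash])  # placeholder: look it up
            else:
                self.store[mm_hash] = item       # new item: cache it
                out.append(item)
        return out


server = ToyServerCache()
# Request 1: the client sends the full payload the first time it sees h1.
print(server.get_and_update(["img_a"], ["h1"]))  # ['img_a']
# Request 2: the client sends a None placeholder; the server restores it.
print(server.get_and_update([None], ["h1"]))     # ['img_a']

Correctness depends on the two caches staying in step: if P1 evicted an entry that P0 still considers cached, the placeholder lookup would fail. This is presumably why both sides size their caches with the same get_mm_input_cache_gb() budget and expose matching reset() methods.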

enabled instance-attribute

enabled = enable_mm_input_cache(model_config)

mm_cache instance-attribute

mm_cache = get_lru_cache(
    get_mm_input_cache_gb(), MultiModalKwargsItem
)

__init__

__init__(
    model_config: ModelConfig,
    mm_registry: MultiModalRegistry,
) -> None
Source code in vllm/v1/engine/mm_input_cache.py
def __init__(self, model_config: "ModelConfig",
             mm_registry: MultiModalRegistry) -> None:
    super().__init__()

    self.enabled = mm_registry.enable_mm_input_cache(model_config)
    self.mm_cache = MultiModalCache.get_lru_cache(
        model_config.get_mm_input_cache_gb(),
        MultiModalKwargsItem,
    )

get_and_update

get_and_update(
    mm_kwargs: Sequence[Optional[MultiModalKwargsItem]],
    mm_hashes: list[str],
) -> list[MultiModalKwargsItem]
Source code in vllm/v1/engine/mm_input_cache.py
def get_and_update(
    self,
    mm_kwargs: Sequence[Optional[MultiModalKwargsItem]],
    mm_hashes: list[str],
) -> list[MultiModalKwargsItem]:
    if not self.enabled:
        mm_kwargs_lst = list(mm_kwargs)
        assert is_list_of(mm_kwargs_lst, MultiModalKwargsItem)
        return mm_kwargs_lst

    assert len(mm_kwargs) == len(mm_hashes)

    out_mm_items = list[MultiModalKwargsItem]()
    for mm_item, mm_hash in zip(mm_kwargs, mm_hashes):
        if mm_item is None:
            out_mm_items.append(self.mm_cache[mm_hash])
        else:
            self.mm_cache[mm_hash] = mm_item
            out_mm_items.append(mm_item)

    return out_mm_items
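
The mapping here is inverted relative to the client: None placeholders are resolved from the server-side cache, while full items refresh the cache and pass through unchanged, so the returned list always contains complete items.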

reset

reset() -> None
Source code in vllm/v1/engine/mm_input_cache.py
def reset(self) -> None:
    self.mm_cache.clear()