vllm.model_executor.models.rvl

RForConditionalGeneration

Bases: LlavaOnevisionForConditionalGeneration

Source code in vllm/model_executor/models/rvl.py
@MULTIMODAL_REGISTRY.register_processor(
    LlavaNextMultiModalProcessor,
    info=RVLProcessingInfo,
    dummy_inputs=RVLDummyInputsBuilder,
)
class RForConditionalGeneration(LlavaOnevisionForConditionalGeneration):

    hf_to_vllm_mapper = WeightsMapper(
        orig_to_new_prefix={
            # mapping for renamed weights in checkpoints saved with
            # transformers v4.52 and later
            "model.language_model.": "language_model.model.",
            "model.vision_tower.": "vision_tower.",
            "model.multi_modal_projector.": "multi_modal_projector.",
            "model.image_newline": "image_newline",
            "lm_head.": "language_model.lm_head.",
        })

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
        super().__init__(vllm_config=vllm_config, prefix=prefix)
        config = vllm_config.model_config.hf_config
        self.multi_modal_projector = RVLMultiModalProjector(config)

hf_to_vllm_mapper (class attribute, instance attribute)

hf_to_vllm_mapper = WeightsMapper(
    orig_to_new_prefix={
        "model.language_model.": "language_model.model.",
        "model.vision_tower.": "vision_tower.",
        "model.multi_modal_projector.": "multi_modal_projector.",
        "model.image_newline": "image_newline",
        "lm_head.": "language_model.lm_head.",
    }
)
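
The mapper rewrites checkpoint weight names by prefix so that keys from post-v4.52 transformers checkpoints line up with vLLM's module tree. A minimal plain-Python sketch of that prefix substitution (for illustration only, not the actual WeightsMapper implementation):

orig_to_new_prefix = {
    "model.language_model.": "language_model.model.",
    "model.vision_tower.": "vision_tower.",
    "model.multi_modal_projector.": "multi_modal_projector.",
    "model.image_newline": "image_newline",
    "lm_head.": "language_model.lm_head.",
}

def remap(name: str) -> str:
    # Rewrite the first matching checkpoint prefix to its vLLM equivalent.
    for old, new in orig_to_new_prefix.items():
        if name.startswith(old):
            return new + name[len(old):]
    return name

assert remap("model.language_model.layers.0.self_attn.q_proj.weight") == \
    "language_model.model.layers.0.self_attn.q_proj.weight"
assert remap("lm_head.weight") == "language_model.lm_head.weight"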

multi_modal_projector (instance attribute)

multi_modal_projector = RVLMultiModalProjector(config)

__init__

__init__(
    *, vllm_config: VllmConfig, prefix: str = ""
) -> None
Source code in vllm/model_executor/models/rvl.py
def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
    super().__init__(vllm_config=vllm_config, prefix=prefix)
    config = vllm_config.model_config.hf_config
    self.multi_modal_projector = RVLMultiModalProjector(config)

RVLDummyInputsBuilder

Bases: LlavaDummyInputsBuilder[RVLProcessingInfo]

Source code in vllm/model_executor/models/rvl.py
class RVLDummyInputsBuilder(LlavaDummyInputsBuilder[RVLProcessingInfo]):

    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
        num_images = mm_counts.get("image", 0)
        image_token = "<image>"

        return image_token * num_images

    def get_dummy_mm_data(
        self,
        seq_len: int,
        mm_counts: Mapping[str, int],
    ) -> MultiModalDataDict:
        num_images = mm_counts.get("image", 0)

        target_width, target_height = (
            self.info.get_image_size_with_most_features())

        return {
            "image":
            self._get_dummy_images(width=target_width,
                                   height=target_height,
                                   num_images=num_images),
        }

get_dummy_mm_data

get_dummy_mm_data(
    seq_len: int, mm_counts: Mapping[str, int]
) -> MultiModalDataDict
Source code in vllm/model_executor/models/rvl.py
def get_dummy_mm_data(
    self,
    seq_len: int,
    mm_counts: Mapping[str, int],
) -> MultiModalDataDict:
    num_images = mm_counts.get("image", 0)

    target_width, target_height = (
        self.info.get_image_size_with_most_features())

    return {
        "image":
        self._get_dummy_images(width=target_width,
                               height=target_height,
                               num_images=num_images),
    }
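
The returned MultiModalDataDict maps "image" to a list of placeholder images sized to exercise the maximum number of image features. A minimal sketch, using a hypothetical stand-in for self._get_dummy_images (assumed here to produce PIL images) and an illustrative 336x336 target size; the real size comes from get_image_size_with_most_features():

from PIL import Image

def make_dummy_images(width: int, height: int,
                      num_images: int) -> list[Image.Image]:
    # Hypothetical stand-in for self._get_dummy_images: blank RGB canvases.
    return [Image.new("RGB", (width, height)) for _ in range(num_images)]

mm_data = {"image": make_dummy_images(336, 336, num_images=2)}
assert len(mm_data["image"]) == 2
assert mm_data["image"][0].size == (336, 336)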

get_dummy_text

get_dummy_text(mm_counts: Mapping[str, int]) -> str
Source code in vllm/model_executor/models/rvl.py
def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
    num_images = mm_counts.get("image", 0)
    image_token = "<image>"

    return image_token * num_images
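
The dummy prompt is simply the <image> placeholder repeated once per requested image:

mm_counts = {"image": 3}
dummy_text = "<image>" * mm_counts.get("image", 0)
assert dummy_text == "<image><image><image>"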

RVLMultiModalProjector

Bases: Module

Source code in vllm/model_executor/models/rvl.py
class RVLMultiModalProjector(nn.Module):

    def __init__(self, config):
        super().__init__()
        self.pre_norm = nn.LayerNorm(config.vision_config.hidden_size,
                                     eps=1e-06)
        self.linear_1 = nn.Linear(
            config.vision_config.hidden_size,
            config.text_config.hidden_size,
            bias=True,
        )
        self.act = GELUActivation()
        self.linear_2 = nn.Linear(
            config.text_config.hidden_size,
            config.text_config.hidden_size,
            bias=True,
        )

    def forward(self, image_feature: torch.Tensor) -> torch.Tensor:
        image_feature = self.pre_norm(image_feature)
        hidden_states = self.linear_1(image_feature)
        hidden_states = self.act(hidden_states)
        hidden_states = self.linear_2(hidden_states)

        return hidden_states

act (instance attribute)

act = GELUActivation()

linear_1 (instance attribute)

linear_1 = Linear(vision_config.hidden_size, text_config.hidden_size, bias=True)

linear_2 (instance attribute)

linear_2 = Linear(text_config.hidden_size, text_config.hidden_size, bias=True)

pre_norm (instance attribute)

pre_norm = LayerNorm(vision_config.hidden_size, eps=1e-06)

__init__

__init__(config)
Source code in vllm/model_executor/models/rvl.py
def __init__(self, config):
    super().__init__()
    self.pre_norm = nn.LayerNorm(config.vision_config.hidden_size,
                                 eps=1e-06)
    self.linear_1 = nn.Linear(
        config.vision_config.hidden_size,
        config.text_config.hidden_size,
        bias=True,
    )
    self.act = GELUActivation()
    self.linear_2 = nn.Linear(
        config.text_config.hidden_size,
        config.text_config.hidden_size,
        bias=True,
    )

forward

forward(image_feature: Tensor) -> Tensor
Source code in vllm/model_executor/models/rvl.py
def forward(self, image_feature: torch.Tensor) -> torch.Tensor:
    image_feature = self.pre_norm(image_feature)
    hidden_states = self.linear_1(image_feature)
    hidden_states = self.act(hidden_states)
    hidden_states = self.linear_2(hidden_states)

    return hidden_states
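
Shape-wise, the projector normalizes the vision-encoder features and maps them into the language model's hidden size via two linear layers with a GELU in between. A minimal sketch with illustrative dimensions (1152 for the vision tower, 4096 for the text backbone; the real values come from the HF config), assuming RVLMultiModalProjector is imported from this module:

from types import SimpleNamespace
import torch

config = SimpleNamespace(
    vision_config=SimpleNamespace(hidden_size=1152),
    text_config=SimpleNamespace(hidden_size=4096),
)
projector = RVLMultiModalProjector(config)

image_feature = torch.randn(2, 576, 1152)  # (batch, patches, vision hidden)
out = projector(image_feature)
assert out.shape == (2, 576, 4096)         # projected to the text hidden size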

RVLProcessingInfo

Bases: LlavaNextProcessingInfo

Source code in vllm/model_executor/models/rvl.py
class RVLProcessingInfo(LlavaNextProcessingInfo):

    def get_hf_config(self):
        return self.ctx.get_hf_config()

    def get_hf_processor(self, **kwargs: object):
        return self.ctx.get_hf_processor(**kwargs)

get_hf_config

get_hf_config()
Source code in vllm/model_executor/models/rvl.py
def get_hf_config(self):
    return self.ctx.get_hf_config()

get_hf_processor

get_hf_processor(**kwargs: object)
Source code in vllm/model_executor/models/rvl.py
def get_hf_processor(self, **kwargs: object):
    return self.ctx.get_hf_processor(**kwargs)