vllm.model_executor.models.aimv2
AIMv2Attention ¶
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
proj instance-attribute ¶
proj = RowParallelLinear(
input_size=embed_dim,
output_size=embed_dim,
bias=use_bias,
quant_config=quant_config,
prefix=f"{prefix}.proj",
)
qkv instance-attribute ¶
qkv = QKVParallelLinear(
hidden_size=embed_dim,
head_size=head_dim,
total_num_heads=num_heads,
bias=qkv_bias,
quant_config=quant_config,
prefix=f"{prefix}.qkv",
)
__init__ ¶
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
AIMv2Block ¶
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
attn instance-attribute ¶
attn = AIMv2Attention(
config,
quant_config=quant_config,
prefix=f"{prefix}.attn",
)
mlp instance-attribute ¶
mlp = AIMv2SwiGLUFFN(
config,
quant_config=quant_config,
prefix=f"{prefix}.mlp",
)
__init__ ¶
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
AIMv2Model ¶
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
trunk instance-attribute ¶
trunk = AIMv2Transformer(
config,
quant_config=quant_config,
require_post_norm=require_post_norm,
prefix=f"{prefix}.trunk",
)
__init__ ¶
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
*,
require_post_norm: Optional[bool] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/aimv2.py
forward ¶
load_weights ¶
Source code in vllm/model_executor/models/aimv2.py
AIMv2PatchEmbed ¶
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
proj instance-attribute ¶
proj = Conv2d(
num_channels,
hidden_size,
kernel_size=(patch_size, patch_size),
stride=(patch_size, patch_size),
)
AIMv2SwiGLUFFN ¶
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
fc13 instance-attribute ¶
fc13 = MergedColumnParallelLinear(
in_features,
[hidden_features] * 2,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.fc13",
)
fc2 instance-attribute ¶
fc2 = RowParallelLinear(
input_size=hidden_features,
output_size=in_features,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.fc2",
)
__init__ ¶
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
AIMv2Transformer ¶
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
blocks instance-attribute ¶
blocks = ModuleList(
[
(
AIMv2Block(
config,
quant_config,
prefix=f"{prefix}.blocks.{i}",
)
)
for i in (range(num_hidden_layers))
]
)
__init__ ¶
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
*,
require_post_norm: Optional[bool] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/aimv2.py
forward ¶
Source code in vllm/model_executor/models/aimv2.py
AIMv2ViTPreprocessor ¶
Bases: Module