vllm.model_executor.models.bert
BertAttention ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
output instance-attribute ¶
output = BertSelfOutput(
hidden_size=hidden_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
self instance-attribute ¶
self = BertSelfAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__ ¶
__init__(
hidden_size: int,
num_attention_heads: int,
layer_norm_eps: float,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
BertEmbedding ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
position_embeddings instance-attribute ¶
position_embeddings = VocabParallelEmbedding(
max_position_embeddings, hidden_size
)
token_type_embeddings instance-attribute ¶
token_type_embeddings = VocabParallelEmbedding(
type_vocab_size, hidden_size
)
word_embeddings instance-attribute ¶
word_embeddings = VocabParallelEmbedding(
vocab_size, hidden_size
)
__init__ ¶
Source code in vllm/model_executor/models/bert.py
forward ¶
Source code in vllm/model_executor/models/bert.py
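BertEmbedding combines the three embedding tables above in the standard BERT way: the word, position, and token-type embeddings are summed and then normalized. The following is a minimal, self-contained sketch of that recipe using plain nn.Embedding layers as stand-ins for VocabParallelEmbedding; the class name, default sizes, and LayerNorm attribute are illustrative assumptions, not the exact vLLM implementation.

import torch
import torch.nn as nn

class ToyBertEmbedding(nn.Module):
    """Illustrative, simplified stand-in for BertEmbedding (hypothetical)."""

    def __init__(self, vocab_size=30522, max_position_embeddings=512,
                 type_vocab_size=2, hidden_size=768, layer_norm_eps=1e-12):
        super().__init__()
        self.word_embeddings = nn.Embedding(vocab_size, hidden_size)
        self.position_embeddings = nn.Embedding(max_position_embeddings, hidden_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)
        self.LayerNorm = nn.LayerNorm(hidden_size, eps=layer_norm_eps)

    def forward(self, input_ids, position_ids, token_type_ids=None):
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(input_ids)
        # BERT embeddings are the sum of the word, position, and token-type tables.
        embeddings = (self.word_embeddings(input_ids)
                      + self.position_embeddings(position_ids)
                      + self.token_type_embeddings(token_type_ids))
        # Final LayerNorm; dropout is omitted since it is a no-op at inference time.
        return self.LayerNorm(embeddings)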
BertEmbeddingModel ¶
Bases: Module, SupportsQuant
A model that uses BERT to provide embedding functionality.
This class encapsulates the BertModel and provides an interface for embedding operations and customized pooling functions.
Attributes:
Name | Type | Description
---|---|---
model | BertModel | An instance of BertModel used for forward operations.
_pooler | Pooler | An instance of Pooler used for pooling operations.
Source code in vllm/model_executor/models/bert.py
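In practice this class is constructed by vLLM itself when an embedding checkpoint is loaded; callers typically go through the high-level LLM API instead of instantiating it directly. A minimal usage sketch, assuming a BERT-based embedding checkpoint such as BAAI/bge-base-en-v1.5 and the LLM.embed entry point of recent vLLM releases:

from vllm import LLM

# A BERT-based embedding checkpoint maps onto BertEmbeddingModel internally.
llm = LLM(model="BAAI/bge-base-en-v1.5", task="embed")

outputs = llm.embed(["vLLM makes embedding generation easy."])
# Each output carries the pooled embedding for one prompt.
print(len(outputs[0].outputs.embedding))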
model instance-attribute ¶
model = _build_model(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "model"),
)
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = "")
Source code in vllm/model_executor/models/bert.py
_build_model ¶
_build_model(
vllm_config: VllmConfig, prefix: str = ""
) -> BertModel
_build_pooler ¶
_build_pooler(pooler_config: PoolerConfig) -> Pooler
forward ¶
forward(
input_ids: Tensor,
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
load_weights ¶
Source code in vllm/model_executor/models/bert.py
BertEncoder ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
layer instance-attribute ¶
layer = ModuleList(
    [
        BertLayer(
            config=config,
            cache_config=cache_config,
            quant_config=quant_config,
            prefix=f"{prefix}.layer.{layer_idx}",
        )
        for layer_idx in range(num_hidden_layers)
    ]
)
__init__ ¶
__init__(vllm_config: VllmConfig, prefix: str = "")
Source code in vllm/model_executor/models/bert.py
BertForSequenceClassification ¶
Bases: Module, SupportsCrossEncoding, SupportsQuant
A model that uses BERT for sequence classification and cross-encoding (scoring).
This class encapsulates a pooling-capable BertModel together with a classification head and dispatched pooling functions.
Attributes:
Name | Type | Description
---|---|---
bert | BertPoolingModel | An instance of BertPoolingModel used for forward operations.
pooler | DispatchPooler | Dispatches between the encode, classify, and score pooling tasks.
Source code in vllm/model_executor/models/bert.py
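As with the embedding model, this class is instantiated by vLLM when a BERT-based cross-encoder or sequence-classification checkpoint is loaded; scoring requests usually go through the high-level LLM API. A minimal usage sketch, assuming a checkpoint such as cross-encoder/ms-marco-MiniLM-L-6-v2 and the LLM.score entry point of recent vLLM releases:

from vllm import LLM

# A BERT-based cross-encoder maps onto BertForSequenceClassification internally.
llm = LLM(model="cross-encoder/ms-marco-MiniLM-L-6-v2", task="score")

outputs = llm.score(
    "What is the capital of France?",
    ["Paris is the capital of France.", "The Eiffel Tower is in Paris."],
)
for out in outputs:
    # Relevance score of each candidate text against the query.
    print(out.outputs.score)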
bert instance-attribute ¶
bert = BertPoolingModel(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "bert"),
embedding_class=BertEmbedding,
)
pooler instance-attribute ¶
pooler = DispatchPooler(
{
"encode": for_encode(pooler_config),
"classify": ClassifierPooler(
pooling=pooler,
classifier=classifier,
act_fn=act_fn_for_seq_cls(model_config),
),
"score": ClassifierPooler(
pooling=pooler,
classifier=classifier,
act_fn=act_fn_for_cross_encoder(model_config),
),
}
)
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = "")
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
BertIntermediate ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
dense = ColumnParallelLinear(
input_size=hidden_size,
output_size=intermediate_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
intermediate_size: int,
hidden_act: str,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
BertLayer ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attention instance-attribute ¶
attention = BertAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
layer_norm_eps=layer_norm_eps,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attention",
)
intermediate instance-attribute ¶
intermediate = BertIntermediate(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
hidden_act=hidden_act,
quant_config=quant_config,
prefix=f"{prefix}.intermediate",
)
output instance-attribute ¶
output = BertOutput(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__ ¶
__init__(
config: BertConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
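The three submodules above are composed in the usual BERT order: the attention block runs first, its output feeds the feed-forward expansion, and BertOutput projects back to hidden_size with a residual connection and LayerNorm. A minimal sketch of that data flow; the helper name and its callable arguments are illustrative assumptions, not the exact vLLM forward.

import torch

def bert_layer_forward(attention, intermediate, output,
                       hidden_states: torch.Tensor) -> torch.Tensor:
    """Illustrative data flow through one BertLayer (hypothetical helper)."""
    # Self-attention block: BertSelfAttention plus BertSelfOutput
    # (dense projection, residual, LayerNorm) inside BertAttention.
    attn_output = attention(hidden_states)
    # Feed-forward expansion to intermediate_size with the configured hidden_act.
    intermediate_output = intermediate(attn_output)
    # BertOutput projects back to hidden_size, adds the residual, applies LayerNorm.
    return output(intermediate_output, attn_output)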
BertModel ¶
Bases: Module, SupportsQuant
Source code in vllm/model_executor/models/bert.py
encoder instance-attribute ¶
encoder = BertEncoder(
vllm_config=vllm_config, prefix=f"{prefix}.encoder"
)
packed_modules_mapping class-attribute instance-attribute ¶
__init__ ¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
_load_weights ¶
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Tensor,
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
load_weights ¶
Source code in vllm/model_executor/models/bert.py
BertOutput ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
dense = RowParallelLinear(
input_size=intermediate_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
intermediate_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
BertPooler ¶
Bases: Pooler
Source code in vllm/model_executor/models/bert.py
__init__ ¶
forward ¶
forward(
hidden_states: Union[Tensor, list[Tensor]],
pooling_metadata: PoolingMetadata,
) -> Union[Tensor, list[Tensor]]
Source code in vllm/model_executor/models/bert.py
get_pooling_updates ¶
get_pooling_updates(
task: PoolingTask,
) -> PoolingParamsUpdate
get_supported_tasks ¶
get_supported_tasks() -> Set[PoolingTask]
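BertPooler implements the classic BERT-style [CLS] pooling head: the hidden state of each sequence's first token is passed through a dense layer followed by tanh. The sketch below shows that operation for a single, already-split-out sequence; the helper name is hypothetical, and the real implementation locates first-token positions in flattened batches via pooling_metadata.

import torch
import torch.nn as nn

def cls_pool(hidden_states: torch.Tensor, dense: nn.Linear) -> torch.Tensor:
    """Classic BERT [CLS] pooling for a single sequence (illustrative helper)."""
    # hidden_states: [seq_len, hidden_size]; the first token is the [CLS] position.
    first_token = hidden_states[0]
    # Dense projection followed by tanh, as in the original BERT pooler head.
    return torch.tanh(dense(first_token))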
BertPoolingModel ¶
Bases: BertModel
Source code in vllm/model_executor/models/bert.py
__init__ ¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
load_weights ¶
Source code in vllm/model_executor/models/bert.py
BertSelfAttention ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attn instance-attribute ¶
attn = EncoderOnlyAttention(
num_heads=num_heads,
head_size=head_dim,
scale=scaling,
num_kv_heads=num_kv_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attn",
)
qkv_proj instance-attribute ¶
qkv_proj = QKVParallelLinear(
hidden_size=hidden_size,
head_size=head_dim,
total_num_heads=total_num_heads,
total_num_kv_heads=total_num_kv_heads,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.qkv_proj",
)
__init__ ¶
__init__(
hidden_size: int,
num_attention_heads: int,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
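The qkv_proj above produces a single fused tensor holding the query, key, and value projections; forward splits it by head counts before handing the pieces to EncoderOnlyAttention, which performs bidirectional (non-causal) attention. A minimal sketch of that split, with an illustrative helper name and explicit size arguments rather than the exact attributes used in vLLM:

import torch

def split_fused_qkv(qkv: torch.Tensor, num_heads: int, num_kv_heads: int,
                    head_dim: int):
    """Illustrative split of a fused QKV projection output (hypothetical helper)."""
    q_size = num_heads * head_dim
    kv_size = num_kv_heads * head_dim
    # The fused tensor is laid out as [..., q | k | v] along the last dimension.
    q, k, v = qkv.split([q_size, kv_size, kv_size], dim=-1)
    return q, k, v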
BertSelfOutput ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
dense = RowParallelLinear(
input_size=hidden_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)