vllm.model_executor.layers.fused_moe.cpu_fused_moe
CPUFusedMOE
Source code in vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
__call__
__call__(
layer: Module,
x: Tensor,
use_grouped_topk: bool,
top_k: int,
router_logits: Tensor,
renormalize: bool,
topk_group: Optional[int] = None,
num_expert_group: Optional[int] = None,
global_num_experts: int = -1,
expert_map: Optional[Tensor] = None,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
e_score_correction_bias: Optional[Tensor] = None,
apply_router_weight_on_input: bool = False,
activation: str = "silu",
) -> Tensor
Source code in vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
IPEXFusedMOE
Source code in vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
__call__
__call__(
layer: Module,
x: Tensor,
use_grouped_topk: bool,
top_k: int,
router_logits: Tensor,
renormalize: bool,
topk_group: Optional[int] = None,
num_expert_group: Optional[int] = None,
global_num_experts: int = -1,
expert_map: Optional[Tensor] = None,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
e_score_correction_bias: Optional[Tensor] = None,
apply_router_weight_on_input: bool = False,
activation: str = "silu",
) -> Tensor
Source code in vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
SGLFusedMOE
Source code in vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
__call__
__call__(
layer: Module,
x: Tensor,
use_grouped_topk: bool,
top_k: int,
router_logits: Tensor,
renormalize: bool,
topk_group: Optional[int] = None,
num_expert_group: Optional[int] = None,
global_num_experts: int = -1,
expert_map: Optional[Tensor] = None,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
e_score_correction_bias: Optional[Tensor] = None,
apply_router_weight_on_input: bool = False,
activation: str = "silu",
) -> Tensor
Source code in vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
grouped_topk
grouped_topk(
hidden_states: Tensor,
gating_output: Tensor,
topk: int,
renormalize: bool,
num_expert_group: int = 0,
topk_group: int = 0,
scoring_func: str = "softmax",
e_score_correction_bias: Optional[Tensor] = None,
) -> tuple[Tensor, Tensor]
Source code in vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
select_experts
select_experts(
hidden_states: Tensor,
router_logits: Tensor,
top_k: int,
use_grouped_topk: bool,
renormalize: bool,
topk_group: Optional[int] = None,
num_expert_group: Optional[int] = None,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
e_score_correction_bias: Optional[Tensor] = None,
) -> tuple[Tensor, Tensor]