Bases: QuantizationConfig
Configuration class for FP8 quantization using Intel Neural Compressor (INC).
Source code in vllm/model_executor/layers/quantization/inc.py
class INCConfig(QuantizationConfig):
    """Quantization config for FP8 using Intel Neural Compressor (INC).

    As written, this config hands out *unquantized* methods for linear and
    fused-MoE layers and rejects construction from a config dict.
    NOTE(review): presumably the real quantization is applied elsewhere
    (outside this class) -- confirm against the INC integration.
    """

    @classmethod
    def get_name(cls) -> QuantizationMethods:
        """Return the name of this quantization method: ``"inc"``."""
        return "inc"

    @classmethod
    def get_supported_act_dtypes(cls) -> list[torch.dtype]:
        """Return the supported activation dtypes (only ``torch.bfloat16``)."""
        supported = [torch.bfloat16]
        return supported

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> "INCConfig":
        """Always raise ``AssertionError``; ``config`` is never inspected.

        Raises:
            AssertionError: Unconditionally.
        """
        raise AssertionError

    def get_quant_method(
        self,
        layer: torch.nn.Module,
        prefix: str,
    ) -> Optional["QuantizeMethodBase"]:
        """Pick the method object used for ``layer``.

        Args:
            layer: The module to select a method for.
            prefix: Unused by this implementation.

        Returns:
            ``UnquantizedLinearMethod()`` for a ``LinearBase`` layer,
            ``UnquantizedFusedMoEMethod(layer.moe_config)`` for a
            ``FusedMoE`` layer, otherwise ``None``.
        """
        # The LinearBase check comes first, matching the original ordering.
        if isinstance(layer, LinearBase):
            return UnquantizedLinearMethod()
        if isinstance(layer, FusedMoE):
            return UnquantizedFusedMoEMethod(layer.moe_config)
        return None

    @classmethod
    def get_min_capability(cls) -> int:
        """Always raise ``AssertionError``; no capability is ever returned.

        Raises:
            AssertionError: Unconditionally.
        """
        raise AssertionError

    @staticmethod
    def get_config_filenames() -> list[str]:
        """Return the config filenames for this method: an empty list."""
        filenames: list[str] = []
        return filenames
|
from_config classmethod
from_config(config: dict[str, Any]) -> INCConfig
Source code in vllm/model_executor/layers/quantization/inc.py
@classmethod
def from_config(cls, config: dict[str, Any]) -> "INCConfig":
    """Always raise ``AssertionError``; ``config`` is never inspected.

    Raises:
        AssertionError: Unconditionally.
    """
    raise AssertionError
|
get_config_filenames staticmethod
get_config_filenames() -> list[str]
Source code in vllm/model_executor/layers/quantization/inc.py
@staticmethod
def get_config_filenames() -> list[str]:
    """Return the config filenames for this method: an empty list."""
    filenames: list[str] = []
    return filenames
|
get_min_capability classmethod
get_min_capability() -> int
Source code in vllm/model_executor/layers/quantization/inc.py
@classmethod
def get_min_capability(cls) -> int:
    """Always raise ``AssertionError``; no capability is ever returned.

    Raises:
        AssertionError: Unconditionally.
    """
    raise AssertionError
|
get_name classmethod
get_name() -> QuantizationMethods
Source code in vllm/model_executor/layers/quantization/inc.py
@classmethod
def get_name(cls) -> QuantizationMethods:
    """Return the name of this quantization method: ``"inc"``."""
    return "inc"
|
get_quant_method
get_quant_method(layer: torch.nn.Module, prefix: str) -> Optional[QuantizeMethodBase]
Source code in vllm/model_executor/layers/quantization/inc.py
def get_quant_method(
    self,
    layer: torch.nn.Module,
    prefix: str,
) -> Optional["QuantizeMethodBase"]:
    """Pick the method object used for ``layer``.

    Args:
        layer: The module to select a method for.
        prefix: Unused by this implementation.

    Returns:
        ``UnquantizedLinearMethod()`` for a ``LinearBase`` layer,
        ``UnquantizedFusedMoEMethod(layer.moe_config)`` for a ``FusedMoE``
        layer, otherwise ``None``.
    """
    # The LinearBase check comes first, matching the original ordering.
    if isinstance(layer, LinearBase):
        return UnquantizedLinearMethod()
    if isinstance(layer, FusedMoE):
        return UnquantizedFusedMoEMethod(layer.moe_config)
    return None
|
get_supported_act_dtypes classmethod
get_supported_act_dtypes() -> list[torch.dtype]
Source code in vllm/model_executor/layers/quantization/inc.py
@classmethod
def get_supported_act_dtypes(cls) -> list[torch.dtype]:
    """Return the supported activation dtypes (only ``torch.bfloat16``)."""
    supported = [torch.bfloat16]
    return supported
|