vllm.model_executor.layers.rotary_embedding.dynamic_ntk_scaling_rope

DynamicNTKScalingRotaryEmbedding

Bases: RotaryEmbedding

RotaryEmbedding extended with Dynamic NTK scaling. Dynamic NTK scaling enlarges the rotary base for long inputs, lowering every per-dimension frequency so the layer covers roughly max_position_embeddings * scaling_factor positions without fine-tuning.

Credits to the Reddit users /u/bloc97 and /u/emozilla

Source code in vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
    """RotaryEmbedding extended with Dynamic NTK scaling.

    Credits to the Reddit users /u/bloc97 and /u/emozilla
    """

    def __init__(
        self,
        head_size: int,
        rotary_dim: int,
        max_position_embeddings: int,
        base: float,
        is_neox_style: bool,
        scaling_factor: float,
        dtype: torch.dtype,
    ) -> None:
        self.scaling_factor = scaling_factor
        super().__init__(head_size, rotary_dim, max_position_embeddings, base,
                         is_neox_style, dtype)

    def _compute_cos_sin_cache(self) -> torch.Tensor:
        # NOTE(woosuk): self.max_position_embeddings is the original
        # maximum length before applying the rope scaling.
        # Thus, the maximum length after applying the rope scaling is
        # self.max_position_embeddings * self.scaling_factor.
        max_len = self.max_position_embeddings * self.scaling_factor
        base = self.base * (
            (self.scaling_factor * max_len / self.max_position_embeddings) -
            (self.scaling_factor - 1))**(self.rotary_dim /
                                         (self.rotary_dim - 2))
        inv_freq = self._compute_inv_freq(base)
        t = torch.arange(max_len, dtype=torch.float)

        freqs = torch.einsum("i,j -> ij", t, inv_freq)
        cos = freqs.cos()
        sin = freqs.sin()
        cache = torch.cat((cos, sin), dim=-1)
        return cache
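
The scaling works entirely through the base: _compute_cos_sin_cache recomputes the rotary base from scaling_factor and then rebuilds the cos/sin cache for the stretched maximum length. Below is a minimal numeric sketch of that rescaling; all concrete values (rotary_dim=128, base=10000, scaling_factor=4, max_position_embeddings=4096) are illustrative assumptions, not vLLM defaults.

rotary_dim = 128
base = 10000.0
scaling_factor = 4.0
max_position_embeddings = 4096

# Maximum length after applying the rope scaling, as in the NOTE above.
max_len = max_position_embeddings * scaling_factor  # 16384.0

# Same expression as in _compute_cos_sin_cache: the base grows with the
# scaling factor, which shrinks every rotary frequency.
new_base = base * (
    (scaling_factor * max_len / max_position_embeddings) -
    (scaling_factor - 1)) ** (rotary_dim / (rotary_dim - 2))
print(new_base)  # ~1.35e5, i.e. 10000 * 13 ** (128 / 126)

A larger base slows the rotation per position, which is how NTK-aware scaling stretches the usable context without retraining.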

scaling_factor instance-attribute

scaling_factor = scaling_factor

__init__

__init__(
    head_size: int,
    rotary_dim: int,
    max_position_embeddings: int,
    base: float,
    is_neox_style: bool,
    scaling_factor: float,
    dtype: dtype,
) -> None
Source code in vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
def __init__(
    self,
    head_size: int,
    rotary_dim: int,
    max_position_embeddings: int,
    base: float,
    is_neox_style: bool,
    scaling_factor: float,
    dtype: torch.dtype,
) -> None:
    self.scaling_factor = scaling_factor
    super().__init__(head_size, rotary_dim, max_position_embeddings, base,
                     is_neox_style, dtype)
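
__init__ only records scaling_factor before delegating to RotaryEmbedding.__init__, so constructing the layer looks like a plain RotaryEmbedding with one extra argument. A hedged construction sketch using only the signature documented above; the concrete values are illustrative, and inside a running engine these layers are normally built from the model's rope_scaling config rather than by hand.

import torch

from vllm.model_executor.layers.rotary_embedding.dynamic_ntk_scaling_rope import (
    DynamicNTKScalingRotaryEmbedding)

rope = DynamicNTKScalingRotaryEmbedding(
    head_size=128,
    rotary_dim=128,
    max_position_embeddings=4096,  # original, pre-scaling maximum length
    base=10000.0,
    is_neox_style=True,
    scaling_factor=2.0,            # cache covers 4096 * 2 = 8192 positions
    dtype=torch.float16,
)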

_compute_cos_sin_cache

_compute_cos_sin_cache() -> Tensor
Source code in vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
def _compute_cos_sin_cache(self) -> torch.Tensor:
    # NOTE(woosuk): self.max_position_embeddings is the original
    # maximum length before applying the rope scaling.
    # Thus, the maximum length after applying the rope scaling is
    # self.max_position_embeddings * self.scaling_factor.
    max_len = self.max_position_embeddings * self.scaling_factor
    base = self.base * (
        (self.scaling_factor * max_len / self.max_position_embeddings) -
        (self.scaling_factor - 1))**(self.rotary_dim /
                                     (self.rotary_dim - 2))
    inv_freq = self._compute_inv_freq(base)
    t = torch.arange(max_len, dtype=torch.float)

    freqs = torch.einsum("i,j -> ij", t, inv_freq)
    cos = freqs.cos()
    sin = freqs.sin()
    cache = torch.cat((cos, sin), dim=-1)
    return cache
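
For reference, a standalone sketch that mirrors the cache construction above in plain PyTorch, assuming the standard RoPE inverse-frequency definition (base ** (-2i / rotary_dim)) for _compute_inv_freq; all sizes are illustrative.

import torch

rotary_dim = 64
base = 10000.0
scaling_factor = 2.0
max_position_embeddings = 2048

max_len = int(max_position_embeddings * scaling_factor)
new_base = base * (
    (scaling_factor * max_len / max_position_embeddings) -
    (scaling_factor - 1)) ** (rotary_dim / (rotary_dim - 2))

# Inverse frequencies recomputed with the enlarged base.
inv_freq = 1.0 / (new_base ** (
    torch.arange(0, rotary_dim, 2, dtype=torch.float) / rotary_dim))
t = torch.arange(max_len, dtype=torch.float)
freqs = torch.einsum("i,j -> ij", t, inv_freq)

cache = torch.cat((freqs.cos(), freqs.sin()), dim=-1)
print(cache.shape)  # torch.Size([4096, 64]): one row per position

Each row holds rotary_dim // 2 cosine values followed by rotary_dim // 2 sine values, matching the torch.cat((cos, sin), dim=-1) layout above.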