Bases: ReasoningParser
Reasoning parser for the GptOss model.
The GptOss model uses the Harmony format to extract reasoning content, so this parser is only used to detect the end of the reasoning content.
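In the Harmony chat format, gpt-oss emits its chain-of-thought on the analysis channel and the user-visible answer on the final channel, so the header that opens the final channel marks the end of reasoning. A minimal usage sketch, not part of the vLLM source: it assumes ReasoningParserManager.get_reasoning_parser performs the registry lookup and uses a placeholder checkpoint name.

from transformers import AutoTokenizer

from vllm.reasoning import ReasoningParserManager

tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")  # placeholder checkpoint
parser_cls = ReasoningParserManager.get_reasoning_parser("GptOss")
parser = parser_cls(tokenizer)

# The parser answers a single question: has the final-channel header appeared yet?
still_reasoning = tokenizer.encode("<|start|>assistant<|channel|>analysis<|message|>Let me think")
done_reasoning = tokenizer.encode("<|start|>assistant<|channel|>final<|message|>Hello!")
print(parser.is_reasoning_end(still_reasoning))  # False
print(parser.is_reasoning_end(done_reasoning))   # True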
Source code in vllm/reasoning/gptoss_reasoning_parser.py
@ReasoningParserManager.register_module("GptOss")
class GptOssReasoningParser(ReasoningParser):
    """
    Reasoning parser for GptOss model.
    The GptOss model uses harmony to extract reasoning content and this parser
    is only used for detecting the end of the reasoning content.
    """

    def __init__(self, tokenizer: PreTrainedTokenizerBase):
        super().__init__(tokenizer)
        self.reasoning_end_token_ids = self.model_tokenizer.encode(
            "<|start|>assistant<|channel|>final<|message|>")

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        end_token_ids = self.reasoning_end_token_ids
        assert len(end_token_ids) > 0, "reasoning_end_token_ids is empty"
        # Check if the end sequence is present in the input_ids.
        # We search from the end of input_ids to find the last match.
        for i in range(len(input_ids) - len(end_token_ids), -1, -1):
            if input_ids[i:i + len(end_token_ids)] == end_token_ids:
                return True
        return False

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        raise RuntimeError(
            "GptOss model uses harmony to extract reasoning content. This "
            "function should not be called.")

    def extract_reasoning_content_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> Union[DeltaMessage, None]:
        raise RuntimeError(
            "GptOss model uses harmony to extract reasoning content. This "
            "function should not be called.")

    def extract_reasoning_content(
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[Optional[str], Optional[str]]:
        raise RuntimeError(
            "GptOss model uses harmony to extract reasoning content. This "
            "function should not be called.")
reasoning_end_token_ids instance-attribute
reasoning_end_token_ids = encode(
"<|start|>assistant<|channel|>final<|message|>"
)
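This attribute is simply the tokenization of the Harmony header that opens the assistant's final channel; its presence in the generated token IDs is what is_reasoning_end checks for. A quick way to inspect it, as a sketch only: the checkpoint name below is a placeholder and the resulting IDs depend on the tokenizer's vocabulary.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")  # placeholder checkpoint
end_ids = tok.encode("<|start|>assistant<|channel|>final<|message|>")
print(end_ids)              # exact IDs depend on the vocabulary
print(tok.decode(end_ids))  # round-trips to the Harmony final-channel header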
__init__
__init__(tokenizer: PreTrainedTokenizerBase)
Source code in vllm/reasoning/gptoss_reasoning_parser.py
def __init__(self, tokenizer: PreTrainedTokenizerBase):
    super().__init__(tokenizer)
    self.reasoning_end_token_ids = self.model_tokenizer.encode(
        "<|start|>assistant<|channel|>final<|message|>")
extract_content_ids
Source code in vllm/reasoning/gptoss_reasoning_parser.py
def extract_content_ids(self, input_ids: list[int]) -> list[int]:
    raise RuntimeError(
        "GptOss model uses harmony to extract reasoning content. This "
        "function should not be called.")
extract_reasoning_content
Source code in vllm/reasoning/gptoss_reasoning_parser.py
def extract_reasoning_content(
    self, model_output: str, request: ChatCompletionRequest
) -> tuple[Optional[str], Optional[str]]:
    raise RuntimeError(
        "GptOss model uses harmony to extract reasoning content. This "
        "function should not be called.")
extract_reasoning_content_streaming
Source code in vllm/reasoning/gptoss_reasoning_parser.py
def extract_reasoning_content_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> Union[DeltaMessage, None]:
    raise RuntimeError(
        "GptOss model uses harmony to extract reasoning content. This "
        "function should not be called.")
is_reasoning_end
Source code in vllm/reasoning/gptoss_reasoning_parser.py
def is_reasoning_end(self, input_ids: list[int]) -> bool:
    end_token_ids = self.reasoning_end_token_ids
    assert len(end_token_ids) > 0, "reasoning_end_token_ids is empty"
    # Check if the end sequence is present in the input_ids.
    # We search from the end of input_ids to find the last match.
    for i in range(len(input_ids) - len(end_token_ids), -1, -1):
        if input_ids[i:i + len(end_token_ids)] == end_token_ids:
            return True
    return False
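The check above is a plain backward scan for the end-token subsequence, stopping at the most recent occurrence. A standalone sketch of the same logic on toy IDs (the values below are made up purely for illustration):

def contains_subsequence(haystack: list[int], needle: list[int]) -> bool:
    # Same backward scan as is_reasoning_end: start at the latest possible
    # match position and walk toward the beginning.
    for i in range(len(haystack) - len(needle), -1, -1):
        if haystack[i:i + len(needle)] == needle:
            return True
    return False

end_ids = [7, 8, 9]  # stand-in for reasoning_end_token_ids
print(contains_subsequence([1, 2, 7, 8, 9, 4], end_ids))  # True: header already emitted
print(contains_subsequence([1, 2, 7, 8], end_ids))        # False: header not yet complete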