Skip to content

vllm.entrypoints.openai.tool_parsers

Modules:

Name | Description
abstract_tool_parser
deepseekv31_tool_parser
deepseekv3_tool_parser
glm4_moe_tool_parser
granite_20b_fc_tool_parser
granite_tool_parser
hermes_tool_parser
hunyuan_a13b_tool_parser
internlm2_tool_parser
jamba_tool_parser
kimi_k2_tool_parser
llama4_pythonic_tool_parser
llama_tool_parser
minimax_tool_parser
mistral_tool_parser
phi4mini_tool_parser
pythonic_tool_parser
qwen3coder_tool_parser
seed_oss_tool_parser
step3_tool_parser
utils
xlam_tool_parser

__all__ module-attribute

# Public API of vllm.entrypoints.openai.tool_parsers: the ToolParser base
# class, its registry manager, and every registered model-specific parser.
__all__ = [
    "ToolParser",
    "ToolParserManager",
    "Granite20bFCToolParser",
    "GraniteToolParser",
    "Hermes2ProToolParser",
    "MistralToolParser",
    "Internlm2ToolParser",
    "Llama3JsonToolParser",
    "JambaToolParser",
    "Llama4PythonicToolParser",
    "PythonicToolParser",
    "Phi4MiniJsonToolParser",
    "DeepSeekV3ToolParser",
    "DeepSeekV31ToolParser",
    "xLAMToolParser",
    "MinimaxToolParser",
    "KimiK2ToolParser",
    "HunyuanA13BToolParser",
    "Glm4MoeModelToolParser",
    "Qwen3CoderToolParser",
    "SeedOssToolParser",
    "Step3ToolParser",
]

DeepSeekV31ToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py
@ToolParserManager.register_module("deepseek_v31")
class DeepSeekV31ToolParser(ToolParser):
    """Tool parser for DeepSeek-V3.1 tool-call markup.

    The model emits tool calls wrapped in special tokens:

        <tool-calls-begin>
            <tool-call-begin>NAME<sep>JSON_ARGS<tool-call-end> ...
        <tool-calls-end>

    Supports both one-shot extraction (``extract_tool_calls``) and
    incremental streaming extraction (``extract_tool_calls_streaming``).
    """

    def __init__(self, tokenizer: AnyTokenizer):
        """Set up streaming state, token strings/ids, and regex patterns.

        Raises:
            ValueError: if no tokenizer was supplied.
            RuntimeError: if the tool-call wrapper tokens are not in the
                tokenizer vocabulary.
        """
        super().__init__(tokenizer)

        # Streaming state: which tool we are on, whether its name has been
        # emitted yet, and the argument text already streamed per tool.
        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.streamed_args_for_tool: list[str] = (
            [])  # map what has been streamed for each tool so far to a list

        self.tool_calls_start_token: str = "<|tool▁calls▁begin|>"
        self.tool_calls_end_token: str = "<|tool▁calls▁end|>"

        self.tool_call_start_token: str = "<|tool▁call▁begin|>"
        self.tool_call_end_token: str = "<|tool▁call▁end|>"

        # Separator between the function name and its JSON arguments.
        self.tool_sep_token: str = "<|tool▁sep|>"

        # Build the patterns from the token strings with re.escape() so any
        # regex metacharacters in the tokens (e.g. "|") are matched
        # literally instead of being treated as alternation, which would
        # silently break matching.
        self.tool_call_regex = re.compile(
            re.escape(self.tool_call_start_token) +
            r"(?P<function_name>.*)" + re.escape(self.tool_sep_token) +
            r"(?P<function_arguments>.*)" +
            re.escape(self.tool_call_end_token))

        self.stream_tool_call_portion_regex = re.compile(
            r"(?P<function_name>.*)" + re.escape(self.tool_sep_token) +
            r"(?P<function_arguments>.*)")

        self.stream_tool_call_name_regex = re.compile(
            r"(?P<function_name>.*)" + re.escape(self.tool_sep_token))

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")
        # Token ids are looked up once; .get() returns None for tokens that
        # are not in the vocabulary.
        self.tool_calls_start_token_id = self.vocab.get(
            self.tool_calls_start_token)
        self.tool_calls_end_token_id = self.vocab.get(
            self.tool_calls_end_token)

        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

        if (self.tool_calls_start_token_id is None
                or self.tool_calls_end_token_id is None):
            raise RuntimeError(
                "DeepSeek-V3.1 Tool parser could not locate tool call "
                "start/end tokens in the tokenizer!")

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Extract all tool calls from a complete (non-streaming) response.

        Returns an ExtractedToolCallInformation whose ``content`` is the
        text preceding the tool-calls block (or None if empty); on any
        parsing error the full output is returned as plain content.
        """

        # sanity check; avoid unnecessary processing
        if self.tool_calls_start_token not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        else:
            try:
                # findall yields one (name, arguments) tuple per complete
                # tool call found in the output
                function_call_tuples = self.tool_call_regex.findall(
                    model_output)

                tool_calls = []
                for match in function_call_tuples:
                    function_name, function_args = match
                    tool_calls.append(
                        ToolCall(
                            type="function",
                            function=FunctionCall(name=function_name,
                                                  arguments=function_args),
                        ))

                # Anything before the tool-calls-begin token is regular
                # assistant content.
                content = model_output[:model_output.
                                       find(self.tool_calls_start_token)]
                return ExtractedToolCallInformation(
                    tools_called=True,
                    tool_calls=tool_calls,
                    content=content if content else None,
                )

            except Exception:
                logger.exception(
                    "Error in extracting tool call from response.")
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Extract a streaming delta (text and/or tool-call fragment) from
        the newest chunk of model output.

        Progress is tracked across calls through instance state
        (``current_tool_id``, ``current_tool_name_sent``,
        ``prev_tool_call_arr``, ``streamed_args_for_tool``). Returns a
        DeltaMessage to stream, or None when nothing should be emitted for
        this chunk.
        """

        logger.debug("delta_text: %s", delta_text)
        logger.debug("delta_token_ids: %s", delta_token_ids)
        # If no tool-calls-begin token has appeared yet, everything so far
        # is plain content.
        if self.tool_calls_start_token_id not in current_token_ids:
            logger.debug("No tool call tokens found!")
            return DeltaMessage(content=delta_text)
        # The outer wrapper tokens are never streamed to the client.
        delta_text = delta_text.replace(self.tool_calls_start_token,
                                        "").replace(self.tool_calls_end_token,
                                                    "")
        try:

            # figure out where we are in the parsing by counting tool call
            # start & end tags
            prev_tool_start_count = previous_token_ids.count(
                self.tool_call_start_token_id)
            prev_tool_end_count = previous_token_ids.count(
                self.tool_call_end_token_id)
            cur_tool_start_count = current_token_ids.count(
                self.tool_call_start_token_id)
            cur_tool_end_count = current_token_ids.count(
                self.tool_call_end_token_id)
            tool_call_portion = None
            text_portion = None

            # case: if we're generating text, OR rounding out a tool call
            if (cur_tool_start_count == cur_tool_end_count
                    and prev_tool_end_count == cur_tool_end_count
                    and self.tool_call_end_token not in delta_text):
                logger.debug("Generating text content! skipping tool parsing.")
                return DeltaMessage(content=delta_text)

            if self.tool_call_end_token in delta_text:
                logger.debug("tool_call_end_token in delta_text")
                full_text = current_text + delta_text
                tool_call_portion = full_text.split(
                    self.tool_call_start_token)[-1].split(
                        self.tool_call_end_token)[0].rstrip()
                delta_text = delta_text.split(
                    self.tool_call_end_token)[0].rstrip()
                # NOTE(review): delta_text was already truncated at the end
                # token above, so this split is a no-op and text_portion is
                # just the stripped delta -- confirm intended.
                text_portion = delta_text.split(
                    self.tool_call_end_token)[-1].lstrip()

            # case -- we're starting a new tool call
            if (cur_tool_start_count > cur_tool_end_count
                    and cur_tool_start_count > prev_tool_start_count):
                if len(delta_token_ids) > 1:
                    tool_call_portion = current_text.split(
                        self.tool_call_start_token)[-1]
                else:
                    # only the start token itself arrived; nothing to parse
                    tool_call_portion = None
                    delta = None

                text_portion = None

                # set cursors and state appropriately
                self.current_tool_id += 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("Starting on a new tool %s", self.current_tool_id)

            # case -- we're updating an existing tool call
            elif (cur_tool_start_count > cur_tool_end_count
                  and cur_tool_start_count == prev_tool_start_count):

                # get the portion of the text that's the tool call
                tool_call_portion = current_text.split(
                    self.tool_call_start_token)[-1]
                text_portion = None

            # case -- the current tool call is being closed.
            elif (cur_tool_start_count == cur_tool_end_count
                  and cur_tool_end_count >= prev_tool_end_count):
                if self.prev_tool_call_arr is None or len(
                        self.prev_tool_call_arr) == 0:
                    logger.debug(
                        "attempting to close tool call, but no tool call")
                    return None
                diff = self.prev_tool_call_arr[self.current_tool_id].get(
                    "arguments")
                if diff:
                    # Stream whatever tail of the arguments (up to and
                    # including the closing '"}') has not been emitted yet.
                    # (A previous `diff is str` unicode-escape decode was
                    # dead code: always False, and its result was
                    # immediately overwritten below.)
                    if '"}' not in delta_text:
                        return None
                    end_loc = delta_text.rindex('"}')
                    diff = delta_text[:end_loc] + '"}'
                    logger.debug(
                        "Finishing tool and found diff that had not "
                        "been streamed yet: %s",
                        diff,
                    )
                    self.streamed_args_for_tool[self.current_tool_id] += diff
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            function=DeltaFunctionCall(
                                arguments=diff).model_dump(exclude_none=True),
                        )
                    ])

            # case -- otherwise we're just generating text
            else:
                text = delta_text.replace(self.tool_call_start_token, "")
                text = text.replace(self.tool_call_end_token, "")
                delta = DeltaMessage(tool_calls=[], content=text)
                return delta

            current_tool_call = dict()
            if tool_call_portion:
                current_tool_call_matches = (
                    self.stream_tool_call_portion_regex.match(
                        tool_call_portion))
                if current_tool_call_matches:
                    tool_name, tool_args = current_tool_call_matches.groups()
                    current_tool_call["name"] = tool_name
                    current_tool_call["arguments"] = tool_args
                else:
                    current_tool_call_name_matches = (
                        self.stream_tool_call_name_regex.match(
                            tool_call_portion))
                    if current_tool_call_name_matches:
                        # use the named group: .groups() would store a
                        # tuple, not the name string
                        current_tool_call["name"] = (
                            current_tool_call_name_matches.group(
                                "function_name"))
                        current_tool_call["arguments"] = ""
                    else:
                        logger.debug("Not enough token")
                        return None

            # case - we haven't sent the tool name yet. If it's available, send
            #   it. otherwise, wait until it's available.
            if not self.current_tool_name_sent:
                if current_tool_call is None:
                    return None
                function_name: Union[str, None] = current_tool_call.get("name")
                if function_name:
                    self.current_tool_name_sent = True
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            type="function",
                            id=make_tool_call_id(),
                            function=DeltaFunctionCall(
                                name=function_name).model_dump(
                                    exclude_none=True),
                        )
                    ])
                else:
                    return None

            # case -- otherwise, send the tool call delta

            # if the tool call portion is None, send the delta as text
            if tool_call_portion is None:
                # if there's text but not tool calls, send that -
                # otherwise None to skip chunk
                delta = (DeltaMessage(
                    content=delta_text) if text_portion is not None else None)
                return delta

            # now, the nitty-gritty of tool calls
            # now we have the portion to parse as tool call.

            logger.debug("Trying to parse current tool call with ID %s",
                         self.current_tool_id)

            # if we're starting a new tool call, push an empty object in as
            #   a placeholder for the arguments
            if len(self.prev_tool_call_arr) <= self.current_tool_id:
                self.prev_tool_call_arr.append({})

            # main logic for tool parsing here - compare prev. partially-parsed
            #   JSON to the current partially-parsed JSON
            prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
                "arguments")
            cur_arguments = current_tool_call.get("arguments")

            logger.debug("diffing old arguments: %s", prev_arguments)
            logger.debug("against new ones: %s", cur_arguments)

            # case -- no arguments have been created yet. skip sending a delta.
            if not cur_arguments and not prev_arguments:
                logger.debug("Skipping text %s - no arguments", delta_text)
                delta = None

            # case -- prev arguments are defined, but none are now.
            #   probably impossible, but not a fatal error - just keep going
            elif not cur_arguments and prev_arguments:
                logger.error("should be impossible to have arguments reset "
                             "mid-call. skipping streaming anything.")
                delta = None

            # case -- we now have the first info about arguments available from
            #   autocompleting the JSON
            elif cur_arguments and not prev_arguments:

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=cur_arguments).model_dump(
                                exclude_none=True),
                    )
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] = cur_arguments

            # last case -- we have an update to existing arguments.
            elif cur_arguments and prev_arguments:
                # only stream the strict suffix of the previous arguments
                if (isinstance(delta_text, str)
                        and cur_arguments != prev_arguments
                        and len(cur_arguments) > len(prev_arguments)
                        and cur_arguments.startswith(prev_arguments)):
                    delta_arguments = cur_arguments[len(prev_arguments):]
                    logger.debug("got diff %s", delta_text)

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            function=DeltaFunctionCall(
                                arguments=delta_arguments).model_dump(
                                    exclude_none=True),
                        )
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] = cur_arguments
                else:
                    delta = None

            # handle saving the state for the current tool into
            # the "prev" list for use in diffing for the next iteration
            if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
                self.prev_tool_call_arr[
                    self.current_tool_id] = current_tool_call
            else:
                self.prev_tool_call_arr.append(current_tool_call)

            return delta

        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            return None  # do not stream a delta. skip this token ID.

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

stream_tool_call_name_regex instance-attribute

stream_tool_call_name_regex = compile(
    "(?P<function_name>.*)<|tool▁sep|>"
)

stream_tool_call_portion_regex instance-attribute

stream_tool_call_portion_regex = compile(
    "(?P<function_name>.*)<|tool▁sep|>(?P<function_arguments>.*)"
)

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_call_end_token instance-attribute

tool_call_end_token: str = '<|tool▁call▁end|>'

tool_call_end_token_id instance-attribute

tool_call_end_token_id = get(tool_call_end_token)

tool_call_regex instance-attribute

tool_call_regex = compile(
    "<|tool▁call▁begin|>(?P<function_name>.*)<|tool▁sep|>(?P<function_arguments>.*)<|tool▁call▁end|>"
)

tool_call_start_token instance-attribute

tool_call_start_token: str = '<|tool▁call▁begin|>'

tool_call_start_token_id instance-attribute

tool_call_start_token_id = get(tool_call_start_token)

tool_calls_end_token instance-attribute

tool_calls_end_token: str = '<|tool▁calls▁end|>'

tool_calls_end_token_id instance-attribute

tool_calls_end_token_id = get(tool_calls_end_token)

tool_calls_start_token instance-attribute

tool_calls_start_token: str = '<|tool▁calls▁begin|>'

tool_calls_start_token_id instance-attribute

tool_calls_start_token_id = get(tool_calls_start_token)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Set up streaming state, token strings/ids, and regex patterns.

    Raises:
        ValueError: if no tokenizer was supplied.
        RuntimeError: if the tool-call wrapper tokens are not in the
            tokenizer vocabulary.
    """
    super().__init__(tokenizer)

    # Streaming state: which tool we are on, whether its name has been
    # emitted yet, and the argument text already streamed per tool.
    self.current_tool_name_sent: bool = False
    self.prev_tool_call_arr: list[dict] = []
    self.current_tool_id: int = -1
    self.streamed_args_for_tool: list[str] = (
        [])  # map what has been streamed for each tool so far to a list

    self.tool_calls_start_token: str = "<|tool▁calls▁begin|>"
    self.tool_calls_end_token: str = "<|tool▁calls▁end|>"

    self.tool_call_start_token: str = "<|tool▁call▁begin|>"
    self.tool_call_end_token: str = "<|tool▁call▁end|>"

    # Separator between the function name and its JSON arguments.
    self.tool_sep_token: str = "<|tool▁sep|>"

    # Build the patterns from the token strings with re.escape() so any
    # regex metacharacters in the tokens (e.g. "|") are matched literally
    # instead of being treated as alternation, which would silently break
    # matching.
    self.tool_call_regex = re.compile(
        re.escape(self.tool_call_start_token) +
        r"(?P<function_name>.*)" + re.escape(self.tool_sep_token) +
        r"(?P<function_arguments>.*)" +
        re.escape(self.tool_call_end_token))

    self.stream_tool_call_portion_regex = re.compile(
        r"(?P<function_name>.*)" + re.escape(self.tool_sep_token) +
        r"(?P<function_arguments>.*)")

    self.stream_tool_call_name_regex = re.compile(
        r"(?P<function_name>.*)" + re.escape(self.tool_sep_token))

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ToolParser "
            "constructor during construction.")
    # Token ids are looked up once; .get() returns None for tokens that
    # are not in the vocabulary.
    self.tool_calls_start_token_id = self.vocab.get(
        self.tool_calls_start_token)
    self.tool_calls_end_token_id = self.vocab.get(
        self.tool_calls_end_token)

    self.tool_call_start_token_id = self.vocab.get(
        self.tool_call_start_token)
    self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

    if (self.tool_calls_start_token_id is None
            or self.tool_calls_end_token_id is None):
        raise RuntimeError(
            "DeepSeek-V3.1 Tool parser could not locate tool call "
            "start/end tokens in the tokenizer!")

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Extract all tool calls from a complete (non-streaming) response.

    Returns an ExtractedToolCallInformation whose ``content`` is the text
    preceding the tool-calls block (or None if empty); on any parsing
    error the full output is returned as plain content.
    """

    # Fast path: no tool-call marker means the output is plain content.
    if self.tool_calls_start_token not in model_output:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        # One (name, arguments) tuple per complete tool call found.
        matched_pairs = self.tool_call_regex.findall(model_output)

        parsed_calls = [
            ToolCall(
                type="function",
                function=FunctionCall(name=fn_name, arguments=fn_args),
            ) for fn_name, fn_args in matched_pairs
        ]

        # Text before the tool-calls block is regular assistant content.
        marker_pos = model_output.find(self.tool_calls_start_token)
        leading_content = model_output[:marker_pos]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=parsed_calls,
            content=leading_content or None,
        )

    except Exception:
        logger.exception(
            "Error in extracting tool call from response.")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Extract a streaming delta (text and/or tool-call fragment) from the
    newest chunk of model output.

    Progress is tracked across calls through instance state
    (``current_tool_id``, ``current_tool_name_sent``,
    ``prev_tool_call_arr``, ``streamed_args_for_tool``). Returns a
    DeltaMessage to stream, or None when nothing should be emitted for
    this chunk.
    """

    logger.debug("delta_text: %s", delta_text)
    logger.debug("delta_token_ids: %s", delta_token_ids)
    # If no tool-calls-begin token has appeared yet, everything so far is
    # plain content.
    if self.tool_calls_start_token_id not in current_token_ids:
        logger.debug("No tool call tokens found!")
        return DeltaMessage(content=delta_text)
    # The outer wrapper tokens are never streamed to the client.
    delta_text = delta_text.replace(self.tool_calls_start_token,
                                    "").replace(self.tool_calls_end_token,
                                                "")
    try:

        # figure out where we are in the parsing by counting tool call
        # start & end tags
        prev_tool_start_count = previous_token_ids.count(
            self.tool_call_start_token_id)
        prev_tool_end_count = previous_token_ids.count(
            self.tool_call_end_token_id)
        cur_tool_start_count = current_token_ids.count(
            self.tool_call_start_token_id)
        cur_tool_end_count = current_token_ids.count(
            self.tool_call_end_token_id)
        tool_call_portion = None
        text_portion = None

        # case: if we're generating text, OR rounding out a tool call
        if (cur_tool_start_count == cur_tool_end_count
                and prev_tool_end_count == cur_tool_end_count
                and self.tool_call_end_token not in delta_text):
            logger.debug("Generating text content! skipping tool parsing.")
            return DeltaMessage(content=delta_text)

        if self.tool_call_end_token in delta_text:
            logger.debug("tool_call_end_token in delta_text")
            full_text = current_text + delta_text
            tool_call_portion = full_text.split(
                self.tool_call_start_token)[-1].split(
                    self.tool_call_end_token)[0].rstrip()
            delta_text = delta_text.split(
                self.tool_call_end_token)[0].rstrip()
            # NOTE(review): delta_text was already truncated at the end
            # token above, so this split is a no-op and text_portion is
            # just the stripped delta -- confirm intended.
            text_portion = delta_text.split(
                self.tool_call_end_token)[-1].lstrip()

        # case -- we're starting a new tool call
        if (cur_tool_start_count > cur_tool_end_count
                and cur_tool_start_count > prev_tool_start_count):
            if len(delta_token_ids) > 1:
                tool_call_portion = current_text.split(
                    self.tool_call_start_token)[-1]
            else:
                # only the start token itself arrived; nothing to parse
                tool_call_portion = None
                delta = None

            text_portion = None

            # set cursors and state appropriately
            self.current_tool_id += 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("Starting on a new tool %s", self.current_tool_id)

        # case -- we're updating an existing tool call
        elif (cur_tool_start_count > cur_tool_end_count
              and cur_tool_start_count == prev_tool_start_count):

            # get the portion of the text that's the tool call
            tool_call_portion = current_text.split(
                self.tool_call_start_token)[-1]
            text_portion = None

        # case -- the current tool call is being closed.
        elif (cur_tool_start_count == cur_tool_end_count
              and cur_tool_end_count >= prev_tool_end_count):
            if self.prev_tool_call_arr is None or len(
                    self.prev_tool_call_arr) == 0:
                logger.debug(
                    "attempting to close tool call, but no tool call")
                return None
            diff = self.prev_tool_call_arr[self.current_tool_id].get(
                "arguments")
            if diff:
                # Stream whatever tail of the arguments (up to and
                # including the closing '"}') has not been emitted yet.
                # (A previous `diff is str` unicode-escape decode was dead
                # code: always False, and its result was immediately
                # overwritten below.)
                if '"}' not in delta_text:
                    return None
                end_loc = delta_text.rindex('"}')
                diff = delta_text[:end_loc] + '"}'
                logger.debug(
                    "Finishing tool and found diff that had not "
                    "been streamed yet: %s",
                    diff,
                )
                self.streamed_args_for_tool[self.current_tool_id] += diff
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=diff).model_dump(exclude_none=True),
                    )
                ])

        # case -- otherwise we're just generating text
        else:
            text = delta_text.replace(self.tool_call_start_token, "")
            text = text.replace(self.tool_call_end_token, "")
            delta = DeltaMessage(tool_calls=[], content=text)
            return delta

        current_tool_call = dict()
        if tool_call_portion:
            current_tool_call_matches = (
                self.stream_tool_call_portion_regex.match(
                    tool_call_portion))
            if current_tool_call_matches:
                tool_name, tool_args = current_tool_call_matches.groups()
                current_tool_call["name"] = tool_name
                current_tool_call["arguments"] = tool_args
            else:
                current_tool_call_name_matches = (
                    self.stream_tool_call_name_regex.match(
                        tool_call_portion))
                if current_tool_call_name_matches:
                    # use the named group: .groups() would store a tuple,
                    # not the name string
                    current_tool_call["name"] = (
                        current_tool_call_name_matches.group(
                            "function_name"))
                    current_tool_call["arguments"] = ""
                else:
                    logger.debug("Not enough token")
                    return None

        # case - we haven't sent the tool name yet. If it's available, send
        #   it. otherwise, wait until it's available.
        if not self.current_tool_name_sent:
            if current_tool_call is None:
                return None
            function_name: Union[str, None] = current_tool_call.get("name")
            if function_name:
                self.current_tool_name_sent = True
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        type="function",
                        id=make_tool_call_id(),
                        function=DeltaFunctionCall(
                            name=function_name).model_dump(
                                exclude_none=True),
                    )
                ])
            else:
                return None

        # case -- otherwise, send the tool call delta

        # if the tool call portion is None, send the delta as text
        if tool_call_portion is None:
            # if there's text but not tool calls, send that -
            # otherwise None to skip chunk
            delta = (DeltaMessage(
                content=delta_text) if text_portion is not None else None)
            return delta

        # now, the nitty-gritty of tool calls
        # now we have the portion to parse as tool call.

        logger.debug("Trying to parse current tool call with ID %s",
                     self.current_tool_id)

        # if we're starting a new tool call, push an empty object in as
        #   a placeholder for the arguments
        if len(self.prev_tool_call_arr) <= self.current_tool_id:
            self.prev_tool_call_arr.append({})

        # main logic for tool parsing here - compare prev. partially-parsed
        #   JSON to the current partially-parsed JSON
        prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
            "arguments")
        cur_arguments = current_tool_call.get("arguments")

        logger.debug("diffing old arguments: %s", prev_arguments)
        logger.debug("against new ones: %s", cur_arguments)

        # case -- no arguments have been created yet. skip sending a delta.
        if not cur_arguments and not prev_arguments:
            logger.debug("Skipping text %s - no arguments", delta_text)
            delta = None

        # case -- prev arguments are defined, but none are now.
        #   probably impossible, but not a fatal error - just keep going
        elif not cur_arguments and prev_arguments:
            logger.error("should be impossible to have arguments reset "
                         "mid-call. skipping streaming anything.")
            delta = None

        # case -- we now have the first info about arguments available from
        #   autocompleting the JSON
        elif cur_arguments and not prev_arguments:

            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(
                    index=self.current_tool_id,
                    function=DeltaFunctionCall(
                        arguments=cur_arguments).model_dump(
                            exclude_none=True),
                )
            ])
            self.streamed_args_for_tool[
                self.current_tool_id] = cur_arguments

        # last case -- we have an update to existing arguments.
        elif cur_arguments and prev_arguments:
            # only stream the strict suffix of the previous arguments
            if (isinstance(delta_text, str)
                    and cur_arguments != prev_arguments
                    and len(cur_arguments) > len(prev_arguments)
                    and cur_arguments.startswith(prev_arguments)):
                delta_arguments = cur_arguments[len(prev_arguments):]
                logger.debug("got diff %s", delta_text)

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=delta_arguments).model_dump(
                                exclude_none=True),
                    )
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] = cur_arguments
            else:
                delta = None

        # handle saving the state for the current tool into
        # the "prev" list for use in diffing for the next iteration
        if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
            self.prev_tool_call_arr[
                self.current_tool_id] = current_tool_call
        else:
            self.prev_tool_call_arr.append(current_tool_call)

        return delta

    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        return None  # do not stream a delta. skip this token ID.

DeepSeekV3ToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
@ToolParserManager.register_module("deepseek_v3")
class DeepSeekV3ToolParser(ToolParser):
    """Tool-call parser for DeepSeek-V3 style model output.

    DeepSeek-V3 wraps tool calls between sentinel tokens: an outer
    "calls begin/end" pair around the whole tool-call section, and an
    inner "call begin/end" pair around each individual call, whose body
    is ``TYPE<sep>NAME`` followed by a fenced ```json block of arguments.

    Supports both one-shot extraction (``extract_tool_calls``) and
    incremental streaming extraction (``extract_tool_calls_streaming``).
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        # Streaming cursors: which tool we are on, whether its name has
        # been emitted, and what argument text was already streamed.
        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.streamed_args_for_tool: list[str] = (
            [])  # map what has been streamed for each tool so far to a list

        self.tool_calls_start_token: str = "<|tool▁calls▁begin|>"
        self.tool_calls_end_token: str = "<|tool▁calls▁end|>"

        self.tool_call_start_token: str = "<|tool▁call▁begin|>"
        self.tool_call_end_token: str = "<|tool▁call▁end|>"

        # NOTE(review): the bar characters in these sentinel tokens must be
        # the tokenizer's exact characters; if they were plain ASCII '|',
        # the regex patterns below would contain unintended alternation --
        # confirm against the model's tokenizer vocabulary.

        # Matches one complete tool call (non-streaming extraction).
        self.tool_call_regex = re.compile(
            r"<|tool▁call▁begin|>(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\n```json\n(?P<function_arguments>.*)\n```<|tool▁call▁end|>"
        )

        # Matches a partial call that already has some argument text
        # (used during streaming).
        self.stream_tool_call_portion_regex = re.compile(
            r"(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\n```json\n(?P<function_arguments>.*[^\n`])"
        )

        # Matches a partial call where only the name is complete so far.
        self.stream_tool_call_name_regex = re.compile(
            r"(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\n")

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")
        self.tool_calls_start_token_id = self.vocab.get(
            self.tool_calls_start_token)
        self.tool_calls_end_token_id = self.vocab.get(
            self.tool_calls_end_token)

        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

        if (self.tool_calls_start_token_id is None
                or self.tool_calls_end_token_id is None):
            raise RuntimeError(
                "DeepSeek-V3 Tool parser could not locate tool call start/end "
                "tokens in the tokenizer!")

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Extract every complete tool call from a finished response.

        Returns tools_called=False with the raw output as content when no
        tool-call sentinel is present; otherwise returns the parsed tool
        calls plus any content that preceded the tool-call section.
        """

        # sanity check; avoid unnecessary processing
        if self.tool_calls_start_token not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        else:
            try:
                # findall returns one (type, name, arguments) tuple per
                # complete tool call matched in the output
                function_call_tuples = self.tool_call_regex.findall(
                    model_output)

                tool_calls = []
                for match in function_call_tuples:
                    tool_type, function_name, function_args = match
                    tool_calls.append(
                        ToolCall(
                            type=tool_type,
                            function=FunctionCall(name=function_name,
                                                  arguments=function_args),
                        ))

                # everything before the first sentinel is normal content
                content = model_output[:model_output.
                                       find(self.tool_calls_start_token)]
                return ExtractedToolCallInformation(
                    tools_called=True,
                    tool_calls=tool_calls,
                    content=content if content else None,
                )

            except Exception:
                logger.exception(
                    "Error in extracting tool call from response.")
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Incrementally extract tool-call deltas while streaming.

        Counts tool-call start/end sentinel tokens seen so far to decide
        whether ``delta_text`` is plain content, the start of a new call,
        an update to the current call's arguments, or the close of a call.
        Returns a DeltaMessage to stream, or None to skip this token.
        """

        logger.debug("delta_text: %s", delta_text)
        logger.debug("delta_token_ids: %s", delta_token_ids)
        # check to see if we should be streaming a tool call - is there a
        if self.tool_calls_start_token_id not in current_token_ids:
            logger.debug("No tool call tokens found!")
            return DeltaMessage(content=delta_text)
        # the outer calls-begin/end sentinels are never streamed as content
        delta_text = delta_text.replace(self.tool_calls_start_token,
                                        "").replace(self.tool_calls_end_token,
                                                    "")
        try:

            # figure out where we are in the parsing by counting tool call
            # start & end tags
            prev_tool_start_count = previous_token_ids.count(
                self.tool_call_start_token_id)
            prev_tool_end_count = previous_token_ids.count(
                self.tool_call_end_token_id)
            cur_tool_start_count = current_token_ids.count(
                self.tool_call_start_token_id)
            cur_tool_end_count = current_token_ids.count(
                self.tool_call_end_token_id)
            tool_call_portion = None
            text_portion = None

            # case: if we're generating text, OR rounding out a tool call
            if (cur_tool_start_count == cur_tool_end_count
                    and prev_tool_end_count == cur_tool_end_count
                    and self.tool_call_end_token not in delta_text):
                logger.debug("Generating text content! skipping tool parsing.")
                return DeltaMessage(content=delta_text)

            if self.tool_call_end_token in delta_text:
                logger.debug("tool_call_end_token in delta_text")
                full_text = current_text + delta_text
                tool_call_portion = full_text.split(
                    self.tool_call_start_token)[-1].split(
                        self.tool_call_end_token)[0].rstrip()
                delta_text = delta_text.split(
                    self.tool_call_end_token)[0].rstrip()
                # NOTE(review): delta_text no longer contains the end token
                # here, so this split is a no-op and text_portion always
                # equals the stripped delta -- confirm intended ordering.
                text_portion = delta_text.split(
                    self.tool_call_end_token)[-1].lstrip()

            # case -- we're starting a new tool call
            if (cur_tool_start_count > cur_tool_end_count
                    and cur_tool_start_count > prev_tool_start_count):
                if len(delta_token_ids) > 1:
                    tool_call_portion = current_text.split(
                        self.tool_call_start_token)[-1]
                else:
                    tool_call_portion = None
                    delta = None

                text_portion = None

                # set cursors and state appropriately
                self.current_tool_id += 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("Starting on a new tool %s", self.current_tool_id)

            # case -- we're updating an existing tool call
            elif (cur_tool_start_count > cur_tool_end_count
                  and cur_tool_start_count == prev_tool_start_count):

                # get the portion of the text that's the tool call
                tool_call_portion = current_text.split(
                    self.tool_call_start_token)[-1]
                text_portion = None

            # case -- the current tool call is being closed.
            elif (cur_tool_start_count == cur_tool_end_count
                  and cur_tool_end_count >= prev_tool_end_count):
                if self.prev_tool_call_arr is None or len(
                        self.prev_tool_call_arr) == 0:
                    logger.debug(
                        "attempting to close tool call, but no tool call")
                    return None
                diff = self.prev_tool_call_arr[self.current_tool_id].get(
                    "arguments")
                if diff:
                    # BUGFIX: removed a dead statement that tried to
                    # unicode-unescape `diff` behind the guard `diff is str`
                    # (comparing a value to the *type* with `is` is always
                    # False) -- its result was overwritten below anyway.
                    if '"}' not in delta_text:
                        return None
                    # stream everything up to and including the final `"}`
                    end_loc = delta_text.rindex('"}')
                    diff = delta_text[:end_loc] + '"}'
                    logger.debug(
                        "Finishing tool and found diff that had not "
                        "been streamed yet: %s",
                        diff,
                    )
                    self.streamed_args_for_tool[self.current_tool_id] += diff
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            function=DeltaFunctionCall(
                                arguments=diff).model_dump(exclude_none=True),
                        )
                    ])

            # case -- otherwise we're just generating text
            else:
                text = delta_text.replace(self.tool_call_start_token, "")
                text = text.replace(self.tool_call_end_token, "")
                delta = DeltaMessage(tool_calls=[], content=text)
                return delta

            current_tool_call = dict()
            if tool_call_portion:
                current_tool_call_matches = (
                    self.stream_tool_call_portion_regex.match(
                        tool_call_portion))
                if current_tool_call_matches:
                    tool_type, tool_name, tool_args = (
                        current_tool_call_matches.groups())
                    current_tool_call["name"] = tool_name
                    current_tool_call["arguments"] = tool_args
                else:
                    current_tool_call_name_matches = (
                        self.stream_tool_call_name_regex.match(
                            tool_call_portion))
                    if current_tool_call_name_matches:
                        tool_type, tool_name = (
                            current_tool_call_name_matches.groups())
                        current_tool_call["name"] = tool_name
                        current_tool_call["arguments"] = ""
                    else:
                        logger.debug("Not enough tokens")
                        return None

            # case - we haven't sent the tool name yet. If it's available, send
            #   it. otherwise, wait until it's available.
            if not self.current_tool_name_sent:
                if current_tool_call is None:
                    return None
                function_name: Union[str, None] = current_tool_call.get("name")
                if function_name:
                    self.current_tool_name_sent = True
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            type="function",
                            id=make_tool_call_id(),
                            function=DeltaFunctionCall(
                                name=function_name).model_dump(
                                    exclude_none=True),
                        )
                    ])
                else:
                    return None

            # case -- otherwise, send the tool call delta

            # if the tool call portion is None, send the delta as text
            if tool_call_portion is None:
                # if there's text but not tool calls, send that -
                # otherwise None to skip chunk
                delta = (DeltaMessage(
                    content=delta_text) if text_portion is not None else None)
                return delta

            # now, the nitty-gritty of tool calls
            # now we have the portion to parse as tool call.

            logger.debug("Trying to parse current tool call with ID %s",
                         self.current_tool_id)

            # if we're starting a new tool call, push an empty object in as
            #   a placeholder for the arguments
            if len(self.prev_tool_call_arr) <= self.current_tool_id:
                self.prev_tool_call_arr.append({})

            # main logic for tool parsing here - compare prev. partially-parsed
            #   JSON to the current partially-parsed JSON
            prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
                "arguments")
            cur_arguments = current_tool_call.get("arguments")

            logger.debug("diffing old arguments: %s", prev_arguments)
            logger.debug("against new ones: %s", cur_arguments)

            # case -- no arguments have been created yet. skip sending a delta.
            if not cur_arguments and not prev_arguments:
                logger.debug("Skipping text %s - no arguments", delta_text)
                delta = None

            # case -- prev arguments are defined, but none are now.
            #   probably impossible, but not a fatal error - just keep going
            elif not cur_arguments and prev_arguments:
                logger.error("should be impossible to have arguments reset "
                             "mid-call. skipping streaming anything.")
                delta = None

            # case -- we now have the first info about arguments available from
            #   autocompleting the JSON
            elif cur_arguments and not prev_arguments:

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=cur_arguments).model_dump(
                                exclude_none=True),
                    )
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] = cur_arguments

            # last case -- we have an update to existing arguments.
            elif cur_arguments and prev_arguments:
                if (isinstance(delta_text, str)
                        and cur_arguments != prev_arguments
                        and len(cur_arguments) > len(prev_arguments)
                        and cur_arguments.startswith(prev_arguments)):
                    delta_arguments = cur_arguments[len(prev_arguments):]
                    # BUGFIX: log the computed diff rather than delta_text
                    logger.debug("got diff %s", delta_arguments)

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            function=DeltaFunctionCall(
                                arguments=delta_arguments).model_dump(
                                    exclude_none=True),
                        )
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] = cur_arguments
                else:
                    delta = None

            # handle saving the state for the current tool into
            # the "prev" list for use in diffing for the next iteration
            if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
                self.prev_tool_call_arr[
                    self.current_tool_id] = current_tool_call
            else:
                self.prev_tool_call_arr.append(current_tool_call)

            return delta

        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            return None  # do not stream a delta. skip this token ID.

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

stream_tool_call_name_regex instance-attribute

stream_tool_call_name_regex = compile(
    "(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\\n"
)

stream_tool_call_portion_regex instance-attribute

stream_tool_call_portion_regex = compile(
    "(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\\n```json\\n(?P<function_arguments>.*[^\\n`])"
)

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_call_end_token instance-attribute

tool_call_end_token: str = '<|tool▁call▁end|>'

tool_call_end_token_id instance-attribute

tool_call_end_token_id = get(tool_call_end_token)

tool_call_regex instance-attribute

tool_call_regex = compile(
    "<|tool▁call▁begin|>(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\\n```json\\n(?P<function_arguments>.*)\\n```<|tool▁call▁end|>"
)

tool_call_start_token instance-attribute

tool_call_start_token: str = '<|tool▁call▁begin|>'

tool_call_start_token_id instance-attribute

tool_call_start_token_id = get(tool_call_start_token)

tool_calls_end_token instance-attribute

tool_calls_end_token: str = '<|tool▁calls▁end|>'

tool_calls_end_token_id instance-attribute

tool_calls_end_token_id = get(tool_calls_end_token)

tool_calls_start_token instance-attribute

tool_calls_start_token: str = '<|tool▁calls▁begin|>'

tool_calls_start_token_id instance-attribute

tool_calls_start_token_id = get(tool_calls_start_token)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Initialize streaming state and DeepSeek-V3 sentinel-token regexes."""
    super().__init__(tokenizer)

    # Per-request streaming cursors.
    self.current_tool_name_sent: bool = False
    self.prev_tool_call_arr: list[dict] = []
    self.current_tool_id: int = -1
    # Argument text already streamed out, one entry per tool call.
    self.streamed_args_for_tool: list[str] = []

    # Sentinel tokens delimiting the tool-call section and each call.
    self.tool_calls_start_token: str = "<|tool▁calls▁begin|>"
    self.tool_calls_end_token: str = "<|tool▁calls▁end|>"
    self.tool_call_start_token: str = "<|tool▁call▁begin|>"
    self.tool_call_end_token: str = "<|tool▁call▁end|>"

    # Complete tool call (used by non-streaming extraction).
    self.tool_call_regex = re.compile(
        r"<|tool▁call▁begin|>(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\n```json\n(?P<function_arguments>.*)\n```<|tool▁call▁end|>"
    )
    # Partial call that already carries some argument text (streaming).
    self.stream_tool_call_portion_regex = re.compile(
        r"(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\n```json\n(?P<function_arguments>.*[^\n`])"
    )
    # Partial call where only the name is complete so far (streaming).
    self.stream_tool_call_name_regex = re.compile(
        r"(?P<type>.*)<|tool▁sep|>(?P<function_name>.*)\n")

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ToolParser "
            "constructor during construction.")

    # Resolve sentinel token IDs from the tokenizer vocabulary.
    vocab = self.vocab
    self.tool_calls_start_token_id = vocab.get(self.tool_calls_start_token)
    self.tool_calls_end_token_id = vocab.get(self.tool_calls_end_token)
    self.tool_call_start_token_id = vocab.get(self.tool_call_start_token)
    self.tool_call_end_token_id = vocab.get(self.tool_call_end_token)

    if (self.tool_calls_start_token_id is None
            or self.tool_calls_end_token_id is None):
        raise RuntimeError(
            "DeepSeek-V3 Tool parser could not locate tool call start/end "
            "tokens in the tokenizer!")

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Extract every complete tool call from a finished model response.

    If no tool-call sentinel is present, the whole output is returned as
    plain content. Otherwise, all complete tool calls are parsed out and
    any text preceding the tool-call section becomes the content.
    """
    # Fast path: no sentinel means the output contains no tool calls.
    if self.tool_calls_start_token not in model_output:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        # Each findall hit is a (type, name, arguments) tuple for one
        # complete tool call.
        matches = self.tool_call_regex.findall(model_output)
        tool_calls = [
            ToolCall(
                type=tool_type,
                function=FunctionCall(name=name, arguments=args),
            ) for tool_type, name, args in matches
        ]

        # Anything before the first sentinel is ordinary content.
        prefix = model_output[:model_output.
                              find(self.tool_calls_start_token)]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=prefix if prefix else None,
        )
    except Exception:
        logger.exception(
            "Error in extracting tool call from response.")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally extract tool-call deltas while the model streams.

    Counts tool-call start/end sentinel tokens seen so far to decide
    whether ``delta_text`` is plain content, the start of a new call, an
    update to the current call's arguments, or the close of a call.
    Returns a DeltaMessage to stream, or None to skip this token.
    """

    logger.debug("delta_text: %s", delta_text)
    logger.debug("delta_token_ids: %s", delta_token_ids)
    # check to see if we should be streaming a tool call - is there a
    if self.tool_calls_start_token_id not in current_token_ids:
        logger.debug("No tool call tokens found!")
        return DeltaMessage(content=delta_text)
    # the outer calls-begin/end sentinels are never streamed as content
    delta_text = delta_text.replace(self.tool_calls_start_token,
                                    "").replace(self.tool_calls_end_token,
                                                "")
    try:

        # figure out where we are in the parsing by counting tool call
        # start & end tags
        prev_tool_start_count = previous_token_ids.count(
            self.tool_call_start_token_id)
        prev_tool_end_count = previous_token_ids.count(
            self.tool_call_end_token_id)
        cur_tool_start_count = current_token_ids.count(
            self.tool_call_start_token_id)
        cur_tool_end_count = current_token_ids.count(
            self.tool_call_end_token_id)
        tool_call_portion = None
        text_portion = None

        # case: if we're generating text, OR rounding out a tool call
        if (cur_tool_start_count == cur_tool_end_count
                and prev_tool_end_count == cur_tool_end_count
                and self.tool_call_end_token not in delta_text):
            logger.debug("Generating text content! skipping tool parsing.")
            return DeltaMessage(content=delta_text)

        if self.tool_call_end_token in delta_text:
            logger.debug("tool_call_end_token in delta_text")
            full_text = current_text + delta_text
            tool_call_portion = full_text.split(
                self.tool_call_start_token)[-1].split(
                    self.tool_call_end_token)[0].rstrip()
            delta_text = delta_text.split(
                self.tool_call_end_token)[0].rstrip()
            # NOTE(review): delta_text no longer contains the end token
            # here, so this split is a no-op and text_portion always
            # equals the stripped delta -- confirm intended ordering.
            text_portion = delta_text.split(
                self.tool_call_end_token)[-1].lstrip()

        # case -- we're starting a new tool call
        if (cur_tool_start_count > cur_tool_end_count
                and cur_tool_start_count > prev_tool_start_count):
            if len(delta_token_ids) > 1:
                tool_call_portion = current_text.split(
                    self.tool_call_start_token)[-1]
            else:
                tool_call_portion = None
                delta = None

            text_portion = None

            # set cursors and state appropriately
            self.current_tool_id += 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("Starting on a new tool %s", self.current_tool_id)

        # case -- we're updating an existing tool call
        elif (cur_tool_start_count > cur_tool_end_count
              and cur_tool_start_count == prev_tool_start_count):

            # get the portion of the text that's the tool call
            tool_call_portion = current_text.split(
                self.tool_call_start_token)[-1]
            text_portion = None

        # case -- the current tool call is being closed.
        elif (cur_tool_start_count == cur_tool_end_count
              and cur_tool_end_count >= prev_tool_end_count):
            if self.prev_tool_call_arr is None or len(
                    self.prev_tool_call_arr) == 0:
                logger.debug(
                    "attempting to close tool call, but no tool call")
                return None
            diff = self.prev_tool_call_arr[self.current_tool_id].get(
                "arguments")
            if diff:
                # BUGFIX: removed a dead statement that tried to
                # unicode-unescape `diff` behind the guard `diff is str`
                # (comparing a value to the *type* with `is` is always
                # False) -- its result was overwritten below anyway.
                if '"}' not in delta_text:
                    return None
                # stream everything up to and including the final `"}`
                end_loc = delta_text.rindex('"}')
                diff = delta_text[:end_loc] + '"}'
                logger.debug(
                    "Finishing tool and found diff that had not "
                    "been streamed yet: %s",
                    diff,
                )
                self.streamed_args_for_tool[self.current_tool_id] += diff
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=diff).model_dump(exclude_none=True),
                    )
                ])

        # case -- otherwise we're just generating text
        else:
            text = delta_text.replace(self.tool_call_start_token, "")
            text = text.replace(self.tool_call_end_token, "")
            delta = DeltaMessage(tool_calls=[], content=text)
            return delta

        current_tool_call = dict()
        if tool_call_portion:
            current_tool_call_matches = (
                self.stream_tool_call_portion_regex.match(
                    tool_call_portion))
            if current_tool_call_matches:
                tool_type, tool_name, tool_args = (
                    current_tool_call_matches.groups())
                current_tool_call["name"] = tool_name
                current_tool_call["arguments"] = tool_args
            else:
                current_tool_call_name_matches = (
                    self.stream_tool_call_name_regex.match(
                        tool_call_portion))
                if current_tool_call_name_matches:
                    tool_type, tool_name = (
                        current_tool_call_name_matches.groups())
                    current_tool_call["name"] = tool_name
                    current_tool_call["arguments"] = ""
                else:
                    logger.debug("Not enough tokens")
                    return None

        # case - we haven't sent the tool name yet. If it's available, send
        #   it. otherwise, wait until it's available.
        if not self.current_tool_name_sent:
            if current_tool_call is None:
                return None
            function_name: Union[str, None] = current_tool_call.get("name")
            if function_name:
                self.current_tool_name_sent = True
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        type="function",
                        id=make_tool_call_id(),
                        function=DeltaFunctionCall(
                            name=function_name).model_dump(
                                exclude_none=True),
                    )
                ])
            else:
                return None

        # case -- otherwise, send the tool call delta

        # if the tool call portion is None, send the delta as text
        if tool_call_portion is None:
            # if there's text but not tool calls, send that -
            # otherwise None to skip chunk
            delta = (DeltaMessage(
                content=delta_text) if text_portion is not None else None)
            return delta

        # now, the nitty-gritty of tool calls
        # now we have the portion to parse as tool call.

        logger.debug("Trying to parse current tool call with ID %s",
                     self.current_tool_id)

        # if we're starting a new tool call, push an empty object in as
        #   a placeholder for the arguments
        if len(self.prev_tool_call_arr) <= self.current_tool_id:
            self.prev_tool_call_arr.append({})

        # main logic for tool parsing here - compare prev. partially-parsed
        #   JSON to the current partially-parsed JSON
        prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
            "arguments")
        cur_arguments = current_tool_call.get("arguments")

        logger.debug("diffing old arguments: %s", prev_arguments)
        logger.debug("against new ones: %s", cur_arguments)

        # case -- no arguments have been created yet. skip sending a delta.
        if not cur_arguments and not prev_arguments:
            logger.debug("Skipping text %s - no arguments", delta_text)
            delta = None

        # case -- prev arguments are defined, but none are now.
        #   probably impossible, but not a fatal error - just keep going
        elif not cur_arguments and prev_arguments:
            logger.error("should be impossible to have arguments reset "
                         "mid-call. skipping streaming anything.")
            delta = None

        # case -- we now have the first info about arguments available from
        #   autocompleting the JSON
        elif cur_arguments and not prev_arguments:

            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(
                    index=self.current_tool_id,
                    function=DeltaFunctionCall(
                        arguments=cur_arguments).model_dump(
                            exclude_none=True),
                )
            ])
            self.streamed_args_for_tool[
                self.current_tool_id] = cur_arguments

        # last case -- we have an update to existing arguments.
        elif cur_arguments and prev_arguments:
            if (isinstance(delta_text, str)
                    and cur_arguments != prev_arguments
                    and len(cur_arguments) > len(prev_arguments)
                    and cur_arguments.startswith(prev_arguments)):
                delta_arguments = cur_arguments[len(prev_arguments):]
                # BUGFIX: log the computed diff rather than delta_text
                logger.debug("got diff %s", delta_arguments)

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=delta_arguments).model_dump(
                                exclude_none=True),
                    )
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] = cur_arguments
            else:
                delta = None

        # handle saving the state for the current tool into
        # the "prev" list for use in diffing for the next iteration
        if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
            self.prev_tool_call_arr[
                self.current_tool_id] = current_tool_call
        else:
            self.prev_tool_call_arr.append(current_tool_call)

        return delta

    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        return None  # do not stream a delta. skip this token ID.

Glm4MoeModelToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py
@ToolParserManager.register_module("glm45")
class Glm4MoeModelToolParser(ToolParser):
    """Tool-call parser for GLM-4 MoE models.

    The model wraps each call in ``<tool_call>`` / ``</tool_call>`` tags,
    with the function name on the first line followed by alternating
    ``<arg_key>`` / ``<arg_value>`` pairs.  This parser converts those
    spans into OpenAI-style tool calls for both complete and streamed
    responses.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)
        # Streaming bookkeeping: which tool index is in flight and what
        # has already been emitted for each tool.
        self.current_tool_name_sent = False
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id = -1
        self.streamed_args_for_tool: list[str] = []
        # Sentinel tags delimiting a single tool call in model output.
        self.tool_call_start_token = "<tool_call>"
        self.tool_call_end_token = "</tool_call>"

        # The same tag also marks the start of the tool-call section.
        self.tool_calls_start_token = self.tool_call_start_token

        # One whole <tool_call>...</tool_call> span (non-greedy).
        self.func_call_regex = re.compile(r"<tool_call>.*?</tool_call>",
                                          re.DOTALL)
        # Group 1: function name (first line); group 2: argument body.
        self.func_detail_regex = re.compile(
            r"<tool_call>([^\n]*)\n(.*)</tool_call>", re.DOTALL)
        # One <arg_key>/<arg_value> pair within the argument body.
        self.func_arg_regex = re.compile(
            r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>",
            re.DOTALL)
        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")

        # Vocab ids of the sentinel tags (None when absent from vocab).
        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
        # Accumulates streamed text until a complete span is available.
        self._buffer = ""

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Parse every complete tool call out of a finished response.

        Returns the extracted calls plus any content preceding the first
        tool-call tag; on any parsing error the whole output is returned
        as plain content with no tool calls.
        """

        def _is_string_type(
                tool_name: str, arg_name: str,
                tools: Optional[list[ChatCompletionToolsParam]]) -> bool:
            # True when the request's tool schema declares this argument
            # as type "string" (such values are kept verbatim).
            if tools is None:
                return False
            for tool in tools:
                if tool.function.name == tool_name:
                    if tool.function.parameters is None:
                        return False
                    arg_type = tool.function.parameters.get(
                        "properties", {}).get(arg_name, {}).get("type", None)
                    return arg_type == "string"
            logger.warning("No tool named '%s'.", tool_name)
            return False

        def _deserialize(value: str) -> Any:
            # Best effort: try JSON, then a Python literal, else keep the
            # raw string unchanged.
            try:
                return json.loads(value)
            except Exception:
                pass

            try:
                return ast.literal_eval(value)
            except Exception:
                pass
            return value

        matched_tool_calls = self.func_call_regex.findall(model_output)
        logger.debug("model_output: %s", model_output)
        try:
            tool_calls = []
            for match in matched_tool_calls:
                # First line is the tool name; the rest holds arg pairs.
                tc_detail = self.func_detail_regex.search(match)
                tc_name = tc_detail.group(1)
                tc_args = tc_detail.group(2)
                pairs = self.func_arg_regex.findall(tc_args)
                arg_dct = {}
                for key, value in pairs:
                    arg_key = key.strip()
                    arg_val = value.strip()
                    if not _is_string_type(tc_name, arg_key, request.tools):
                        arg_val = _deserialize(arg_val)
                    logger.debug("arg_key = %s, arg_val = %s", arg_key,
                                 arg_val)
                    arg_dct[arg_key] = arg_val
                tool_calls.append(
                    ToolCall(type="function",
                             function=FunctionCall(
                                 name=tc_name, arguments=json.dumps(arg_dct))))
        except Exception:
            logger.exception("Failed to extract tool call spec")
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)
        else:
            if len(tool_calls) > 0:
                # Content is everything before the first tool-call tag.
                content = model_output[:model_output.
                                       find(self.tool_calls_start_token)]
                return ExtractedToolCallInformation(tools_called=True,
                                                    tool_calls=tool_calls,
                                                    content=content)
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Incrementally emit tool calls as complete spans arrive.

        Buffers delta text until a whole tool-call span is present, then
        emits name and full arguments in a single delta message.
        """
        self._buffer += delta_text
        cur_text = self._buffer
        start_idx = cur_text.find(self.tool_call_start_token)
        if start_idx == -1:
            # No (partial) tool call in the buffer: flush it as content.
            self._buffer = ""
            if self.current_tool_id > 0:
                # NOTE(review): after the first tool call has completed,
                # buffered text is dropped rather than streamed.
                cur_text = ""
            return DeltaMessage(content=cur_text)
        logger.debug("cur_text = %s", cur_text)
        end_idx = cur_text.find(self.tool_call_end_token)
        if end_idx != -1:
            # A complete tool call is buffered; initialize state lazily.
            if self.current_tool_id == -1:
                self.current_tool_id = 0
                self.prev_tool_call_arr = []
                self.streamed_args_for_tool = []
            while len(self.prev_tool_call_arr) <= self.current_tool_id:
                self.prev_tool_call_arr.append({})
            while len(self.streamed_args_for_tool) <= self.current_tool_id:
                self.streamed_args_for_tool.append("")

            # Reuse the non-streaming parser on the completed span.
            extracted_tool_calls = self.extract_tool_calls(
                cur_text[:end_idx + len(self.tool_call_end_token)], request)

            if len(extracted_tool_calls.tool_calls) == 0:
                logger.warning("Failed to extract any tool calls.")
                return None
            tool_call = extracted_tool_calls.tool_calls[0]
            self.prev_tool_call_arr[self.current_tool_id] = {
                "name": tool_call.function.name,
                "arguments": json.loads(tool_call.function.arguments)
            }
            self.streamed_args_for_tool[
                self.current_tool_id] = tool_call.function.arguments
            delta = DeltaMessage(
                content=extracted_tool_calls.content,
                tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  id=tool_call.id,
                                  type=tool_call.type,
                                  function=DeltaFunctionCall(
                                      name=tool_call.function.name,
                                      arguments=tool_call.function.arguments))
                ])
            self.current_tool_id += 1
            # Keep whatever followed the closing tag for the next round.
            self._buffer = cur_text[end_idx + len(self.tool_call_end_token):]
            return delta

        # Start tag seen but no end tag yet: keep the partial call
        # buffered and stream the text before it as content.
        self._buffer = cur_text[start_idx:]
        return DeltaMessage(content=cur_text[:start_idx])

_buffer instance-attribute

_buffer = ''

current_tool_id instance-attribute

current_tool_id = -1

current_tool_name_sent instance-attribute

current_tool_name_sent = False

func_arg_regex instance-attribute

func_arg_regex = compile(
    "<arg_key>(.*?)</arg_key>\\s*<arg_value>(.*?)</arg_value>",
    DOTALL,
)

func_call_regex instance-attribute

func_call_regex = compile(
    "<tool_call>.*?</tool_call>", DOTALL
)

func_detail_regex instance-attribute

func_detail_regex = compile(
    "<tool_call>([^\\n]*)\\n(.*)</tool_call>", DOTALL
)

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_call_end_token instance-attribute

tool_call_end_token = '</tool_call>'

tool_call_end_token_id instance-attribute

tool_call_end_token_id = get(tool_call_end_token)

tool_call_start_token instance-attribute

tool_call_start_token = '<tool_call>'

tool_call_start_token_id instance-attribute

tool_call_start_token_id = get(tool_call_start_token)

tool_calls_start_token instance-attribute

tool_calls_start_token = tool_call_start_token

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Set up GLM-4 MoE tool-call parsing state and tag regexes."""
    super().__init__(tokenizer)

    # Per-request streaming bookkeeping.
    self.current_tool_name_sent = False
    self.prev_tool_call_arr: list[dict] = []
    self.current_tool_id = -1
    self.streamed_args_for_tool: list[str] = []

    # Sentinel tags delimiting one tool call; the opening tag also
    # marks the start of the tool-call section as a whole.
    self.tool_call_start_token = "<tool_call>"
    self.tool_call_end_token = "</tool_call>"
    self.tool_calls_start_token = self.tool_call_start_token

    # Whole span / name-vs-body split / key-value pair patterns.
    self.func_call_regex = re.compile(r"<tool_call>.*?</tool_call>",
                                      re.DOTALL)
    self.func_detail_regex = re.compile(
        r"<tool_call>([^\n]*)\n(.*)</tool_call>", re.DOTALL)
    self.func_arg_regex = re.compile(
        r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>",
        re.DOTALL)

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ToolParser "
            "constructor during construction.")

    # Vocab ids of the sentinel tags (None when absent from vocab).
    self.tool_call_start_token_id = self.vocab.get(
        self.tool_call_start_token)
    self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
    # Accumulates streamed text until a complete span arrives.
    self._buffer = ""

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Parse all complete tool-call spans out of a finished response.

    On success, returns the calls plus any content that preceded the
    first tag; on any parsing error, returns the whole output as plain
    content with no tool calls.
    """

    def _is_string_type(
            tool_name: str, arg_name: str,
            tools: Optional[list[ChatCompletionToolsParam]]) -> bool:
        # True only when the request schema declares this argument as
        # type "string" (such values are kept verbatim).
        if tools is None:
            return False
        for tool in tools:
            if tool.function.name != tool_name:
                continue
            params = tool.function.parameters
            if params is None:
                return False
            declared = params.get("properties",
                                  {}).get(arg_name, {}).get("type", None)
            return declared == "string"
        logger.warning("No tool named '%s'.", tool_name)
        return False

    def _deserialize(value: str) -> Any:
        # Best effort: JSON, then a Python literal, else the raw string.
        try:
            return json.loads(value)
        except Exception:
            pass
        try:
            return ast.literal_eval(value)
        except Exception:
            pass
        return value

    matched_tool_calls = self.func_call_regex.findall(model_output)
    logger.debug("model_output: %s", model_output)
    try:
        tool_calls = []
        for span in matched_tool_calls:
            # First line is the tool name; the remainder holds pairs.
            detail = self.func_detail_regex.search(span)
            name = detail.group(1)
            body = detail.group(2)
            args: dict = {}
            for raw_key, raw_value in self.func_arg_regex.findall(body):
                arg_key = raw_key.strip()
                arg_val = raw_value.strip()
                if not _is_string_type(name, arg_key, request.tools):
                    arg_val = _deserialize(arg_val)
                logger.debug("arg_key = %s, arg_val = %s", arg_key,
                             arg_val)
                args[arg_key] = arg_val
            tool_calls.append(
                ToolCall(type="function",
                         function=FunctionCall(name=name,
                                               arguments=json.dumps(args))))
    except Exception:
        logger.exception("Failed to extract tool call spec")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    if not tool_calls:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)
    # Content is everything before the first tool-call tag.
    content = model_output[:model_output.find(self.tool_calls_start_token)]
    return ExtractedToolCallInformation(tools_called=True,
                                        tool_calls=tool_calls,
                                        content=content)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally emit tool calls as complete spans arrive.

    Buffers delta text until a whole tool-call span is present, then
    emits name and full arguments in a single delta message.
    """
    self._buffer += delta_text
    cur_text = self._buffer
    start_idx = cur_text.find(self.tool_call_start_token)
    if start_idx == -1:
        # No (partial) tool call in the buffer: flush it as content.
        self._buffer = ""
        if self.current_tool_id > 0:
            # NOTE(review): after the first tool call has completed,
            # buffered text is dropped rather than streamed.
            cur_text = ""
        return DeltaMessage(content=cur_text)
    logger.debug("cur_text = %s", cur_text)
    end_idx = cur_text.find(self.tool_call_end_token)
    if end_idx != -1:
        # A complete tool call is buffered; initialize state lazily.
        if self.current_tool_id == -1:
            self.current_tool_id = 0
            self.prev_tool_call_arr = []
            self.streamed_args_for_tool = []
        while len(self.prev_tool_call_arr) <= self.current_tool_id:
            self.prev_tool_call_arr.append({})
        while len(self.streamed_args_for_tool) <= self.current_tool_id:
            self.streamed_args_for_tool.append("")

        # Reuse the non-streaming parser on the completed span.
        extracted_tool_calls = self.extract_tool_calls(
            cur_text[:end_idx + len(self.tool_call_end_token)], request)

        if len(extracted_tool_calls.tool_calls) == 0:
            logger.warning("Failed to extract any tool calls.")
            return None
        tool_call = extracted_tool_calls.tool_calls[0]
        self.prev_tool_call_arr[self.current_tool_id] = {
            "name": tool_call.function.name,
            "arguments": json.loads(tool_call.function.arguments)
        }
        self.streamed_args_for_tool[
            self.current_tool_id] = tool_call.function.arguments
        delta = DeltaMessage(
            content=extracted_tool_calls.content,
            tool_calls=[
                DeltaToolCall(index=self.current_tool_id,
                              id=tool_call.id,
                              type=tool_call.type,
                              function=DeltaFunctionCall(
                                  name=tool_call.function.name,
                                  arguments=tool_call.function.arguments))
            ])
        self.current_tool_id += 1
        # Keep whatever followed the closing tag for the next round.
        self._buffer = cur_text[end_idx + len(self.tool_call_end_token):]
        return delta

    # Start tag seen but no end tag yet: keep the partial call
    # buffered and stream the text before it as content.
    self._buffer = cur_text[start_idx:]
    return DeltaMessage(content=cur_text[:start_idx])

Granite20bFCToolParser

Bases: ToolParser

Tool call parser for the granite-20b-functioncalling model intended for use with the examples/tool_chat_template_granite20b_fc.jinja template.

Used when --enable-auto-tool-choice --tool-call-parser granite-20b-fc are all set

Source code in vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
@ToolParserManager.register_module("granite-20b-fc")
class Granite20bFCToolParser(ToolParser):
    """
    Tool call parser for the granite-20b-functioncalling model intended
    for use with the examples/tool_chat_template_granite20b_fc.jinja
    template.

    Used when --enable-auto-tool-choice --tool-call-parser granite-20b-fc
    are all set
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        # Tag that prefixes every serialized function call.
        self.bot_token = "<function_call>"
        self.tool_start_token = self.bot_token
        # Matches the opening tag plus any trailing whitespace.
        self.tool_call_regex = re.compile(r"<function_call>\s*")

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """Extract all <function_call> JSON payloads from a full response.

        Each tag is followed by a JSON object; raw_decode is used so the
        JSON may be followed by the next tag.  Any error returns the
        whole output as plain content.
        """
        if self.tool_start_token not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        dec = JSONDecoder()
        try:
            matches = list(self.tool_call_regex.finditer(model_output))
            logger.debug("Found %d tool call matches", len(matches))

            raw_function_calls = []

            for i, match in enumerate(matches):
                # position after the <function_call> tag
                start_of_json = match.end()
                # end_index == the start of the next function call
                # (if exists)
                next_function_call_start = (matches[i + 1].start() if i +
                                            1 < len(matches) else None)

                raw_function_calls.append(
                    dec.raw_decode(
                        model_output[start_of_json:next_function_call_start])
                    [0])

            logger.debug("Extracted %d tool calls", len(raw_function_calls))
            tool_calls = [
                ToolCall(
                    type="function",
                    function=FunctionCall(
                        name=function_call["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(function_call["arguments"],
                                             ensure_ascii=False),
                    ),
                ) for function_call in raw_function_calls
            ]

            # Content is everything before the first tag.
            content = model_output[:model_output.find(self.bot_token)]
            return ExtractedToolCallInformation(
                tools_called=True,
                tool_calls=tool_calls,
                content=content if content else None,
            )

        except Exception as e:
            logger.error("Error in extracting tool call from response %s", e)
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Stream tool-call deltas by partially parsing the JSON seen so far."""

        # Too little text to tell whether it starts with the tag yet.
        if len(current_text) < len(
                self.bot_token) and self.bot_token.startswith(current_text):
            return None

        # No tag prefix at all: plain content, pass the delta through.
        if not current_text.startswith(self.bot_token):
            return DeltaMessage(content=delta_text)

        # bit mask flags for partial JSON parsing. If the name hasn't been
        # sent yet, don't allow sending
        # an incomplete string since OpenAI only ever (as far as I have
        # seen) allows sending the entire tool/ function name at once.
        flags = Allow.ALL if self.current_tool_name_sent \
            else Allow.ALL & ~Allow.STR
        try:
            tool_call_arr = []
            is_complete = []
            try:
                start_idx = len(self.bot_token)
                start_idx = consume_space(start_idx, current_text)

                # Parse each "<function_call> {json}" segment in turn,
                # recording whether its JSON object is complete yet.
                while start_idx < len(current_text):
                    (obj,
                     end_idx) = partial_json_loads(current_text[start_idx:],
                                                   flags)
                    is_complete.append(
                        is_complete_json(current_text[start_idx:start_idx +
                                                      end_idx]))
                    start_idx += end_idx
                    start_idx = consume_space(start_idx, current_text)
                    start_idx += len(self.bot_token)
                    start_idx = consume_space(start_idx, current_text)
                    tool_call_arr.append(obj)
            except partial_json_parser.core.exceptions.MalformedJSON:
                logger.debug('not enough tokens to parse into JSON yet')
                return None

            # select as the current tool call the one we're on the state at
            current_tool_call: dict = tool_call_arr[self.current_tool_id] \
                if len(tool_call_arr) > 0 else {}

            # case -- if no tokens have been streamed for the tool, e.g.
            #   only the array brackets, stream nothing
            if len(tool_call_arr) == 0:
                return None

            # case: we are starting a new tool in the array
            #   -> array has > 0 length AND length has moved past cursor
            elif (len(tool_call_arr) > 0
                  and len(tool_call_arr) > self.current_tool_id + 1):

                # if we're moving on to a new call, first make sure we
                # haven't missed anything in the previous one that was
                # auto-generated due to JSON completions, but wasn't
                # streamed to the client yet.
                if self.current_tool_id >= 0:
                    cur_arguments = current_tool_call.get("arguments")
                    if cur_arguments:
                        cur_args_json = json.dumps(cur_arguments,
                                                   ensure_ascii=False)
                        sent = len(
                            self.streamed_args_for_tool[self.current_tool_id])
                        argument_diff = cur_args_json[sent:]

                        logger.debug("got arguments diff: %s", argument_diff)
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=argument_diff).
                                          model_dump(exclude_none=True))
                        ])
                        self.streamed_args_for_tool[
                            self.current_tool_id] += argument_diff
                    else:
                        delta = None
                else:
                    delta = None
                # re-set stuff pertaining to progress in the current tool
                self.current_tool_id = len(tool_call_arr) - 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("starting on new tool %d", self.current_tool_id)
                return delta

            # if the current tool name hasn't been sent, send if available
            # - otherwise send nothing
            elif not self.current_tool_name_sent:
                function_name = current_tool_call.get("name")
                if function_name:

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      type="function",
                                      id=make_tool_call_id(),
                                      function=DeltaFunctionCall(
                                          name=function_name).model_dump(
                                              exclude_none=True))
                    ])
                    self.current_tool_name_sent = True
                else:
                    delta = None

            # now we know we're on the same tool call and we're streaming
            # arguments
            else:
                cur_arguments = current_tool_call.get("arguments")
                delta = None

                if cur_arguments:
                    sent = len(
                        self.streamed_args_for_tool[self.current_tool_id])
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    prev_arguments = self.prev_tool_call_arr[
                        self.current_tool_id].get("arguments")

                    argument_diff = None
                    if is_complete[self.current_tool_id]:
                        # JSON is complete: flush everything not yet sent.
                        argument_diff = cur_args_json[sent:]
                    elif prev_arguments:
                        prev_args_json = json.dumps(prev_arguments,
                                                    ensure_ascii=False)
                        if cur_args_json != prev_args_json:

                            # Only the stable common prefix is safe to
                            # stream while the JSON is still partial.
                            prefix = find_common_prefix(
                                prev_args_json, cur_args_json)
                            argument_diff = prefix[sent:]

                    if argument_diff is not None:
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=argument_diff).
                                          model_dump(exclude_none=True))
                        ])
                        self.streamed_args_for_tool[
                            self.current_tool_id] += argument_diff

            self.prev_tool_call_arr = tool_call_arr
            return delta

        except Exception as e:
            logger.error("Error trying to handle streaming tool call: %s", e)
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")
            return None

bot_token instance-attribute

bot_token = '<function_call>'

tool_call_regex instance-attribute

tool_call_regex = compile('<function_call>\\s*')

tool_start_token instance-attribute

tool_start_token = bot_token

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Initialize the parser and compile the <function_call> tag pattern."""
    super().__init__(tokenizer)

    # Tag that prefixes every serialized function call; the "start"
    # alias is what extract_tool_calls checks for.
    self.bot_token = "<function_call>"
    self.tool_start_token = self.bot_token
    # Matches the opening tag plus any trailing whitespace.
    self.tool_call_regex = re.compile(r"<function_call>\s*")

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """Extract all <function_call> JSON payloads from a full response.

    Each tag is followed by a JSON object; raw_decode tolerates the
    next tag trailing the object.  Any error returns the whole output
    as plain content with no tool calls.
    """
    if self.tool_start_token not in model_output:
        # No tool calls at all: pass the text through untouched.
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    dec = JSONDecoder()
    try:
        matches = list(self.tool_call_regex.finditer(model_output))
        logger.debug("Found %d tool call matches", len(matches))

        raw_function_calls = []
        for idx, tag in enumerate(matches):
            # JSON starts right after the tag and runs up to the next
            # tag (or to end-of-string for the last call).
            json_start = tag.end()
            json_end = (matches[idx + 1].start()
                        if idx + 1 < len(matches) else None)
            obj, _ = dec.raw_decode(model_output[json_start:json_end])
            raw_function_calls.append(obj)

        logger.debug("Extracted %d tool calls", len(raw_function_calls))
        tool_calls = []
        for call in raw_function_calls:
            # Arguments are re-serialized: the API wants them as a
            # JSON string, not an object.
            tool_calls.append(
                ToolCall(
                    type="function",
                    function=FunctionCall(
                        name=call["name"],
                        arguments=json.dumps(call["arguments"],
                                             ensure_ascii=False),
                    ),
                ))

        # Content is everything before the first tag.
        content = model_output[:model_output.find(self.bot_token)]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=content if content else None,
        )

    except Exception as e:
        logger.error("Error in extracting tool call from response %s", e)
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally extract tool calls from a partially generated response.

    Called once per decoded chunk with the full text so far
    (``current_text``) plus the newly added ``delta_text``. Re-parses the
    accumulated text with a partial-JSON parser on every call and emits
    only what has not yet been streamed to the client: first the function
    name (always sent whole, in one delta), then argument-JSON fragments.

    Progress is tracked on ``self`` between calls via ``current_tool_id``,
    ``current_tool_name_sent``, ``streamed_args_for_tool`` and
    ``prev_tool_call_arr``.

    Returns a ``DeltaMessage`` with either plain content or tool-call
    deltas, or ``None`` when there is nothing new to emit yet.
    """

    # Still inside a possible bot-token prefix -- too early to decide
    # whether this is a tool call; wait for more tokens.
    if len(current_text) < len(
            self.bot_token) and self.bot_token.startswith(current_text):
        return None

    # No tool-call marker at the start: ordinary content, pass through.
    if not current_text.startswith(self.bot_token):
        return DeltaMessage(content=delta_text)

    # bit mask flags for partial JSON parsing. If the name hasn't been
    # sent yet, don't allow sending
    # an incomplete string since OpenAI only ever (as far as I have
    # seen) allows sending the entire tool/ function name at once.
    flags = Allow.ALL if self.current_tool_name_sent \
        else Allow.ALL & ~Allow.STR
    try:
        tool_call_arr = []
        is_complete = []
        try:
            start_idx = len(self.bot_token)
            start_idx = consume_space(start_idx, current_text)

            # Decode one JSON object per tool call; calls are separated
            # by another bot_token marker, which is skipped each round.
            while start_idx < len(current_text):
                (obj,
                 end_idx) = partial_json_loads(current_text[start_idx:],
                                               flags)
                # Record whether this object was fully closed JSON.
                is_complete.append(
                    is_complete_json(current_text[start_idx:start_idx +
                                                  end_idx]))
                start_idx += end_idx
                start_idx = consume_space(start_idx, current_text)
                # Skip the separator before the next call (harmlessly
                # overshoots past the end of the text after the last one).
                start_idx += len(self.bot_token)
                start_idx = consume_space(start_idx, current_text)
                tool_call_arr.append(obj)
        except partial_json_parser.core.exceptions.MalformedJSON:
            logger.debug('not enough tokens to parse into JSON yet')
            return None

        # select the tool call the streaming cursor currently points at
        # (current_tool_id is -1 before the first tool, indexing the
        # last element, which is harmless since the dict is unused then)
        current_tool_call: dict = tool_call_arr[self.current_tool_id] \
            if len(tool_call_arr) > 0 else {}

        # case -- if no tokens have been streamed for the tool, e.g.
        #   only the array brackets, stream nothing
        if len(tool_call_arr) == 0:
            return None

        # case: we are starting a new tool in the array
        #   -> array has > 0 length AND length has moved past cursor
        elif (len(tool_call_arr) > 0
              and len(tool_call_arr) > self.current_tool_id + 1):

            # if we're moving on to a new call, first make sure we
            # haven't missed anything in the previous one that was
            # auto-generated due to JSON completions, but wasn't
            # streamed to the client yet.
            if self.current_tool_id >= 0:
                cur_arguments = current_tool_call.get("arguments")
                if cur_arguments:
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    # Flush the suffix that was never sent.
                    sent = len(
                        self.streamed_args_for_tool[self.current_tool_id])
                    argument_diff = cur_args_json[sent:]

                    logger.debug("got arguments diff: %s", argument_diff)
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff
                else:
                    delta = None
            else:
                delta = None
            # re-set stuff pertaining to progress in the current tool
            self.current_tool_id = len(tool_call_arr) - 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("starting on new tool %d", self.current_tool_id)
            return delta

        # if the current tool name hasn't been sent, send if available
        # - otherwise send nothing
        elif not self.current_tool_name_sent:
            function_name = current_tool_call.get("name")
            if function_name:

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  type="function",
                                  id=make_tool_call_id(),
                                  function=DeltaFunctionCall(
                                      name=function_name).model_dump(
                                          exclude_none=True))
                ])
                self.current_tool_name_sent = True
            else:
                delta = None

        # now we know we're on the same tool call and we're streaming
        # arguments
        else:
            cur_arguments = current_tool_call.get("arguments")
            delta = None

            if cur_arguments:
                sent = len(
                    self.streamed_args_for_tool[self.current_tool_id])
                cur_args_json = json.dumps(cur_arguments,
                                           ensure_ascii=False)
                prev_arguments = self.prev_tool_call_arr[
                    self.current_tool_id].get("arguments")

                argument_diff = None
                if is_complete[self.current_tool_id]:
                    # JSON is closed: safe to send everything remaining.
                    argument_diff = cur_args_json[sent:]
                elif prev_arguments:
                    prev_args_json = json.dumps(prev_arguments,
                                                ensure_ascii=False)
                    if cur_args_json != prev_args_json:

                        # Only stream the stable common prefix; the tail
                        # may still be partial-parse auto-completion.
                        prefix = find_common_prefix(
                            prev_args_json, cur_args_json)
                        argument_diff = prefix[sent:]

                if argument_diff is not None:
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff

        self.prev_tool_call_arr = tool_call_arr
        return delta

    except Exception as e:
        # Never break the stream on a parsing hiccup; drop this chunk.
        logger.error("Error trying to handle streaming tool call: %s", e)
        logger.debug(
            "Skipping chunk as a result of tool streaming extraction "
            "error")
        return None

GraniteToolParser

Bases: ToolParser

Tool call parser for the granite 3.0 models. Intended for use with the examples/tool_chat_template_granite.jinja template.

Used when --enable-auto-tool-choice --tool-call-parser granite are both set

Source code in vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
@ToolParserManager.register_module("granite")
class GraniteToolParser(ToolParser):
    """
    Tool call parser for the granite 3.0 models. Intended
    for use with the examples/tool_chat_template_granite.jinja
    template.

    Used when --enable-auto-tool-choice --tool-call-parser granite
    are both set
    """

    def __init__(self, tokenizer: AnyTokenizer):
        """Set up the granite tool-call begin markers."""
        super().__init__(tokenizer)
        # for granite 3.0, the token `<|tool_call|>`
        self.bot_token = "<|tool_call|>"
        # for granite 3.1, the string `<tool_call>`
        self.bot_string = "<tool_call>"

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """Extract tool calls from a complete (non-streaming) response.

        Strips either begin marker, then parses the remainder as a JSON
        array of ``{"name", "arguments"}`` objects. Any output that is
        not such an array (or fails to parse) is returned unchanged as
        plain content with ``tools_called=False``.
        """
        stripped = model_output.strip()\
                    .removeprefix(self.bot_token)\
                    .removeprefix(self.bot_string)\
                    .lstrip()
        if not stripped or stripped[0] != '[':
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)
        try:
            raw_function_calls = json.loads(stripped)
            if not isinstance(raw_function_calls, list):
                raise Exception(
                    f"Expected dict or list, got {type(raw_function_calls)}")

            logger.debug("Extracted %d tool calls", len(raw_function_calls))
            tool_calls = [
                ToolCall(
                    type="function",
                    function=FunctionCall(
                        name=function_call["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(function_call["arguments"],
                                             ensure_ascii=False),
                    ),
                ) for function_call in raw_function_calls
            ]

            return ExtractedToolCallInformation(
                tools_called=True,
                tool_calls=tool_calls,
                content=None,
            )

        except Exception as e:
            # Fall back to treating the whole output as plain content.
            logger.error("Error in extracting tool call from response %s", e)
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Incrementally extract tool calls while the response streams.

        Re-parses the accumulated ``current_text`` with a partial-JSON
        parser on every chunk and emits only the not-yet-streamed delta:
        first the function name (sent whole, once), then argument-JSON
        fragments. Returns ``None`` when nothing new can be sent yet.
        """

        # Skip past either begin marker plus surrounding whitespace.
        start_idx = consume_space(0, current_text)
        if current_text[start_idx:].startswith(self.bot_token):
            start_idx = consume_space(start_idx + len(self.bot_token),
                                      current_text)
        if current_text[start_idx:].startswith(self.bot_string):
            start_idx = consume_space(start_idx + len(self.bot_string),
                                      current_text)
        # Not a JSON array -> ordinary content, pass the delta through.
        if not current_text or start_idx >= len(current_text)\
            or current_text[start_idx] != '[':
            return DeltaMessage(content=delta_text)

        # bit mask flags for partial JSON parsing. If the name hasn't been
        # sent yet, don't allow sending
        # an incomplete string since OpenAI only ever (as far as I have
        # seen) allows sending the entire tool/ function name at once.
        flags = Allow.ALL if self.current_tool_name_sent \
            else Allow.ALL & ~Allow.STR
        try:
            tool_call_arr = None
            is_complete = None
            try:
                tool_calls, end_idx = partial_json_loads(
                    current_text[start_idx:], flags)
                if type(tool_calls) is list:
                    tool_call_arr = tool_calls
                else:
                    return DeltaMessage(content=delta_text)

                # Only the last element can still be partially parsed.
                is_complete = [True] * len(tool_calls)
                if not is_complete_json(
                        current_text[start_idx:start_idx + end_idx]):
                    is_complete[-1] = False
            except partial_json_parser.core.exceptions.MalformedJSON:
                logger.debug('not enough tokens to parse into JSON yet')
                return None

            # case -- if no tokens have been streamed for the tool, e.g.
            #   only the array brackets, stream nothing
            if not tool_call_arr:
                return None

            # select the tool call the streaming cursor currently points at
            current_tool_call: dict = tool_call_arr[self.current_tool_id]

            delta = None
            # case: we are starting a new tool in the array
            #   -> array has > 0 length AND length has moved past cursor
            if len(tool_call_arr) > self.current_tool_id + 1:

                # if we're moving on to a new call, first make sure we
                # haven't missed anything in the previous one that was
                # auto-generated due to JSON completions, but wasn't
                # streamed to the client yet.
                if self.current_tool_id >= 0:
                    cur_arguments = current_tool_call.get("arguments")
                    if cur_arguments:
                        cur_args_json = json.dumps(cur_arguments,
                                                   ensure_ascii=False)
                        sent = len(
                            self.streamed_args_for_tool[self.current_tool_id])
                        argument_diff = cur_args_json[sent:]

                        logger.debug("got arguments diff: %s", argument_diff)
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=argument_diff).
                                          model_dump(exclude_none=True))
                        ])
                        self.streamed_args_for_tool[
                            self.current_tool_id] += argument_diff

                # re-set stuff pertaining to progress in the current tool
                self.current_tool_id = len(tool_call_arr) - 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("starting on new tool %d", self.current_tool_id)
                return delta

            # if the current tool name hasn't been sent, send if available
            # - otherwise send nothing
            elif not self.current_tool_name_sent:
                function_name = current_tool_call.get("name")
                if function_name:

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      type="function",
                                      id=make_tool_call_id(),
                                      function=DeltaFunctionCall(
                                          name=function_name).model_dump(
                                              exclude_none=True))
                    ])
                    self.current_tool_name_sent = True

            # now we know we're on the same tool call and we're streaming
            # arguments
            else:
                cur_arguments = current_tool_call.get("arguments")

                if cur_arguments:
                    sent = len(
                        self.streamed_args_for_tool[self.current_tool_id])
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    prev_arguments = self.prev_tool_call_arr[
                        self.current_tool_id].get("arguments")

                    argument_diff = None
                    if is_complete[self.current_tool_id]:
                        # JSON closed: safe to send everything remaining.
                        argument_diff = cur_args_json[sent:]
                    elif prev_arguments:
                        prev_args_json = json.dumps(prev_arguments,
                                                    ensure_ascii=False)
                        if cur_args_json != prev_args_json:
                            # Only stream the stable common prefix; the
                            # tail may be partial-parse auto-completion.
                            prefix = find_common_prefix(
                                prev_args_json, cur_args_json)
                            argument_diff = prefix[sent:]

                    if argument_diff is not None:
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=argument_diff).
                                          model_dump(exclude_none=True))
                        ])
                        self.streamed_args_for_tool[
                            self.current_tool_id] += argument_diff

            self.prev_tool_call_arr = tool_call_arr
            return delta

        except Exception as e:
            # Never break the stream on a parsing hiccup; drop the chunk.
            logger.error("Error trying to handle streaming tool call: %s", e)
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")
            return None

bot_string instance-attribute

bot_string = '<tool_call>'

bot_token instance-attribute

bot_token = '<|tool_call|>'

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Initialize the parser and record the granite tool-call markers."""
    super().__init__(tokenizer)
    # Granite 3.1 emits the plain string `<tool_call>`, while
    # granite 3.0 uses the special token `<|tool_call|>`.
    self.bot_string = "<tool_call>"
    self.bot_token = "<|tool_call|>"

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """Extract tool calls from a complete (non-streaming) model response.

    Strips the granite 3.0 token / 3.1 string prefix and parses the rest
    as a JSON array of ``{"name", "arguments"}`` objects. Output that is
    not such an array, or fails to parse, is returned unchanged as plain
    content with ``tools_called=False``.
    """
    text = model_output.strip()
    text = text.removeprefix(self.bot_token)
    text = text.removeprefix(self.bot_string)
    text = text.lstrip()
    # Anything that is not a JSON array is treated as plain content.
    if not text.startswith('['):
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)
    try:
        raw_function_calls = json.loads(text)
        if not isinstance(raw_function_calls, list):
            raise Exception(
                f"Expected dict or list, got {type(raw_function_calls)}")

        logger.debug("Extracted %d tool calls", len(raw_function_calls))
        tool_calls = []
        for function_call in raw_function_calls:
            # function call args are JSON but must be sent as a string
            serialized_args = json.dumps(function_call["arguments"],
                                         ensure_ascii=False)
            tool_calls.append(
                ToolCall(type="function",
                         function=FunctionCall(name=function_call["name"],
                                               arguments=serialized_args)))

        return ExtractedToolCallInformation(tools_called=True,
                                            tool_calls=tool_calls,
                                            content=None)

    except Exception as e:
        # Fall back to treating the whole output as plain content.
        logger.error("Error in extracting tool call from response %s", e)
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally extract granite tool calls while the response streams.

    Re-parses the accumulated ``current_text`` with a partial-JSON parser
    on every chunk and emits only the not-yet-streamed delta: first the
    function name (sent whole, once), then argument-JSON fragments.
    Progress between calls is kept on ``self`` (``current_tool_id``,
    ``current_tool_name_sent``, ``streamed_args_for_tool``,
    ``prev_tool_call_arr``). Returns ``None`` when there is nothing new
    to send yet.
    """

    # Skip past either begin marker plus surrounding whitespace.
    start_idx = consume_space(0, current_text)
    if current_text[start_idx:].startswith(self.bot_token):
        start_idx = consume_space(start_idx + len(self.bot_token),
                                  current_text)
    if current_text[start_idx:].startswith(self.bot_string):
        start_idx = consume_space(start_idx + len(self.bot_string),
                                  current_text)
    # Not a JSON array -> ordinary content, pass the delta through.
    if not current_text or start_idx >= len(current_text)\
        or current_text[start_idx] != '[':
        return DeltaMessage(content=delta_text)

    # bit mask flags for partial JSON parsing. If the name hasn't been
    # sent yet, don't allow sending
    # an incomplete string since OpenAI only ever (as far as I have
    # seen) allows sending the entire tool/ function name at once.
    flags = Allow.ALL if self.current_tool_name_sent \
        else Allow.ALL & ~Allow.STR
    try:
        tool_call_arr = None
        is_complete = None
        try:
            tool_calls, end_idx = partial_json_loads(
                current_text[start_idx:], flags)
            if type(tool_calls) is list:
                tool_call_arr = tool_calls
            else:
                return DeltaMessage(content=delta_text)

            # Only the last element can still be partially parsed.
            is_complete = [True] * len(tool_calls)
            if not is_complete_json(
                    current_text[start_idx:start_idx + end_idx]):
                is_complete[-1] = False
        except partial_json_parser.core.exceptions.MalformedJSON:
            logger.debug('not enough tokens to parse into JSON yet')
            return None

        # case -- if no tokens have been streamed for the tool, e.g.
        #   only the array brackets, stream nothing
        if not tool_call_arr:
            return None

        # select the tool call the streaming cursor currently points at
        current_tool_call: dict = tool_call_arr[self.current_tool_id]

        delta = None
        # case: we are starting a new tool in the array
        #   -> array has > 0 length AND length has moved past cursor
        if len(tool_call_arr) > self.current_tool_id + 1:

            # if we're moving on to a new call, first make sure we
            # haven't missed anything in the previous one that was
            # auto-generated due to JSON completions, but wasn't
            # streamed to the client yet.
            if self.current_tool_id >= 0:
                cur_arguments = current_tool_call.get("arguments")
                if cur_arguments:
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    sent = len(
                        self.streamed_args_for_tool[self.current_tool_id])
                    argument_diff = cur_args_json[sent:]

                    logger.debug("got arguments diff: %s", argument_diff)
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff

            # re-set stuff pertaining to progress in the current tool
            self.current_tool_id = len(tool_call_arr) - 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("starting on new tool %d", self.current_tool_id)
            return delta

        # if the current tool name hasn't been sent, send if available
        # - otherwise send nothing
        elif not self.current_tool_name_sent:
            function_name = current_tool_call.get("name")
            if function_name:

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  type="function",
                                  id=make_tool_call_id(),
                                  function=DeltaFunctionCall(
                                      name=function_name).model_dump(
                                          exclude_none=True))
                ])
                self.current_tool_name_sent = True

        # now we know we're on the same tool call and we're streaming
        # arguments
        else:
            cur_arguments = current_tool_call.get("arguments")

            if cur_arguments:
                sent = len(
                    self.streamed_args_for_tool[self.current_tool_id])
                cur_args_json = json.dumps(cur_arguments,
                                           ensure_ascii=False)
                prev_arguments = self.prev_tool_call_arr[
                    self.current_tool_id].get("arguments")

                argument_diff = None
                if is_complete[self.current_tool_id]:
                    # JSON closed: safe to send everything remaining.
                    argument_diff = cur_args_json[sent:]
                elif prev_arguments:
                    prev_args_json = json.dumps(prev_arguments,
                                                ensure_ascii=False)
                    if cur_args_json != prev_args_json:
                        # Only stream the stable common prefix; the
                        # tail may be partial-parse auto-completion.
                        prefix = find_common_prefix(
                            prev_args_json, cur_args_json)
                        argument_diff = prefix[sent:]

                if argument_diff is not None:
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff

        self.prev_tool_call_arr = tool_call_arr
        return delta

    except Exception as e:
        # Never break the stream on a parsing hiccup; drop the chunk.
        logger.error("Error trying to handle streaming tool call: %s", e)
        logger.debug(
            "Skipping chunk as a result of tool streaming extraction "
            "error")
        return None

Hermes2ProToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
@ToolParserManager.register_module("hermes")
class Hermes2ProToolParser(ToolParser):
    """Tool parser for Hermes-2-Pro-style model output.

    Tool calls are JSON objects wrapped in ``<tool_call>...</tool_call>``
    tags. This parser supports both one-shot extraction from a complete
    response and incremental extraction while streaming, buffering tag
    tokens that arrive split across multiple deltas.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        """Initialize streaming state and pre-compute the per-token text
        pieces of the start/end tags used by the streaming delta buffer.
        """
        super().__init__(tokenizer)

        # Hermes prompts expect a plain HF tokenizer; unwrap Mistral's
        # wrapper if one was passed.
        if isinstance(self.model_tokenizer, MistralTokenizer):
            logger.error(
                "Detected Mistral tokenizer when using a Hermes model")
            self.model_tokenizer = self.model_tokenizer.tokenizer

        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.streamed_args_for_tool: list[str] = [
        ]  # map what has been streamed for each tool so far to a list

        self.tool_call_start_token: str = "<tool_call>"
        self.tool_call_end_token: str = "</tool_call>"

        # Two alternatives: a closed <tool_call>...</tool_call> pair, or an
        # unterminated <tool_call>... running to end-of-string.
        self.tool_call_regex = re.compile(
            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL)
        self.scratch_pad_regex = re.compile(
            r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL)

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")
        self.tool_call_start_token_ids = self.model_tokenizer.encode(
            self.tool_call_start_token, add_special_tokens=False)
        self.tool_call_end_token_ids = self.model_tokenizer.encode(
            self.tool_call_end_token, add_special_tokens=False)

        # Per-token text pieces of the tags, so the streaming buffer can
        # recognize a tag emitted one token at a time.
        self.tool_call_start_token_array = [
            self.model_tokenizer.decode([token_id])
            for token_id in self.tool_call_start_token_ids
        ]

        self.tool_call_end_token_array = [
            self.model_tokenizer.decode([token_id])
            for token_id in self.tool_call_end_token_ids
        ]

        self.buffered_delta_text = ""

    # Very simple idea: when encountering tokens like <, tool, _call, >,
    # <, /, tool, _call, >, store them in a buffer.
    # When the last token is encountered, empty the buffer and return it.
    # If a token appears in an incorrect sequence while storing in the buffer,
    # return the preceding buffer along with the token.
    def tool_call_delta_buffer(self, delta_text: str):
        """Buffer partial tag tokens; return the text to emit (maybe "")."""
        # If the sequence of tool_call_start or tool_call_end tokens is not yet
        # complete, fill the buffer with the token and return "".
        if (delta_text in self.tool_call_start_token_array
                or delta_text in self.tool_call_end_token_array):
            # If delta_text is the last token of tool_call_start_token or
            # tool_call_end_token, empty the buffer and return
            # the buffered text + delta_text.
            if (delta_text == self.tool_call_start_token_array[-1]
                    or delta_text == self.tool_call_end_token_array[-1]):
                buffered_text = self.buffered_delta_text
                self.buffered_delta_text = ""
                return buffered_text + delta_text
            else:
                self.buffered_delta_text = self.buffered_delta_text + delta_text
                return ""
        else:
            if self.buffered_delta_text:
                buffered_text = self.buffered_delta_text
                self.buffered_delta_text = ""
                return buffered_text + delta_text
            else:
                return delta_text

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Extract all tool calls from a complete (non-streamed) response."""

        # sanity check; avoid unnecessary processing
        if self.tool_call_start_token not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        else:

            try:
                # there are two possible captures - between tags, or between a
                # tag and end-of-string so the result of
                # findall is an array of tuples where one is a function call and
                # the other is None
                function_call_tuples = (
                    self.tool_call_regex.findall(model_output))

                # load the JSON, and then use it to build the Function and
                # Tool Call
                raw_function_calls = [
                    json.loads(match[0] if match[0] else match[1])
                    for match in function_call_tuples
                ]
                tool_calls = [
                    ToolCall(
                        type="function",
                        function=FunctionCall(
                            name=function_call["name"],
                            # function call args are JSON but as a string
                            arguments=json.dumps(function_call["arguments"],
                                                 ensure_ascii=False)))
                    for function_call in raw_function_calls
                ]

                # Everything before the first tool call tag is plain content.
                content = model_output[:model_output.
                                       find(self.tool_call_start_token)]
                return ExtractedToolCallInformation(
                    tools_called=True,
                    tool_calls=tool_calls,
                    content=content if content else None)

            except Exception:
                logger.exception(
                    "Error in extracting tool call from response.")
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Extract tool-call deltas from one streamed chunk, or None."""
        # 1. All tokens are parsed based on _text, not token_ids.
        # 2. All incoming text data is processed by the tool_call_delta_buffer
        #    function for buffering before being used for parsing.

        delta_text = self.tool_call_delta_buffer(delta_text)
        # If the last characters of previous_text
        # match self.buffered_delta_text, remove only the matching part.
        if (len(previous_text) >= len(self.buffered_delta_text)
                and previous_text[-len(self.buffered_delta_text):]
                == self.buffered_delta_text):
            previous_text = previous_text[:-len(self.buffered_delta_text)]
            current_text = previous_text + delta_text

        logger.debug("delta_text: %s", delta_text)
        logger.debug("delta_token_ids: %s", delta_token_ids)
        # check to see if we should be streaming a tool call - is there a
        # tool call start token anywhere in the text so far?
        if self.tool_call_start_token not in current_text:
            logger.debug("No tool call tokens found!")
            return DeltaMessage(content=delta_text)

        try:

            # figure out where we are in the parsing by counting tool call
            # start & end tags
            prev_tool_start_count = previous_text.count(
                self.tool_call_start_token)
            prev_tool_end_count = previous_text.count(self.tool_call_end_token)
            cur_tool_start_count = current_text.count(
                self.tool_call_start_token)
            cur_tool_end_count = current_text.count(self.tool_call_end_token)
            tool_call_portion = None
            text_portion = None

            # case: if we're generating text, OR rounding out a tool call
            if (cur_tool_start_count == cur_tool_end_count
                    and prev_tool_end_count == cur_tool_end_count
                    and self.tool_call_end_token not in delta_text):
                logger.debug("Generating text content! skipping tool parsing.")
                return DeltaMessage(content=delta_text)

            if self.tool_call_end_token in delta_text:
                logger.debug("tool_call_end_token in delta_text")
                full_text = current_text + delta_text
                tool_call_portion = full_text.split(
                    self.tool_call_start_token)[-1].split(
                        self.tool_call_end_token)[0].rstrip()
                delta_text = delta_text.split(
                    self.tool_call_end_token)[0].rstrip()
                # NOTE(review): delta_text was already truncated at the end
                # token above, so this split never finds the token again and
                # text_portion is just the truncated delta — presumably this
                # was meant to use the original delta_text; confirm intent.
                text_portion = delta_text.split(
                    self.tool_call_end_token)[-1].lstrip()

            # case: if tool open & close tag counts don't match, we're doing
            # something with tools with this diff
            # (imaginary "else" block here).
            # flags for partial JSON parsing. exported constants from
            # "Allow" are handled via BIT MASK
            flags = Allow.ALL if self.current_tool_name_sent \
                else Allow.ALL & ~Allow.STR

            # case -- we're starting a new tool call
            if (cur_tool_start_count > cur_tool_end_count
                    and cur_tool_start_count > prev_tool_start_count):
                if len(delta_token_ids) > 1:
                    tool_call_portion = current_text.split(
                        self.tool_call_start_token)[-1]
                else:
                    tool_call_portion = None
                    delta = None

                text_portion = None

                # set cursors and state appropriately
                self.current_tool_id += 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("Starting on a new tool %s", self.current_tool_id)

            # case -- we're updating an existing tool call
            elif (cur_tool_start_count > cur_tool_end_count
                  and cur_tool_start_count == prev_tool_start_count):

                # get the portion of the text that's the tool call
                tool_call_portion = current_text.split(
                    self.tool_call_start_token)[-1]
                text_portion = None

            # case -- the current tool call is being closed.
            elif (cur_tool_start_count == cur_tool_end_count
                  and cur_tool_end_count >= prev_tool_end_count):
                if (self.prev_tool_call_arr is None
                        or len(self.prev_tool_call_arr) == 0):
                    logger.debug(
                        "attempting to close tool call, but no tool call")
                    return None
                diff = self.prev_tool_call_arr[self.current_tool_id].get(
                    "arguments")
                if diff:
                    # NOTE(review): `diff is str` compares the value with the
                    # type object and is always False, so this line is a
                    # no-op; `diff` is also reassigned below — dead code.
                    diff = diff.encode('utf-8').decode(
                        'unicode_escape') if diff is str else diff
                    if ('"}' not in delta_text):
                        return None
                    end_loc = delta_text.rindex('"}')
                    diff = delta_text[:end_loc] + '"}'
                    logger.debug(
                        "Finishing tool and found diff that had not "
                        "been streamed yet: %s", diff)
                    self.streamed_args_for_tool[self.current_tool_id] \
                        += diff
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=diff).model_dump(
                                              exclude_none=True))
                    ])

            # case -- otherwise we're just generating text
            else:
                text = delta_text.replace(self.tool_call_start_token, "")
                text = text.replace(self.tool_call_end_token, "")
                delta = DeltaMessage(tool_calls=[], content=text)
                return delta

            try:

                current_tool_call = partial_json_parser.loads(
                    tool_call_portion or "{}",
                    flags) if tool_call_portion else None
                logger.debug("Parsed tool call %s", current_tool_call)
            except partial_json_parser.core.exceptions.MalformedJSON:
                logger.debug('not enough tokens to parse into JSON yet')
                return None
            except json.decoder.JSONDecodeError:
                logger.debug("unable to parse JSON")
                return None

            # case - we haven't sent the tool name yet. If it's available, send
            #   it. otherwise, wait until it's available.
            if not self.current_tool_name_sent:
                if (current_tool_call is None):
                    return None
                function_name: Union[str, None] = current_tool_call.get("name")
                if function_name:
                    self.current_tool_name_sent = True
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      type="function",
                                      id=make_tool_call_id(),
                                      function=DeltaFunctionCall(
                                          name=function_name).model_dump(
                                              exclude_none=True))
                    ])
                else:
                    return None
            # case -- otherwise, send the tool call delta

            # if the tool call portion is None, send the delta as text
            if tool_call_portion is None:
                # if there's text but not tool calls, send that -
                # otherwise None to skip chunk
                delta = DeltaMessage(content=delta_text) \
                    if text_portion is not None else None
                return delta

            # now, the nitty-gritty of tool calls
            # now we have the portion to parse as tool call.

            logger.debug("Trying to parse current tool call with ID %s",
                         self.current_tool_id)

            # if we're starting a new tool call, push an empty object in as
            #   a placeholder for the arguments
            if len(self.prev_tool_call_arr) <= self.current_tool_id:
                self.prev_tool_call_arr.append({})

            # main logic for tool parsing here - compare prev. partially-parsed
            #   JSON to the current partially-parsed JSON
            prev_arguments = (
                self.prev_tool_call_arr[self.current_tool_id].get("arguments"))
            cur_arguments = current_tool_call.get("arguments")

            logger.debug("diffing old arguments: %s", prev_arguments)
            logger.debug("against new ones: %s", cur_arguments)

            # case -- no arguments have been created yet. skip sending a delta.
            if not cur_arguments and not prev_arguments:
                logger.debug("Skipping text %s - no arguments", delta_text)
                delta = None

            # case -- prev arguments are defined, but none are now.
            #   probably impossible, but not a fatal error - just keep going
            elif not cur_arguments and prev_arguments:
                logger.error("should be impossible to have arguments reset "
                             "mid-call. skipping streaming anything.")
                delta = None

            # case -- we now have the first info about arguments available from
            #   autocompleting the JSON
            elif cur_arguments and not prev_arguments:

                cur_arguments_json = json.dumps(cur_arguments,
                                                ensure_ascii=False)
                logger.debug("finding %s in %s", delta_text,
                             cur_arguments_json)

                # get the location where previous args differ from current
                if (delta_text not in cur_arguments_json[:-2]):
                    return None
                args_delta_start_loc = cur_arguments_json[:-2]. \
                                           rindex(delta_text) + \
                                           len(delta_text)

                # use that to find the actual delta
                arguments_delta = cur_arguments_json[:args_delta_start_loc]
                logger.debug("First tokens in arguments received: %s",
                             arguments_delta)

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=arguments_delta).model_dump(
                                          exclude_none=True))
                ])
                self.streamed_args_for_tool[self.current_tool_id] \
                    += arguments_delta

            # last case -- we have an update to existing arguments.
            elif cur_arguments and prev_arguments:
                # drop a trailing '}' so the closing brace is only streamed
                # when the tool call is finalized
                if isinstance(delta_text, str) and len(delta_text.rstrip(
                )) >= 1 and delta_text.rstrip()[-1] == '}':
                    delta_text = delta_text.rstrip()[:-1]

                logger.debug("got diff %s", delta_text)

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=delta_text).model_dump(
                                          exclude_none=True))
                ])
                self.streamed_args_for_tool[self.current_tool_id] \
                    += delta_text

            # handle saving the state for the current tool into
            # the "prev" list for use in diffing for the next iteration
            if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
                self.prev_tool_call_arr[self.current_tool_id] = \
                    current_tool_call
            else:
                self.prev_tool_call_arr.append(current_tool_call)

            return delta

        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            return None  # do not stream a delta. skip this token ID.

buffered_delta_text instance-attribute

buffered_delta_text = ''

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

model_tokenizer instance-attribute

model_tokenizer = tokenizer

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

scratch_pad_regex instance-attribute

scratch_pad_regex = compile(
    "<scratch_pad>(.*?)</scratch_pad>", DOTALL
)

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_call_end_token instance-attribute

tool_call_end_token: str = '</tool_call>'

tool_call_end_token_array instance-attribute

tool_call_end_token_array = [
    (decode([token_id]))
    for token_id in (tool_call_end_token_ids)
]

tool_call_end_token_ids instance-attribute

tool_call_end_token_ids = encode(
    tool_call_end_token, add_special_tokens=False
)

tool_call_regex instance-attribute

tool_call_regex = compile(
    "<tool_call>(.*?)</tool_call>|<tool_call>(.*)", DOTALL
)

tool_call_start_token instance-attribute

tool_call_start_token: str = '<tool_call>'

tool_call_start_token_array instance-attribute

tool_call_start_token_array = [
    (decode([token_id]))
    for token_id in (tool_call_start_token_ids)
]

tool_call_start_token_ids instance-attribute

tool_call_start_token_ids = encode(
    tool_call_start_token, add_special_tokens=False
)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Initialize streaming state and pre-compute tag token pieces."""
    super().__init__(tokenizer)

    # Hermes prompts expect a plain HF tokenizer; unwrap Mistral's
    # wrapper if one was passed by mistake.
    if isinstance(self.model_tokenizer, MistralTokenizer):
        logger.error(
            "Detected Mistral tokenizer when using a Hermes model")
        self.model_tokenizer = self.model_tokenizer.tokenizer

    # Per-request streaming state.
    self.current_tool_name_sent: bool = False
    self.prev_tool_call_arr: list[dict] = []
    self.current_tool_id: int = -1
    # What has been streamed so far for each tool, one entry per tool.
    self.streamed_args_for_tool: list[str] = []

    self.tool_call_start_token: str = "<tool_call>"
    self.tool_call_end_token: str = "</tool_call>"

    # Either a closed tag pair, or an unterminated tag to end-of-string.
    self.tool_call_regex = re.compile(
        r"<tool_call>(.*?)</tool_call>|<tool_call>(.*)", re.DOTALL)
    self.scratch_pad_regex = re.compile(
        r"<scratch_pad>(.*?)</scratch_pad>", re.DOTALL)

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ToolParser "
            "constructor during construction.")

    # Tokenize the tags once, then decode each token id individually so
    # the streaming buffer can recognize a tag split across deltas.
    self.tool_call_start_token_ids = self.model_tokenizer.encode(
        self.tool_call_start_token, add_special_tokens=False)
    self.tool_call_end_token_ids = self.model_tokenizer.encode(
        self.tool_call_end_token, add_special_tokens=False)

    self.tool_call_start_token_array = [
        self.model_tokenizer.decode([tid])
        for tid in self.tool_call_start_token_ids
    ]
    self.tool_call_end_token_array = [
        self.model_tokenizer.decode([tid])
        for tid in self.tool_call_end_token_ids
    ]

    self.buffered_delta_text = ""

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Extract all tool calls from a complete (non-streamed) response."""

    # Fast path: no tool-call tag at all means plain content.
    if self.tool_call_start_token not in model_output:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        # The regex has two alternatives: a closed <tool_call> pair or an
        # unterminated tag running to end-of-string, so each findall match
        # is a 2-tuple with exactly one non-empty capture.
        tool_calls = []
        for closed, open_ended in self.tool_call_regex.findall(model_output):
            parsed = json.loads(closed if closed else open_ended)
            tool_calls.append(
                ToolCall(
                    type="function",
                    function=FunctionCall(
                        name=parsed["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(parsed["arguments"],
                                             ensure_ascii=False))))

        # Everything before the first tag is normal content.
        prefix_end = model_output.find(self.tool_call_start_token)
        content = model_output[:prefix_end]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=content if content else None)

    except Exception:
        logger.exception(
            "Error in extracting tool call from response.")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Extract tool-call deltas from one streamed response chunk.

    Returns a DeltaMessage carrying plain content and/or tool-call
    deltas for this chunk, or None when nothing should be emitted yet.

    Implementation notes:
    1. All tokens are parsed based on _text, not token_ids.
    2. All incoming text data is processed by the tool_call_delta_buffer
       function for buffering before being used for parsing.
    """
    delta_text = self.tool_call_delta_buffer(delta_text)
    # If the last characters of previous_text
    # match self.buffered_delta_text, remove only the matching part.
    if (len(previous_text) >= len(self.buffered_delta_text)
            and previous_text[-len(self.buffered_delta_text):]
            == self.buffered_delta_text):
        previous_text = previous_text[:-len(self.buffered_delta_text)]
        current_text = previous_text + delta_text

    logger.debug("delta_text: %s", delta_text)
    logger.debug("delta_token_ids: %s", delta_token_ids)
    # check to see if we should be streaming a tool call - is there a
    # tool call start token anywhere in the text so far?
    if self.tool_call_start_token not in current_text:
        logger.debug("No tool call tokens found!")
        return DeltaMessage(content=delta_text)

    try:

        # figure out where we are in the parsing by counting tool call
        # start & end tags
        prev_tool_start_count = previous_text.count(
            self.tool_call_start_token)
        prev_tool_end_count = previous_text.count(self.tool_call_end_token)
        cur_tool_start_count = current_text.count(
            self.tool_call_start_token)
        cur_tool_end_count = current_text.count(self.tool_call_end_token)
        tool_call_portion = None
        text_portion = None

        # case: if we're generating text, OR rounding out a tool call
        if (cur_tool_start_count == cur_tool_end_count
                and prev_tool_end_count == cur_tool_end_count
                and self.tool_call_end_token not in delta_text):
            logger.debug("Generating text content! skipping tool parsing.")
            return DeltaMessage(content=delta_text)

        if self.tool_call_end_token in delta_text:
            logger.debug("tool_call_end_token in delta_text")
            full_text = current_text + delta_text
            tool_call_portion = full_text.split(
                self.tool_call_start_token)[-1].split(
                    self.tool_call_end_token)[0].rstrip()
            delta_text = delta_text.split(
                self.tool_call_end_token)[0].rstrip()
            # NOTE(review): delta_text was already truncated at the end
            # token above, so this split is a no-op and text_portion is
            # the truncated delta — presumably the original delta_text was
            # intended here; confirm before changing.
            text_portion = delta_text.split(
                self.tool_call_end_token)[-1].lstrip()

        # case: if tool open & close tag counts don't match, we're doing
        # something with tools with this diff
        # (imaginary "else" block here).
        # flags for partial JSON parsing. exported constants from
        # "Allow" are handled via BIT MASK
        flags = Allow.ALL if self.current_tool_name_sent \
            else Allow.ALL & ~Allow.STR

        # case -- we're starting a new tool call
        if (cur_tool_start_count > cur_tool_end_count
                and cur_tool_start_count > prev_tool_start_count):
            if len(delta_token_ids) > 1:
                tool_call_portion = current_text.split(
                    self.tool_call_start_token)[-1]
            else:
                tool_call_portion = None
                delta = None

            text_portion = None

            # set cursors and state appropriately
            self.current_tool_id += 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("Starting on a new tool %s", self.current_tool_id)

        # case -- we're updating an existing tool call
        elif (cur_tool_start_count > cur_tool_end_count
              and cur_tool_start_count == prev_tool_start_count):

            # get the portion of the text that's the tool call
            tool_call_portion = current_text.split(
                self.tool_call_start_token)[-1]
            text_portion = None

        # case -- the current tool call is being closed.
        elif (cur_tool_start_count == cur_tool_end_count
              and cur_tool_end_count >= prev_tool_end_count):
            if (self.prev_tool_call_arr is None
                    or len(self.prev_tool_call_arr) == 0):
                logger.debug(
                    "attempting to close tool call, but no tool call")
                return None
            diff = self.prev_tool_call_arr[self.current_tool_id].get(
                "arguments")
            if diff:
                # (A former line here tried to unicode-unescape `diff`,
                # but its guard `diff is str` was always False and the
                # value is reassigned below anyway — dead code removed.)
                if '"}' not in delta_text:
                    return None
                # stream everything up to and including the final '"}'
                end_loc = delta_text.rindex('"}')
                diff = delta_text[:end_loc] + '"}'
                logger.debug(
                    "Finishing tool and found diff that had not "
                    "been streamed yet: %s", diff)
                self.streamed_args_for_tool[self.current_tool_id] \
                    += diff
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=diff).model_dump(
                                          exclude_none=True))
                ])

        # case -- otherwise we're just generating text
        else:
            text = delta_text.replace(self.tool_call_start_token, "")
            text = text.replace(self.tool_call_end_token, "")
            delta = DeltaMessage(tool_calls=[], content=text)
            return delta

        try:

            current_tool_call = partial_json_parser.loads(
                tool_call_portion or "{}",
                flags) if tool_call_portion else None
            logger.debug("Parsed tool call %s", current_tool_call)
        except partial_json_parser.core.exceptions.MalformedJSON:
            logger.debug('not enough tokens to parse into JSON yet')
            return None
        except json.decoder.JSONDecodeError:
            logger.debug("unable to parse JSON")
            return None

        # case - we haven't sent the tool name yet. If it's available, send
        #   it. otherwise, wait until it's available.
        if not self.current_tool_name_sent:
            if (current_tool_call is None):
                return None
            function_name: Union[str, None] = current_tool_call.get("name")
            if function_name:
                self.current_tool_name_sent = True
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  type="function",
                                  id=make_tool_call_id(),
                                  function=DeltaFunctionCall(
                                      name=function_name).model_dump(
                                          exclude_none=True))
                ])
            else:
                return None
        # case -- otherwise, send the tool call delta

        # if the tool call portion is None, send the delta as text
        if tool_call_portion is None:
            # if there's text but not tool calls, send that -
            # otherwise None to skip chunk
            delta = DeltaMessage(content=delta_text) \
                if text_portion is not None else None
            return delta

        # now, the nitty-gritty of tool calls
        # now we have the portion to parse as tool call.

        logger.debug("Trying to parse current tool call with ID %s",
                     self.current_tool_id)

        # if we're starting a new tool call, push an empty object in as
        #   a placeholder for the arguments
        if len(self.prev_tool_call_arr) <= self.current_tool_id:
            self.prev_tool_call_arr.append({})

        # main logic for tool parsing here - compare prev. partially-parsed
        #   JSON to the current partially-parsed JSON
        prev_arguments = (
            self.prev_tool_call_arr[self.current_tool_id].get("arguments"))
        cur_arguments = current_tool_call.get("arguments")

        logger.debug("diffing old arguments: %s", prev_arguments)
        logger.debug("against new ones: %s", cur_arguments)

        # case -- no arguments have been created yet. skip sending a delta.
        if not cur_arguments and not prev_arguments:
            logger.debug("Skipping text %s - no arguments", delta_text)
            delta = None

        # case -- prev arguments are defined, but none are now.
        #   probably impossible, but not a fatal error - just keep going
        elif not cur_arguments and prev_arguments:
            logger.error("should be impossible to have arguments reset "
                         "mid-call. skipping streaming anything.")
            delta = None

        # case -- we now have the first info about arguments available from
        #   autocompleting the JSON
        elif cur_arguments and not prev_arguments:

            cur_arguments_json = json.dumps(cur_arguments,
                                            ensure_ascii=False)
            logger.debug("finding %s in %s", delta_text,
                         cur_arguments_json)

            # get the location where previous args differ from current
            if (delta_text not in cur_arguments_json[:-2]):
                return None
            args_delta_start_loc = cur_arguments_json[:-2]. \
                                       rindex(delta_text) + \
                                       len(delta_text)

            # use that to find the actual delta
            arguments_delta = cur_arguments_json[:args_delta_start_loc]
            logger.debug("First tokens in arguments received: %s",
                         arguments_delta)

            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.current_tool_id,
                              function=DeltaFunctionCall(
                                  arguments=arguments_delta).model_dump(
                                      exclude_none=True))
            ])
            self.streamed_args_for_tool[self.current_tool_id] \
                += arguments_delta

        # last case -- we have an update to existing arguments.
        elif cur_arguments and prev_arguments:
            # drop a trailing '}' so the closing brace is only streamed
            # when the tool call is finalized
            if isinstance(delta_text, str) and len(delta_text.rstrip(
            )) >= 1 and delta_text.rstrip()[-1] == '}':
                delta_text = delta_text.rstrip()[:-1]

            logger.debug("got diff %s", delta_text)

            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.current_tool_id,
                              function=DeltaFunctionCall(
                                  arguments=delta_text).model_dump(
                                      exclude_none=True))
            ])
            self.streamed_args_for_tool[self.current_tool_id] \
                += delta_text

        # handle saving the state for the current tool into
        # the "prev" list for use in diffing for the next iteration
        if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
            self.prev_tool_call_arr[self.current_tool_id] = \
                current_tool_call
        else:
            self.prev_tool_call_arr.append(current_tool_call)

        return delta

    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        return None  # do not stream a delta. skip this token ID.

tool_call_delta_buffer

tool_call_delta_buffer(delta_text: str)
Source code in vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
def tool_call_delta_buffer(self, delta_text: str):
    """Buffer tokens belonging to a multi-token tool-call marker.

    The <tool_call> / </tool_call> markers may be tokenized into several
    pieces.  While a marker is still incomplete, its pieces accumulate in
    ``self.buffered_delta_text`` and ``""`` is returned; as soon as the
    marker's final token (or any ordinary token) arrives, everything
    buffered is released together with the current text.
    """
    is_marker_piece = (delta_text in self.tool_call_start_token_array
                       or delta_text in self.tool_call_end_token_array)
    if is_marker_piece:
        completes_marker = (
            delta_text == self.tool_call_start_token_array[-1]
            or delta_text == self.tool_call_end_token_array[-1])
        if not completes_marker:
            # Mid-marker: keep accumulating, emit nothing yet.
            self.buffered_delta_text = self.buffered_delta_text + delta_text
            return ""
    # Marker finished, or ordinary text: flush any buffered prefix.
    pending, self.buffered_delta_text = self.buffered_delta_text, ""
    return pending + delta_text

HunyuanA13BToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
@ToolParserManager.register_module("hunyuan_a13b")
class HunyuanA13BToolParser(ToolParser):
    """Tool-call parser for Hunyuan-A13B models.

    The model emits tool calls as a JSON array wrapped in
    ``<tool_calls>...</tool_calls>`` tags.  This parser supports both
    one-shot extraction (``extract_tool_calls``) and incremental
    extraction for streaming responses
    (``extract_tool_calls_streaming``), which sends each tool's name
    first and then streams its arguments as diffs.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        # Initialize state for streaming mode
        self.prev_tool_calls: list[dict] = []
        self.current_tool_id = -1
        self.current_tool_name_sent = False
        self.streamed_args: list[str] = [
        ]  # Track arguments sent for each tool

        # For backward compatibility with tests
        self.current_tools_sent: list[bool] = []

        # For backward compatibility with serving code
        self.prev_tool_call_arr = []

        # Regex patterns for preprocessing
        # Whole tool-call payload between the wrapper tags.
        self.answer_tool_calls_pattern = re.compile(
            r"<tool_calls>([\s\S]*?)</tool_calls>", re.DOTALL)

        # A single tool name inside the JSON payload.
        self.tool_name_reg = re.compile(r'"name"\s*:\s*"([^"]+)"')

        # name + empty "arguments": {} pair.
        self.tool_empty_arg_reg = re.compile(
            r'"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*\{\s*\}')

        # TODO: nested JSON objects in function-call arguments are
        # not supported (regex only matches one level of nesting).
        self.tool_non_empty_arg_reg = re.compile(
            r'"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*(\{(?:[^{}]|(?:\{[^{}]*\}))*\})'
        )

        # Marker that introduces the tool-call payload in the template.
        self.bot_string = "<tool_calls>"

        # Per-tool streaming state, grown lazily as tools appear:
        #   current_tool_index - index of the tool currently streaming
        #   tool_ids           - generated id per tool
        #   sent_tools         - per-tool flags/accumulated argument text
        self.streaming_state: dict[str, Any] = {
            "current_tool_index": -1,
            "tool_ids": [],
            "sent_tools": [],
        }

    def preprocess_model_output(
            self, model_output: str) -> tuple[Optional[str], Optional[str]]:
        """Split model output into (content, tool_calls_json).

        Scans for ``<tool_calls>...</tool_calls>`` spans, skipping any
        that lie inside a ``<think>...</think>`` region or whose payload
        is not valid JSON.  Returns ``(model_output, None)`` when no
        usable span is found.
        """
        # find the location of the tool-call payload
        for match in self.answer_tool_calls_pattern.finditer(model_output):
            start, end = match.span()
            # check whether this tool_calls span sits inside <think>
            think_regions = [(m.start(), m.end()) for m in re.finditer(
                r"<think>(.*?)</think>", model_output, flags=re.DOTALL)]
            in_think = any(start > t_start and end < t_end
                           for t_start, t_end in think_regions)
            if not in_think:
                content = model_output[:start]
                tool_calls_content = match.group(1).strip()
                try:
                    # Only accept the span if the payload parses as JSON.
                    json.loads(tool_calls_content)
                    return content, tool_calls_content
                except Exception:
                    continue
        return model_output, None

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete model output.

        Returns an ExtractedToolCallInformation with ``tools_called``
        set when at least one well-formed call was parsed; on any
        failure the full output is returned as plain content.
        """
        try:
            # Preprocess the model output
            content, potential_tool_calls = self.preprocess_model_output(
                model_output)

            if not potential_tool_calls:
                # No tool call: strip a template artifact that can
                # appear in plain content ("助手:" is emitted by the
                # a13b chat template).
                if content:
                    content = content.replace("助手:", "", 1)
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=content)

            # Parse the potential tool calls as JSON
            tool_calls_data = json.loads(potential_tool_calls)

            # Ensure it's an array
            if not isinstance(tool_calls_data, list):
                logger.debug("Tool calls data is not an array")
                return ExtractedToolCallInformation(
                    tools_called=False,
                    tool_calls=[],
                    content=content or model_output,
                )

            tool_calls: list[ToolCall] = []

            for idx, call in enumerate(tool_calls_data):
                # Skip malformed entries rather than failing the batch.
                if (not isinstance(call, dict) or "name" not in call
                        or "arguments" not in call):
                    continue

                tool_call = ToolCall(
                    id=f"call_{random_uuid()}",
                    type="function",
                    function=FunctionCall(
                        name=call["name"],
                        # Arguments may be a dict (re-serialize) or
                        # already a JSON string (pass through).
                        arguments=(json.dumps(call["arguments"]) if isinstance(
                            call["arguments"], dict) else call["arguments"]),
                    ),
                )
                tool_calls.append(tool_call)

            if not content or len(content.strip()) == 0:
                # clear whitespace-only content.
                content = None

            return ExtractedToolCallInformation(
                tools_called=len(tool_calls) > 0,
                tool_calls=tool_calls,
                content=content,
            )

        except Exception:
            # Best-effort: on any parse error, fall back to raw content.
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """
        Extract tool calls for streaming mode.

        Returns a DeltaMessage with plain content while no tool-call
        array has started, a tool-name or argument delta once one has,
        or None when there is nothing new to emit this step.
        """

        # Skip leading whitespace and an optional <tool_calls> marker.
        start_idx = consume_space(0, current_text)
        if current_text[start_idx:].startswith(self.bot_string):
            start_idx = consume_space(start_idx + len(self.bot_string),
                                      current_text)
        # Not a tool-call payload (no '[' yet) -> stream as content.
        if not current_text or start_idx >= len(
                current_text) or current_text[start_idx] != '[':
            return DeltaMessage(content=delta_text)

        # Keep prev_tool_call_arr up to date for the serving layer.
        self._try_parse_json_tools(current_text[start_idx:])

        # Legacy path used by older tests (current_tools_sent flags).
        test_delta = self._handle_test_compatibility(current_text)
        if test_delta:
            return test_delta

        name_matches = list(self.tool_name_reg.finditer(current_text))
        tool_count = len(name_matches)
        if tool_count == 0:
            return None
        self._ensure_state_arrays(tool_count)
        current_idx = self.streaming_state["current_tool_index"]

        # Send the tool name (with id) first, then stream arguments.
        name_delta = self._handle_tool_name_streaming(current_idx, tool_count,
                                                      name_matches)
        if name_delta:
            return name_delta

        args_delta = self._handle_tool_args_streaming(current_text,
                                                      current_idx, tool_count)
        if args_delta:
            return args_delta

        return None

    def _try_parse_json_tools(self, current_text: str):
        """Opportunistically parse the accumulated text as a tool list."""
        try:
            parsed_tools = json.loads(current_text)
            if isinstance(parsed_tools, list):
                self.prev_tool_call_arr = parsed_tools
        except json.JSONDecodeError:
            # Text is still a partial JSON fragment; try again later.
            pass

    def _handle_test_compatibility(self, current_text: str):
        """Emit the first tool's name via the legacy `current_tools_sent`
        flags; returns None when the legacy path does not apply."""
        if len(self.current_tools_sent) > 0:
            # Legacy tests pre-seed current_tools_sent = [False].
            if (len(self.current_tools_sent) == 1
                    and self.current_tools_sent[0] is False):
                name_match = self.tool_name_reg.search(current_text)
                if name_match:
                    function_name = name_match.group(1)
                    tool_id = f"chatcmpl-tool-{random_uuid()}"
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=0,
                            type="function",
                            id=tool_id,
                            function=DeltaFunctionCall(
                                name=function_name).model_dump(
                                    exclude_none=True),
                        )
                    ])
                    self.current_tools_sent = [True]
                    self.current_tool_id = 0
                    self.streaming_state["current_tool_index"] = 0
                    # Mirror the name-sent flag into streaming_state.
                    if len(self.streaming_state["sent_tools"]) == 0:
                        self.streaming_state["sent_tools"].append({
                            "sent_name":
                            True,
                            "sent_arguments_prefix":
                            False,
                            "sent_arguments":
                            "",
                        })
                    else:
                        self.streaming_state["sent_tools"][0][
                            "sent_name"] = True
                    self.current_tool_name_sent = True
                    return delta
        return None

    def _ensure_state_arrays(self, tool_count: int):
        """Grow per-tool streaming state lists to cover tool_count tools."""
        while len(self.streaming_state["sent_tools"]) < tool_count:
            self.streaming_state["sent_tools"].append({
                "sent_name": False,
                "sent_arguments_prefix": False,
                "sent_arguments": "",
            })
        while len(self.streaming_state["tool_ids"]) < tool_count:
            self.streaming_state["tool_ids"].append(None)

    def _handle_tool_name_streaming(self, current_idx: int, tool_count: int,
                                    name_matches):
        """Emit the next unsent tool name (advancing current_tool_index),
        or None when the current tool's name has already been sent."""
        if current_idx == -1 or current_idx < tool_count - 1:
            next_idx = current_idx + 1
            if (next_idx < tool_count
                    and not self.streaming_state["sent_tools"][next_idx]
                ["sent_name"]):
                # Advance to the next tool and emit its name + id.
                self.streaming_state["current_tool_index"] = next_idx
                self.current_tool_id = next_idx
                current_idx = next_idx
                tool_name = name_matches[current_idx].group(1)
                tool_id = f"call_{current_idx}_{random_uuid()}"
                self.streaming_state["tool_ids"][current_idx] = tool_id
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=current_idx,
                        type="function",
                        id=tool_id,
                        function=DeltaFunctionCall(name=tool_name).model_dump(
                            exclude_none=True),
                    )
                ])
                self.streaming_state["sent_tools"][current_idx][
                    "sent_name"] = True
                self.current_tool_name_sent = True
                # Make sure a streamed-args slot exists for this tool.
                while len(self.streamed_args) <= current_idx:
                    self.streamed_args.append("")
                return delta
        return None

    def _handle_tool_args_streaming(self, current_text: str, current_idx: int,
                                    tool_count: int):
        """Stream the current tool's argument text as incremental diffs;
        returns a DeltaMessage or None when there is nothing new."""

        if current_idx >= 0 and current_idx < tool_count:
            # Case 1: the current tool has empty arguments -> emit "{}"
            # once and advance to the next tool.
            empty_args_match = self.tool_empty_arg_reg.search(current_text)
            if empty_args_match and empty_args_match.start() > 0:
                for i in range(tool_count):
                    if i == current_idx:
                        if not self.streaming_state["sent_tools"][current_idx][
                                "sent_arguments_prefix"]:
                            self.streaming_state["sent_tools"][current_idx][
                                "sent_arguments_prefix"] = True
                            self.streaming_state["sent_tools"][current_idx][
                                "sent_arguments"] = "{}"
                            while len(self.streamed_args) <= current_idx:
                                self.streamed_args.append("")
                            self.streamed_args[current_idx] += "{}"
                            delta = DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=current_idx,
                                    function=DeltaFunctionCall(
                                        arguments="{}").model_dump(
                                            exclude_none=True),
                                )
                            ])
                            if current_idx < tool_count - 1:
                                self.streaming_state["current_tool_index"] += 1
                                self.current_tool_id = self.streaming_state[
                                    "current_tool_index"]
                            return delta

            # Case 2: non-empty arguments -> diff against what was sent.
            args_matches = list(
                self.tool_non_empty_arg_reg.finditer(current_text))
            if current_idx < len(args_matches):
                args_text = args_matches[current_idx].group(1)
                is_last_tool = current_idx == tool_count - 1
                if not is_last_tool:
                    # For a non-final tool, clip the argument text at the
                    # boundary with the next tool object ("},{").
                    next_tool_pos = current_text.find(
                        "},{", args_matches[current_idx].start())
                    if next_tool_pos != -1:
                        args_end_pos = (next_tool_pos + 1)
                        args_text = (
                            current_text[args_matches[current_idx].start(
                            ):args_end_pos].split('"arguments":')[1].strip())
                sent_args = self.streaming_state["sent_tools"][current_idx][
                    "sent_arguments"]
                # Emit the opening "{" exactly once per tool.
                if not self.streaming_state["sent_tools"][current_idx][
                        "sent_arguments_prefix"] and args_text.startswith("{"):
                    self.streaming_state["sent_tools"][current_idx][
                        "sent_arguments_prefix"] = True
                    self.streaming_state["sent_tools"][current_idx][
                        "sent_arguments"] = "{"
                    while len(self.streamed_args) <= current_idx:
                        self.streamed_args.append("")
                    self.streamed_args[current_idx] += "{"
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=current_idx,
                            function=DeltaFunctionCall(
                                arguments="{").model_dump(exclude_none=True),
                        )
                    ])
                    return delta

                # Emit only the suffix not yet sent.
                if args_text.startswith(sent_args):
                    args_diff = args_text[len(sent_args):]
                    if args_diff:
                        self.streaming_state["sent_tools"][current_idx][
                            "sent_arguments"] = args_text
                        while len(self.streamed_args) <= current_idx:
                            self.streamed_args.append("")
                        self.streamed_args[current_idx] += args_diff
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=current_idx,
                                function=DeltaFunctionCall(
                                    arguments=args_diff).model_dump(
                                        exclude_none=True),
                            )
                        ])
                        return delta

                # Arguments fully sent ("}" reached) -> advance to next
                # tool without emitting anything this step.
                if args_text.endswith("}") and args_text == sent_args:
                    if current_idx < tool_count - 1:
                        self.streaming_state["current_tool_index"] += 1
                        self.current_tool_id = self.streaming_state[
                            "current_tool_index"]
        return None

answer_tool_calls_pattern instance-attribute

answer_tool_calls_pattern = compile(
    "<tool_calls>([\\s\\S]*?)</tool_calls>", DOTALL
)

bot_string instance-attribute

bot_string = '<tool_calls>'

current_tool_id instance-attribute

current_tool_id = -1

current_tool_name_sent instance-attribute

current_tool_name_sent = False

current_tools_sent instance-attribute

current_tools_sent: list[bool] = []

prev_tool_call_arr instance-attribute

prev_tool_call_arr = []

prev_tool_calls instance-attribute

prev_tool_calls: list[dict] = []

streamed_args instance-attribute

streamed_args: list[str] = []

streaming_state instance-attribute

streaming_state: dict[str, Any] = {
    "current_tool_index": -1,
    "tool_ids": [],
    "sent_tools": [],
}

tool_empty_arg_reg instance-attribute

tool_empty_arg_reg = compile(
    '"name"\\s*:\\s*"[^"]+"\\s*,\\s*"arguments"\\s*:\\s*\\{\\s*\\}'
)

tool_name_reg instance-attribute

tool_name_reg = compile('"name"\\s*:\\s*"([^"]+)"')

tool_non_empty_arg_reg instance-attribute

tool_non_empty_arg_reg = compile(
    '"name"\\s*:\\s*"[^"]+"\\s*,\\s*"arguments"\\s*:\\s*(\\{(?:[^{}]|(?:\\{[^{}]*\\}))*\\})'
)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Set up streaming state and the regexes used to locate tool calls."""
    super().__init__(tokenizer)

    # --- streaming bookkeeping ---
    self.prev_tool_calls: list[dict] = []  # tool calls seen so far
    self.current_tool_id = -1  # index of the tool currently in flight
    self.current_tool_name_sent = False
    self.streamed_args: list[str] = []  # argument text sent per tool

    # Kept for backward compatibility with tests and serving code.
    self.current_tools_sent: list[bool] = []
    self.prev_tool_call_arr = []

    # Marker that introduces the tool-call payload in the chat template.
    self.bot_string = "<tool_calls>"

    # --- regexes for locating tool-call payloads and their pieces ---
    self.answer_tool_calls_pattern = re.compile(
        r"<tool_calls>([\s\S]*?)</tool_calls>", re.DOTALL)
    self.tool_name_reg = re.compile(r'"name"\s*:\s*"([^"]+)"')
    self.tool_empty_arg_reg = re.compile(
        r'"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*\{\s*\}')
    # TODO: nested JSON objects in function-call arguments are not
    # supported (the regex only matches one level of nesting).
    self.tool_non_empty_arg_reg = re.compile(
        r'"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*(\{(?:[^{}]|(?:\{[^{}]*\}))*\})'
    )

    # Per-tool streaming state, grown lazily as tools appear.
    self.streaming_state: dict[str, Any] = {
        "current_tool_index": -1,
        "tool_ids": [],
        "sent_tools": [],
    }

_ensure_state_arrays

_ensure_state_arrays(tool_count: int)
Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def _ensure_state_arrays(self, tool_count: int):
    while len(self.streaming_state["sent_tools"]) < tool_count:
        self.streaming_state["sent_tools"].append({
            "sent_name": False,
            "sent_arguments_prefix": False,
            "sent_arguments": "",
        })
    while len(self.streaming_state["tool_ids"]) < tool_count:
        self.streaming_state["tool_ids"].append(None)

_handle_test_compatibility

_handle_test_compatibility(current_text: str)
Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def _handle_test_compatibility(self, current_text: str):
    """Emit the first tool's name via the legacy `current_tools_sent`
    flags.

    Kept for backward compatibility with tests that pre-seed
    ``current_tools_sent = [False]``.  Returns a DeltaMessage carrying
    the first tool name, or None when the legacy path does not apply.
    """
    if len(self.current_tools_sent) > 0:
        # Legacy trigger: exactly one pending, unsent tool flag.
        if (len(self.current_tools_sent) == 1
                and self.current_tools_sent[0] is False):
            name_match = self.tool_name_reg.search(current_text)
            if name_match:
                function_name = name_match.group(1)
                tool_id = f"chatcmpl-tool-{random_uuid()}"
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=0,
                        type="function",
                        id=tool_id,
                        function=DeltaFunctionCall(
                            name=function_name).model_dump(
                                exclude_none=True),
                    )
                ])
                self.current_tools_sent = [True]
                self.current_tool_id = 0
                self.streaming_state["current_tool_index"] = 0
                # Mirror the name-sent flag into streaming_state so
                # the regular streaming path stays consistent.
                if len(self.streaming_state["sent_tools"]) == 0:
                    self.streaming_state["sent_tools"].append({
                        "sent_name":
                        True,
                        "sent_arguments_prefix":
                        False,
                        "sent_arguments":
                        "",
                    })
                else:
                    self.streaming_state["sent_tools"][0][
                        "sent_name"] = True
                self.current_tool_name_sent = True
                return delta
    return None

_handle_tool_args_streaming

_handle_tool_args_streaming(
    current_text: str, current_idx: int, tool_count: int
)
Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def _handle_tool_args_streaming(self, current_text: str, current_idx: int,
                                tool_count: int):
    """Stream the current tool's argument text as incremental diffs.

    Handles three cases: empty arguments (emit "{}" once and advance),
    the first "{" of non-empty arguments, and subsequent suffix diffs
    against what has already been sent.  Returns a DeltaMessage or None
    when there is nothing new to emit.
    """

    if current_idx >= 0 and current_idx < tool_count:
        # Case 1: current tool has empty arguments -> emit "{}" once,
        # then advance to the next tool.
        empty_args_match = self.tool_empty_arg_reg.search(current_text)
        if empty_args_match and empty_args_match.start() > 0:
            for i in range(tool_count):
                if i == current_idx:
                    if not self.streaming_state["sent_tools"][current_idx][
                            "sent_arguments_prefix"]:
                        self.streaming_state["sent_tools"][current_idx][
                            "sent_arguments_prefix"] = True
                        self.streaming_state["sent_tools"][current_idx][
                            "sent_arguments"] = "{}"
                        while len(self.streamed_args) <= current_idx:
                            self.streamed_args.append("")
                        self.streamed_args[current_idx] += "{}"
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=current_idx,
                                function=DeltaFunctionCall(
                                    arguments="{}").model_dump(
                                        exclude_none=True),
                            )
                        ])
                        if current_idx < tool_count - 1:
                            self.streaming_state["current_tool_index"] += 1
                            self.current_tool_id = self.streaming_state[
                                "current_tool_index"]
                        return delta

        # Case 2: non-empty arguments -> diff against what was sent.
        args_matches = list(
            self.tool_non_empty_arg_reg.finditer(current_text))
        if current_idx < len(args_matches):
            args_text = args_matches[current_idx].group(1)
            is_last_tool = current_idx == tool_count - 1
            if not is_last_tool:
                # For a non-final tool, clip the argument text at the
                # "},{" boundary with the next tool object.
                next_tool_pos = current_text.find(
                    "},{", args_matches[current_idx].start())
                if next_tool_pos != -1:
                    args_end_pos = (next_tool_pos + 1)
                    args_text = (
                        current_text[args_matches[current_idx].start(
                        ):args_end_pos].split('"arguments":')[1].strip())
            sent_args = self.streaming_state["sent_tools"][current_idx][
                "sent_arguments"]
            # Emit the opening "{" exactly once per tool.
            if not self.streaming_state["sent_tools"][current_idx][
                    "sent_arguments_prefix"] and args_text.startswith("{"):
                self.streaming_state["sent_tools"][current_idx][
                    "sent_arguments_prefix"] = True
                self.streaming_state["sent_tools"][current_idx][
                    "sent_arguments"] = "{"
                while len(self.streamed_args) <= current_idx:
                    self.streamed_args.append("")
                self.streamed_args[current_idx] += "{"
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=current_idx,
                        function=DeltaFunctionCall(
                            arguments="{").model_dump(exclude_none=True),
                    )
                ])
                return delta

            # Emit only the suffix that has not been sent yet.
            if args_text.startswith(sent_args):
                args_diff = args_text[len(sent_args):]
                if args_diff:
                    self.streaming_state["sent_tools"][current_idx][
                        "sent_arguments"] = args_text
                    while len(self.streamed_args) <= current_idx:
                        self.streamed_args.append("")
                    self.streamed_args[current_idx] += args_diff
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=current_idx,
                            function=DeltaFunctionCall(
                                arguments=args_diff).model_dump(
                                    exclude_none=True),
                        )
                    ])
                    return delta

            # Arguments fully sent (closing "}" reached) -> advance to
            # the next tool without emitting anything this step.
            if args_text.endswith("}") and args_text == sent_args:
                if current_idx < tool_count - 1:
                    self.streaming_state["current_tool_index"] += 1
                    self.current_tool_id = self.streaming_state[
                        "current_tool_index"]
    return None

_handle_tool_name_streaming

_handle_tool_name_streaming(
    current_idx: int, tool_count: int, name_matches
)
Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def _handle_tool_name_streaming(self, current_idx: int, tool_count: int,
                                name_matches):
    """Emit the next unsent tool name, advancing current_tool_index.

    Returns a DeltaMessage carrying the tool's index, generated id and
    name, or None when the current tool's name was already sent.
    """
    if current_idx == -1 or current_idx < tool_count - 1:
        next_idx = current_idx + 1
        if (next_idx < tool_count
                and not self.streaming_state["sent_tools"][next_idx]
            ["sent_name"]):
            # Advance to the next tool and emit its name + id.
            self.streaming_state["current_tool_index"] = next_idx
            self.current_tool_id = next_idx
            current_idx = next_idx
            tool_name = name_matches[current_idx].group(1)
            tool_id = f"call_{current_idx}_{random_uuid()}"
            self.streaming_state["tool_ids"][current_idx] = tool_id
            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(
                    index=current_idx,
                    type="function",
                    id=tool_id,
                    function=DeltaFunctionCall(name=tool_name).model_dump(
                        exclude_none=True),
                )
            ])
            self.streaming_state["sent_tools"][current_idx][
                "sent_name"] = True
            self.current_tool_name_sent = True
            # Make sure a streamed-args slot exists for this tool.
            while len(self.streamed_args) <= current_idx:
                self.streamed_args.append("")
            return delta
    return None

_try_parse_json_tools

_try_parse_json_tools(current_text: str)
Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def _try_parse_json_tools(self, current_text: str):
    try:
        parsed_tools = json.loads(current_text)
        if isinstance(parsed_tools, list):
            self.prev_tool_call_arr = parsed_tools
    except json.JSONDecodeError:
        pass

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract tool calls from a complete model output.

Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """
    Extract tool calls from a complete model output.

    Returns an ExtractedToolCallInformation with ``tools_called`` set
    when at least one well-formed call was parsed; on any failure the
    full output is returned as plain content.
    """
    try:
        # Preprocess the model output
        content, potential_tool_calls = self.preprocess_model_output(
            model_output)

        if not potential_tool_calls:
            # No tool call: strip a template artifact that can appear
            # in plain content ("助手:" is emitted by the a13b chat
            # template).
            if content:
                content = content.replace("助手:", "", 1)
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=content)

        # Parse the potential tool calls as JSON
        tool_calls_data = json.loads(potential_tool_calls)

        # Ensure it's an array
        if not isinstance(tool_calls_data, list):
            logger.debug("Tool calls data is not an array")
            return ExtractedToolCallInformation(
                tools_called=False,
                tool_calls=[],
                content=content or model_output,
            )

        tool_calls: list[ToolCall] = []

        for idx, call in enumerate(tool_calls_data):
            # Skip malformed entries rather than failing the batch.
            if (not isinstance(call, dict) or "name" not in call
                    or "arguments" not in call):
                continue

            tool_call = ToolCall(
                id=f"call_{random_uuid()}",
                type="function",
                function=FunctionCall(
                    name=call["name"],
                    # Arguments may be a dict (re-serialize) or already
                    # a JSON string (pass through unchanged).
                    arguments=(json.dumps(call["arguments"]) if isinstance(
                        call["arguments"], dict) else call["arguments"]),
                ),
            )
            tool_calls.append(tool_call)

        if not content or len(content.strip()) == 0:
            # clear whitespace-only content.
            content = None

        return ExtractedToolCallInformation(
            tools_called=len(tool_calls) > 0,
            tool_calls=tool_calls,
            content=content,
        )

    except Exception:
        # Best-effort: on any parse error, fall back to raw content.
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]

Extract tool calls for streaming mode.

Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """
    Extract tool calls for streaming mode.

    Returns a DeltaMessage with plain content while no tool-call array
    has started, a tool-name or argument delta once one has, or None
    when there is nothing new to emit this step.
    """

    # Skip leading whitespace and an optional <tool_calls> marker.
    start_idx = consume_space(0, current_text)
    if current_text[start_idx:].startswith(self.bot_string):
        start_idx = consume_space(start_idx + len(self.bot_string),
                                  current_text)
    # Not a tool-call payload (no '[' yet) -> stream as plain content.
    if not current_text or start_idx >= len(
            current_text) or current_text[start_idx] != '[':
        return DeltaMessage(content=delta_text)

    # Keep prev_tool_call_arr up to date for the serving layer.
    self._try_parse_json_tools(current_text[start_idx:])

    # Legacy path used by older tests (current_tools_sent flags).
    test_delta = self._handle_test_compatibility(current_text)
    if test_delta:
        return test_delta

    name_matches = list(self.tool_name_reg.finditer(current_text))
    tool_count = len(name_matches)
    if tool_count == 0:
        return None
    self._ensure_state_arrays(tool_count)
    current_idx = self.streaming_state["current_tool_index"]

    # Send the tool name (with its id) first, then stream arguments.
    name_delta = self._handle_tool_name_streaming(current_idx, tool_count,
                                                  name_matches)
    if name_delta:
        return name_delta

    args_delta = self._handle_tool_args_streaming(current_text,
                                                  current_idx, tool_count)
    if args_delta:
        return args_delta

    return None

preprocess_model_output

preprocess_model_output(
    model_output: str,
) -> tuple[Optional[str], Optional[str]]
Source code in vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
def preprocess_model_output(
        self, model_output: str) -> tuple[Optional[str], Optional[str]]:
    """Split *model_output* into ``(content, tool_calls_json)``.

    Scans for the tool-calls pattern (``self.answer_tool_calls_pattern``),
    skipping any match that lies strictly inside a ``<think>...</think>``
    region and any match whose payload is not valid JSON.  Returns
    ``(model_output, None)`` when no usable tool-call block is found.
    """
    # Hoisted out of the loop: the <think> regions do not depend on the
    # current match, so compute them once instead of per match.
    think_regions = [
        (m.start(), m.end())
        for m in re.finditer(r"<think>(.*?)</think>", model_output,
                             flags=re.DOTALL)
    ]
    for match in self.answer_tool_calls_pattern.finditer(model_output):
        start, end = match.span()
        # Ignore tool calls emitted inside the model's thinking block.
        in_think = any(start > t_start and end < t_end
                       for t_start, t_end in think_regions)
        if in_think:
            continue
        content = model_output[:start]
        tool_calls_content = match.group(1).strip()
        try:
            # Validate only; the raw JSON string is returned as-is.
            json.loads(tool_calls_content)
            return content, tool_calls_content
        except Exception:
            continue
    return model_output, None

Internlm2ToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
@ToolParserManager.register_module(["internlm"])
class Internlm2ToolParser(ToolParser):
    """Tool parser for InternLM2-style model output.

    InternLM2 emits a single (non-parallel) tool call wrapped in the
    special tokens ``<|action_start|><|plugin|>`` ... ``<|action_end|>``,
    with the call itself encoded as a JSON object.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)
        # Index into the generated text up to which plain content has
        # already been streamed back to the client.
        self.position = 0

    def adjust_request(
            self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """Keep special tokens in the output when tool calling is active."""
        if request.tools and request.tool_choice != 'none':
            # do not skip special tokens because internlm uses the special
            # tokens to indicate the start and end of the tool call
            # information.
            request.skip_special_tokens = False
        return request

    def get_arguments(self, obj):
        """Return the call arguments ("parameters" preferred, then
        "arguments"), or None if neither key is present."""
        if "parameters" in obj:
            return obj.get("parameters")
        elif "arguments" in obj:
            return obj.get("arguments")
        return None

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Stream tool-call deltas from incremental model output.

        Emits plain content until ``<|action_start|>`` appears, then the
        function name (once), then incremental argument fragments.
        """
        if '<|action_start|>' not in current_text:
            self.position = len(current_text)
            return DeltaMessage(content=delta_text)
        # if the tool call has been sent, return an empty delta message
        # to make sure the finish_reason will be sent correctly.
        if self.current_tool_id > 0:
            return DeltaMessage(content='')

        last_pos = self.position
        if '<|action_start|><|plugin|>' not in current_text[last_pos:]:
            return None

        new_delta = current_text[last_pos:]
        text, action = new_delta.split('<|action_start|><|plugin|>')

        # Flush plain content that precedes the tool-call marker.
        if len(text) > 0:
            self.position = self.position + len(text)
            return DeltaMessage(content=text)

        action = action.strip()
        action = action.split('<|action_end|>')[0]

        # bit mask flags for partial JSON parsing. If the name hasn't been
        # sent yet, don't allow sending an incomplete string since OpenAI
        # only ever allows sending the entire tool/function name at once.
        flags = Allow.ALL if self.current_tool_name_sent \
            else Allow.ALL & ~Allow.STR

        try:
            parsable_arr = action

            # tool calls are generated in a single object in internlm2;
            # parallel tool calls are not supported
            try:
                tool_call_arr: dict = partial_json_parser.loads(
                    parsable_arr, flags)
            except partial_json_parser.core.exceptions.MalformedJSON:
                logger.debug('not enough tokens to parse into JSON yet')
                return None

            # if the current tool name hasn't been sent, send if available
            # - otherwise send nothing
            if not self.current_tool_name_sent:
                function_name = tool_call_arr.get("name")
                if function_name:
                    self.current_tool_id = self.current_tool_id + 1
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      type="function",
                                      id=make_tool_call_id(),
                                      function=DeltaFunctionCall(
                                          name=function_name).model_dump(
                                              exclude_none=True))
                    ])
                    self.current_tool_name_sent = True
                    self.streamed_args_for_tool.append("")
                else:
                    delta = None
            # now we know we're on the same tool call and we're streaming
            # arguments
            else:
                prev_arguments = self.get_arguments(
                    self.prev_tool_call_arr[self.current_tool_id])
                cur_arguments = self.get_arguments(tool_call_arr)

                # no arguments generated yet
                if not cur_arguments and not prev_arguments:
                    delta = None
                # will never happen
                elif not cur_arguments and prev_arguments:
                    logger.error(
                        "INVARIANT - impossible to have arguments reset "
                        "mid-arguments")
                    delta = None
                # first time any parameters are seen
                elif cur_arguments and not prev_arguments:
                    cur_arguments_json = json.dumps(cur_arguments,
                                                    ensure_ascii=False)

                    arguments_delta = cur_arguments_json[:cur_arguments_json.
                                                         index(delta_text) +
                                                         len(delta_text)]
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=arguments_delta).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += arguments_delta
                # both prev and cur parameters: send the incremental diff
                elif cur_arguments and prev_arguments:
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    prev_args_json = json.dumps(prev_arguments,
                                                ensure_ascii=False)

                    argument_diff = extract_intermediate_diff(
                        cur_args_json, prev_args_json)

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).model_dump(
                                              exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff

            # record state for the next chunk and return the delta (or
            # None as the base case)
            tool_call_arr["arguments"] = self.get_arguments(tool_call_arr)
            self.prev_tool_call_arr = [tool_call_arr]
            return delta
        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")
            return None

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Extract the tool call from a complete (non-streaming) output."""
        text = model_output
        tools = request.tools
        if '<|action_start|><|plugin|>' in text:
            text, action = text.split('<|action_start|><|plugin|>')
            action = action.split('<|action_end|>')[0]
            action = action[action.find('{'):]
            action_dict = json.loads(action)
            name, parameters = action_dict['name'], json.dumps(
                action_dict.get('parameters', action_dict.get('arguments',
                                                              {})),
                ensure_ascii=False)

            # BUG FIX: the original built this result without returning it,
            # so unknown tool names fell through and were reported as a
            # successful tool call.
            if not tools or name not in [t.function.name for t in tools]:
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=text)

            tool_calls = [
                ToolCall(
                    function=FunctionCall(name=name, arguments=parameters))
            ]
            return ExtractedToolCallInformation(
                tools_called=True,
                tool_calls=tool_calls,
                content=text if len(text) > 0 else None)

        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=text)

position instance-attribute

position = 0

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    super().__init__(tokenizer)
    # Index into the generated text up to which plain content has already
    # been streamed back to the client.
    self.position = 0

adjust_request

adjust_request(
    request: ChatCompletionRequest,
) -> ChatCompletionRequest
Source code in vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
def adjust_request(
        self, request: ChatCompletionRequest) -> ChatCompletionRequest:
    """Keep special tokens in the detokenized output while tool calling.

    InternLM marks the start and end of tool-call information with
    special tokens, so they must not be stripped from the output.
    """
    tool_calling_enabled = bool(request.tools) and request.tool_choice != 'none'
    if tool_calling_enabled:
        request.skip_special_tokens = False
    return request

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Extract the tool call from a complete (non-streaming) output.

    The call is expected between ``<|action_start|><|plugin|>`` and
    ``<|action_end|>`` as a JSON object with "name" and "parameters"
    (or "arguments").  Output without the marker is returned as plain
    content.
    """
    text = model_output
    tools = request.tools
    if '<|action_start|><|plugin|>' in text:
        text, action = text.split('<|action_start|><|plugin|>')
        action = action.split('<|action_end|>')[0]
        action = action[action.find('{'):]
        action_dict = json.loads(action)
        name, parameters = action_dict['name'], json.dumps(
            action_dict.get('parameters', action_dict.get('arguments',
                                                          {})),
            ensure_ascii=False)

        # BUG FIX: the original built this result without returning it,
        # so unknown tool names fell through and were reported as a
        # successful tool call.
        if not tools or name not in [t.function.name for t in tools]:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=text)

        tool_calls = [
            ToolCall(
                function=FunctionCall(name=name, arguments=parameters))
        ]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=text if len(text) > 0 else None)

    return ExtractedToolCallInformation(tools_called=False,
                                        tool_calls=[],
                                        content=text)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Stream tool-call deltas from incremental InternLM2 output.

    Emits plain content until ``<|action_start|>`` appears, then the
    function name (once), then incremental argument fragments.
    """
    if '<|action_start|>' not in current_text:
        self.position = len(current_text)
        return DeltaMessage(content=delta_text)
    # if the tool call has been sent, return an empty delta message
    # to make sure the finish_reason will be sent correctly.
    if self.current_tool_id > 0:
        return DeltaMessage(content='')

    last_pos = self.position
    if '<|action_start|><|plugin|>' not in current_text[last_pos:]:
        return None

    new_delta = current_text[last_pos:]
    text, action = new_delta.split('<|action_start|><|plugin|>')

    # Flush plain content that precedes the tool-call marker.
    if len(text) > 0:
        self.position = self.position + len(text)
        return DeltaMessage(content=text)

    action = action.strip()
    action = action.split('<|action_end|>'.strip())[0]

    # bit mask flags for partial JSON parsing. If the name hasn't been
    # sent yet, don't allow sending
    # an incomplete string since OpenAI only ever (as far as I have
    # seen) allows sending the entire tool/ function name at once.
    flags = Allow.ALL if self.current_tool_name_sent \
        else Allow.ALL & ~Allow.STR

    try:
        parsable_arr = action

        # tool calls are generated in a single object in internlm2;
        # parallel tool calls are not supported
        try:
            tool_call_arr: dict = partial_json_parser.loads(
                parsable_arr, flags)
        except partial_json_parser.core.exceptions.MalformedJSON:
            logger.debug('not enough tokens to parse into JSON yet')
            return None

        # if the current tool name hasn't been sent, send if available
        # - otherwise send nothing
        if not self.current_tool_name_sent:
            function_name = tool_call_arr.get("name")
            if function_name:
                self.current_tool_id = self.current_tool_id + 1
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  type="function",
                                  id=make_tool_call_id(),
                                  function=DeltaFunctionCall(
                                      name=function_name).model_dump(
                                          exclude_none=True))
                ])
                self.current_tool_name_sent = True
                self.streamed_args_for_tool.append("")
            else:
                delta = None
        # now we know we're on the same tool call and we're streaming
        # arguments
        else:
            prev_arguments = self.get_arguments(
                self.prev_tool_call_arr[self.current_tool_id])
            cur_arguments = self.get_arguments(tool_call_arr)

            # no arguments generated yet
            if not cur_arguments and not prev_arguments:
                delta = None
            # will never happen
            elif not cur_arguments and prev_arguments:
                logger.error(
                    "INVARIANT - impossible to have arguments reset "
                    "mid-arguments")
                delta = None
            # first time any parameters are seen
            elif cur_arguments and not prev_arguments:
                cur_arguments_json = json.dumps(cur_arguments,
                                                ensure_ascii=False)

                arguments_delta = cur_arguments_json[:cur_arguments_json.
                                                     index(delta_text) +
                                                     len(delta_text)]
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=arguments_delta).
                                  model_dump(exclude_none=True))
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] += arguments_delta
            # both prev and cur parameters: send the incremental diff
            elif cur_arguments and prev_arguments:
                cur_args_json = json.dumps(cur_arguments,
                                           ensure_ascii=False)
                prev_args_json = json.dumps(prev_arguments,
                                            ensure_ascii=False)

                argument_diff = extract_intermediate_diff(
                    cur_args_json, prev_args_json)

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=argument_diff).model_dump(
                                          exclude_none=True))
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] += argument_diff

        # record state for the next chunk and return the delta (or None
        # as the base case)
        tool_call_arr["arguments"] = self.get_arguments(tool_call_arr)
        self.prev_tool_call_arr = [tool_call_arr]
        return delta
    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        logger.debug(
            "Skipping chunk as a result of tool streaming extraction "
            "error")
        return None

get_arguments

get_arguments(obj)
Source code in vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
def get_arguments(self, obj):
    """Return the call arguments carried by *obj*.

    InternLM output may use either the "parameters" or the "arguments"
    key; "parameters" takes precedence.  Returns None when neither key
    is present.
    """
    for key in ("parameters", "arguments"):
        if key in obj:
            return obj.get(key)
    return None

JambaToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
@ToolParserManager.register_module("jamba")
class JambaToolParser(ToolParser):
    """Tool parser for Jamba-style model output.

    Jamba emits a JSON array of tool calls between the special tokens
    ``<tool_calls>`` and ``</tool_calls>``; parallel tool calls are
    supported.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        # A Mistral tokenizer here indicates a misconfigured model.
        if isinstance(self.model_tokenizer, MistralTokenizer):
            raise ValueError(
                "Detected a MistralTokenizer tokenizer when using a Jamba model"
            )

        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.streamed_args_for_tool: list[str] = [
        ]  # map what has been streamed for each tool so far to a list

        self.tool_calls_start_token: str = "<tool_calls>"
        self.tool_calls_end_token: str = "</tool_calls>"

        # Matches the JSON array between the start and end tokens.
        self.tool_calls_regex = re.compile(
            rf"{self.tool_calls_start_token}(.*?){self.tool_calls_end_token}",
            re.DOTALL)

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")
        # Both marker tokens must exist in the tokenizer vocabulary.
        self.tool_calls_start_token_id = self.vocab.get(
            self.tool_calls_start_token)
        self.tool_calls_end_token_id = self.vocab.get(
            self.tool_calls_end_token)
        if (self.tool_calls_start_token_id is None
                or self.tool_calls_end_token_id is None):
            raise RuntimeError(
                "Jamba Tool parser could not locate tool calls start/end "
                "tokens in the tokenizer!")

    def adjust_request(
            self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """Keep special tokens in the output when tool calling is active."""
        if request.tools and request.tool_choice != 'none':
            # do not skip special tokens because jamba use the special
            # tokens to indicate the start and end of the tool calls
            # information.
            request.skip_special_tokens = False
        return request

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """Extract tool calls from a complete (non-streaming) output."""

        # sanity check; avoid unnecessary processing
        if self.tool_calls_start_token not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        else:

            try:
                # use a regex to find the tool call between the tags
                function_calls = self.tool_calls_regex.findall(model_output)[0]

                # load the JSON, and then use it to build the Function and
                # Tool Call
                raw_function_calls = json.loads(function_calls)
                tool_calls = [
                    ToolCall(
                        type="function",
                        function=FunctionCall(
                            name=function_call["name"],
                            # function call args are JSON but as a string
                            arguments=json.dumps(function_call["arguments"],
                                                 ensure_ascii=False),
                        )) for function_call in raw_function_calls
                ]

                # any text before the start token is plain content
                content = model_output[:model_output.
                                       find(self.tool_calls_start_token)]
                return ExtractedToolCallInformation(
                    tools_called=True,
                    tool_calls=tool_calls,
                    content=content if
                    (len(content) > 0 and content != " ") else None)

            except Exception:
                logger.exception(
                    "Error in extracting tool call from response.")
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Stream tool-call deltas from incremental Jamba output.

        Tracks the index of the current tool in the array, sends each
        tool's name first, then incremental argument fragments computed
        by diffing successive partial-JSON parses.
        """

        # if the tool call token is not in the tokens generated so far, append
        # output to contents since it's not a tool
        if self.tool_calls_start_token not in current_text:
            return DeltaMessage(content=delta_text)

        # if the tool call token ID IS in the tokens generated so far, that
        # means we're parsing as tool calls now

        # handle if we detected the start of tool calls token which means
        # the start of tool calling
        if (self.tool_calls_start_token_id in delta_token_ids
                and len(delta_token_ids) == 1):
            # if it's the only token, return None, so we don't send a chat
            # completion and don't send a control token
            return None

        # bit mask flags for partial JSON parsing. If the name hasn't been
        # sent yet, don't allow sending
        # an incomplete string since OpenAI only ever (as far as I have
        # seen) allows sending the entire tool/ function name at once.
        flags = Allow.ALL if self.current_tool_name_sent \
            else Allow.ALL & ~Allow.STR
        try:

            # Extract the tool calls between the special tool call tokens
            parsable_arr = current_text.split(
                self.tool_calls_start_token)[-1].split(
                    self.tool_calls_end_token)[0]

            # tool calls are generated in an array, so do partial JSON
            # parsing on the entire array
            try:
                tool_call_arr: list[dict] = partial_json_parser.loads(
                    parsable_arr, flags)
            except partial_json_parser.core.exceptions.MalformedJSON:
                logger.debug('not enough tokens to parse into JSON yet')
                return None

            # select as the current tool call the one we're on the state at

            current_tool_call: dict = tool_call_arr[self.current_tool_id] \
                if len(tool_call_arr) > 0 else {}

            # case -- if no tokens have been streamed for the tool, e.g.
            #   only the array brackets, stream nothing
            if len(tool_call_arr) == 0:
                return None

            # case: we are starting a new tool in the array
            #   -> array has > 0 length AND length has moved past cursor
            elif (len(tool_call_arr) > 0
                  and len(tool_call_arr) > self.current_tool_id + 1):

                # if we're moving on to a new call, first make sure we
                # haven't missed anything in the previous one that was
                # auto-generated due to JSON completions, but wasn't
                # streamed to the client yet.
                if self.current_tool_id >= 0:
                    diff: Union[str, None] = current_tool_call.get("arguments")

                    if diff:
                        diff = json.dumps(diff, ensure_ascii=False).replace(
                            self.streamed_args_for_tool[self.current_tool_id],
                            "")
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=diff).model_dump(
                                                  exclude_none=True))
                        ])
                        self.streamed_args_for_tool[
                            self.current_tool_id] += diff
                    else:
                        delta = None
                else:
                    delta = None
                # re-set stuff pertaining to progress in the current tool
                self.current_tool_id = len(tool_call_arr) - 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("starting on new tool %d", self.current_tool_id)
                return delta

            # case: update an existing tool - this is handled below

            # if the current tool name hasn't been sent, send if available
            # - otherwise send nothing
            if not self.current_tool_name_sent:
                function_name = current_tool_call.get("name")
                if function_name:

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      type="function",
                                      id=make_tool_call_id(),
                                      function=DeltaFunctionCall(
                                          name=function_name).model_dump(
                                              exclude_none=True))
                    ])
                    self.current_tool_name_sent = True
                else:
                    delta = None

            # now we know we're on the same tool call and we're streaming
            # arguments
            else:

                prev_arguments = self.prev_tool_call_arr[
                    self.current_tool_id].get("arguments")
                cur_arguments = current_tool_call.get("arguments")

                # normalize quotes so the delta can be located in the
                # JSON-dumped arguments
                new_text = delta_text.replace("\'", "\"")

                if not cur_arguments and not prev_arguments:

                    delta = None
                elif not cur_arguments and prev_arguments:
                    logger.error(
                        "INVARIANT - impossible to have arguments reset "
                        "mid-arguments")
                    delta = None
                elif cur_arguments and not prev_arguments:
                    cur_arguments_json = json.dumps(cur_arguments,
                                                    ensure_ascii=False)
                    logger.debug("finding %s in %s", new_text,
                                 cur_arguments_json)

                    arguments_delta = cur_arguments_json[:cur_arguments_json.
                                                         index(new_text) +
                                                         len(new_text)]
                    logger.debug("First tokens in arguments received: %s",
                                 arguments_delta)
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=arguments_delta).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += arguments_delta

                elif cur_arguments and prev_arguments:
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    prev_args_json = json.dumps(prev_arguments,
                                                ensure_ascii=False)
                    logger.debug("Searching for diff between \n%s\n%s",
                                 cur_args_json, prev_args_json)

                    argument_diff = extract_intermediate_diff(
                        cur_args_json, prev_args_json)
                    logger.debug("got arguments diff: %s", argument_diff)
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).model_dump(
                                              exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff
                else:
                    # try parsing it with regular JSON - if it works we're
                    # at the end, and we need to send the difference between
                    # tokens streamed so far and the valid JSON
                    delta = None

            # check to see if the name is defined and has been sent. if so,
            # stream the name - otherwise keep waiting
            # finish by setting old and returning None as base case
            self.prev_tool_call_arr = tool_call_arr
            return delta

        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")
            return None

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_calls_end_token instance-attribute

tool_calls_end_token: str = '</tool_calls>'

tool_calls_end_token_id instance-attribute

tool_calls_end_token_id = get(tool_calls_end_token)

tool_calls_regex instance-attribute

tool_calls_regex = compile(
    f"{tool_calls_start_token}(.*?){tool_calls_end_token}",
    DOTALL,
)

tool_calls_start_token instance-attribute

tool_calls_start_token: str = '<tool_calls>'

tool_calls_start_token_id instance-attribute

tool_calls_start_token_id = get(tool_calls_start_token)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Set up Jamba tool-call parsing state and locate the special tokens."""
    super().__init__(tokenizer)

    if isinstance(self.model_tokenizer, MistralTokenizer):
        raise ValueError(
            "Detected a MistralTokenizer tokenizer when using a Jamba model"
        )

    # Streaming state: which tool we are on, whether its name has been
    # emitted, and the argument text already streamed for each tool.
    self.current_tool_name_sent: bool = False
    self.prev_tool_call_arr: list[dict] = []
    self.current_tool_id: int = -1
    self.streamed_args_for_tool: list[str] = []

    start_tag, end_tag = "<tool_calls>", "</tool_calls>"
    self.tool_calls_start_token: str = start_tag
    self.tool_calls_end_token: str = end_tag

    # Non-greedy match of everything between the start/end tags.
    self.tool_calls_regex = re.compile(rf"{start_tag}(.*?){end_tag}",
                                       re.DOTALL)

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ToolParser "
            "constructor during construction.")

    self.tool_calls_start_token_id = self.vocab.get(start_tag)
    self.tool_calls_end_token_id = self.vocab.get(end_tag)
    if (self.tool_calls_start_token_id is None
            or self.tool_calls_end_token_id is None):
        raise RuntimeError(
            "Jamba Tool parser could not locate tool calls start/end "
            "tokens in the tokenizer!")

adjust_request

adjust_request(
    request: ChatCompletionRequest,
) -> ChatCompletionRequest
Source code in vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
def adjust_request(
        self, request: ChatCompletionRequest) -> ChatCompletionRequest:
    """Keep special tokens in the output whenever tools may be called.

    Jamba marks the tool-call section with special tokens, so they must
    not be stripped during detokenization.
    """
    tools_enabled = bool(request.tools) and request.tool_choice != 'none'
    if tools_enabled:
        request.skip_special_tokens = False
    return request

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """Parse a completed Jamba response for tool calls.

    Extracts the JSON array between the <tool_calls> tags and returns the
    parsed calls plus any text preceding the section; on any parse
    failure the raw output is returned as plain content.
    """
    # Fast path: no tool-call tag anywhere -> plain content.
    if self.tool_calls_start_token not in model_output:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        # Grab the first span between the start/end tags.
        function_calls = self.tool_calls_regex.findall(model_output)[0]

        # The span is a JSON array of {"name": ..., "arguments": {...}}.
        raw_function_calls = json.loads(function_calls)

        tool_calls = []
        for call in raw_function_calls:
            tool_calls.append(
                ToolCall(
                    type="function",
                    function=FunctionCall(
                        name=call["name"],
                        # arguments must be re-serialized to a JSON string
                        arguments=json.dumps(call["arguments"],
                                             ensure_ascii=False),
                    )))

        tag_pos = model_output.find(self.tool_calls_start_token)
        content = model_output[:tag_pos]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=content if
            (len(content) > 0 and content != " ") else None)

    except Exception:
        logger.exception(
            "Error in extracting tool call from response.")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally extract Jamba tool-call deltas while streaming.

    Returns a DeltaMessage carrying either plain content or tool-call
    fragments (the function name first, then argument-string diffs), or
    None when nothing can safely be emitted for this chunk yet.
    """

    # if the tool call token is not in the tokens generated so far, append
    # output to contents since it's not a tool
    if self.tool_calls_start_token not in current_text:
        return DeltaMessage(content=delta_text)

    # if the tool call token ID IS in the tokens generated so far, that
    # means we're parsing as tool calls now

    # handle if we detected the start of tool calls token which means
    # the start of tool calling
    if (self.tool_calls_start_token_id in delta_token_ids
            and len(delta_token_ids) == 1):
        # if it's the only token, return None, so we don't send a chat
        # completion and don't send a control token
        return None

    # bit mask flags for partial JSON parsing. If the name hasn't been
    # sent yet, don't allow sending
    # an incomplete string since OpenAI only ever (as far as I have
    # seen) allows sending the entire tool/ function name at once.
    flags = Allow.ALL if self.current_tool_name_sent \
        else Allow.ALL & ~Allow.STR
    try:

        # Extract the tool calls between the special tool call tokens
        parsable_arr = current_text.split(
            self.tool_calls_start_token)[-1].split(
                self.tool_calls_end_token)[0]

        # tool calls are generated in an array, so do partial JSON
        # parsing on the entire array
        try:
            tool_call_arr: list[dict] = partial_json_parser.loads(
                parsable_arr, flags)
        except partial_json_parser.core.exceptions.MalformedJSON:
            logger.debug('not enough tokens to parse into JSON yet')
            return None

        # select as the current tool call the one we're on the state at

        # NOTE: current_tool_id starts at -1, so before the first tool is
        # opened this indexes the last (i.e. only) element of the array.
        current_tool_call: dict = tool_call_arr[self.current_tool_id] \
            if len(tool_call_arr) > 0 else {}

        # case -- if no tokens have been streamed for the tool, e.g.
        #   only the array brackets, stream nothing
        if len(tool_call_arr) == 0:
            return None

        # case: we are starting a new tool in the array
        #   -> array has > 0 length AND length has moved past cursor
        elif (len(tool_call_arr) > 0
              and len(tool_call_arr) > self.current_tool_id + 1):

            # if we're moving on to a new call, first make sure we
            # haven't missed anything in the previous one that was
            # auto-generated due to JSON completions, but wasn't
            # streamed to the client yet.
            if self.current_tool_id >= 0:
                diff: Union[str, None] = current_tool_call.get("arguments")

                if diff:
                    # flush whatever part of the serialized arguments has
                    # not been streamed for the previous tool yet
                    diff = json.dumps(diff, ensure_ascii=False).replace(
                        self.streamed_args_for_tool[self.current_tool_id],
                        "")
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=diff).model_dump(
                                              exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += diff
                else:
                    delta = None
            else:
                delta = None
            # re-set stuff pertaining to progress in the current tool
            self.current_tool_id = len(tool_call_arr) - 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("starting on new tool %d", self.current_tool_id)
            return delta

        # case: update an existing tool - this is handled below

        # if the current tool name hasn't been sent, send if available
        # - otherwise send nothing
        if not self.current_tool_name_sent:
            function_name = current_tool_call.get("name")
            if function_name:

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  type="function",
                                  id=make_tool_call_id(),
                                  function=DeltaFunctionCall(
                                      name=function_name).model_dump(
                                          exclude_none=True))
                ])
                self.current_tool_name_sent = True
            else:
                delta = None

        # now we know we're on the same tool call and we're streaming
        # arguments
        else:

            # NOTE(review): assumes prev_tool_call_arr already has an
            # entry at current_tool_id; an IndexError here is swallowed
            # by the outer except and the chunk is skipped.
            prev_arguments = self.prev_tool_call_arr[
                self.current_tool_id].get("arguments")
            cur_arguments = current_tool_call.get("arguments")

            new_text = delta_text.replace("\'", "\"")

            if not cur_arguments and not prev_arguments:

                delta = None
            elif not cur_arguments and prev_arguments:
                logger.error(
                    "INVARIANT - impossible to have arguments reset "
                    "mid-arguments")
                delta = None
            elif cur_arguments and not prev_arguments:
                cur_arguments_json = json.dumps(cur_arguments,
                                                ensure_ascii=False)
                logger.debug("finding %s in %s", new_text,
                             cur_arguments_json)

                # stream everything up to and including the delta text's
                # position within the serialized arguments
                arguments_delta = cur_arguments_json[:cur_arguments_json.
                                                     index(new_text) +
                                                     len(new_text)]
                logger.debug("First tokens in arguments received: %s",
                             arguments_delta)
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=arguments_delta).
                                  model_dump(exclude_none=True))
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] += arguments_delta

            elif cur_arguments and prev_arguments:
                cur_args_json = json.dumps(cur_arguments,
                                           ensure_ascii=False)
                prev_args_json = json.dumps(prev_arguments,
                                            ensure_ascii=False)
                logger.debug("Searching for diff between \n%s\n%s",
                             cur_args_json, prev_args_json)

                argument_diff = extract_intermediate_diff(
                    cur_args_json, prev_args_json)
                logger.debug("got arguments diff: %s", argument_diff)
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=argument_diff).model_dump(
                                          exclude_none=True))
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] += argument_diff
            else:
                # try parsing it with regular JSON - if it works we're
                # at the end, and we need to send the difference between
                # tokens streamed so far and the valid JSON
                delta = None

        # check to see if the name is defined and has been sent. if so,
        # stream the name - otherwise keep waiting
        # finish by setting old and returning None as base case
        self.prev_tool_call_arr = tool_call_arr
        return delta

    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        logger.debug(
            "Skipping chunk as a result of tool streaming extraction "
            "error")
        return None

KimiK2ToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
@ToolParserManager.register_module(["kimi_k2"])
class KimiK2ToolParser(ToolParser):
    """Tool parser for Kimi-K2 model outputs.

    Kimi-K2 wraps all tool calls in a
    ``<|tool_calls_section_begin|> ... <|tool_calls_section_end|>`` section,
    and each individual call in ``<|tool_call_begin|> ... <|tool_call_end|>``
    with a call id (e.g. ``functions.get_weather:0``) followed by
    ``<|tool_call_argument_begin|>`` and the JSON argument string.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        """Initialize streaming state and compile the tool-call regexes."""
        super().__init__(tokenizer)
        # Streaming state: current tool index, whether its name was emitted,
        # and the argument text already streamed per tool.
        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.streamed_args_for_tool: list[str] = (
            [])  # map what has been streamed for each tool so far to a list

        self.tool_calls_start_token: str = "<|tool_calls_section_begin|>"
        self.tool_calls_end_token: str = "<|tool_calls_section_end|>"

        self.tool_call_start_token: str = "<|tool_call_begin|>"
        self.tool_call_end_token: str = "<|tool_call_end|>"

        # A complete call: id, argument marker, then arguments (non-greedy)
        # up to the end token.
        self.tool_call_regex = re.compile(
            r"<\|tool_call_begin\|>\s*(?P<tool_call_id>.+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>.*?)\s*<\|tool_call_end\|>"
        )

        # Streaming: partial call where the argument marker has been seen.
        self.stream_tool_call_portion_regex = re.compile(
            r"(?P<tool_call_id>.+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>.*)"
        )

        # Streaming: only the call id is available so far.
        self.stream_tool_call_name_regex = re.compile(
            r"(?P<tool_call_id>.+:\d+)\s*")

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")
        self.tool_calls_start_token_id = self.vocab.get(
            self.tool_calls_start_token)
        self.tool_calls_end_token_id = self.vocab.get(
            self.tool_calls_end_token)

        # NOTE(review): the per-call start/end token ids are not validated
        # below -- presumably they exist whenever the section tokens do;
        # confirm against the tokenizer.
        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

        if (self.tool_calls_start_token_id is None
                or self.tool_calls_end_token_id is None):
            raise RuntimeError(
                "Kimi-K2 Tool parser could not locate tool call start/end "
                "tokens in the tokenizer!")

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Parse a finished completion for Kimi-K2 tool calls.

        Returns the calls found between the per-call tokens plus any text
        preceding the section; on failure the raw output is returned as
        plain content.
        """

        # sanity check; avoid unnecessary processing
        if self.tool_calls_start_token not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        else:
            try:
                # findall yields (tool_call_id, function_arguments) tuples,
                # one per complete tool call in the output
                function_call_tuples = self.tool_call_regex.findall(
                    model_output)

                logger.debug("function_call_tuples: %s", function_call_tuples)

                tool_calls = []
                for match in function_call_tuples:
                    function_id, function_args = match
                    # function_id: functions.get_weather:0 -> the function
                    # name is the middle segment
                    function_name = function_id.split('.')[1].split(':')[0]
                    tool_calls.append(
                        ToolCall(
                            id=function_id,
                            type='function',
                            function=FunctionCall(name=function_name,
                                                  arguments=function_args),
                        ))

                content = model_output[:model_output.
                                       find(self.tool_calls_start_token)]
                return ExtractedToolCallInformation(
                    tools_called=True,
                    tool_calls=tool_calls,
                    content=content if content else None,
                )

            except Exception:
                logger.exception(
                    "Error in extracting tool call from response.")
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Incrementally extract Kimi-K2 tool-call deltas while streaming.

        Tracks position by counting per-call start/end tokens, emits the
        function name first and then raw argument-string diffs; returns
        None when nothing can be emitted for this chunk.
        """

        logger.debug("delta_text: %s", delta_text)
        logger.debug("delta_token_ids: %s", delta_token_ids)
        # check to see if we should be streaming a tool call - is there a
        if self.tool_calls_start_token_id not in current_token_ids:
            logger.debug("No tool call tokens found!")
            return DeltaMessage(content=delta_text)
        # strip the section markers -- they are control tokens, not content
        delta_text = delta_text.replace(self.tool_calls_start_token,
                                        "").replace(self.tool_calls_end_token,
                                                    "")
        try:

            # figure out where we are in the parsing by counting tool call
            # start & end tags
            prev_tool_start_count = previous_token_ids.count(
                self.tool_call_start_token_id)
            prev_tool_end_count = previous_token_ids.count(
                self.tool_call_end_token_id)
            cur_tool_start_count = current_token_ids.count(
                self.tool_call_start_token_id)
            cur_tool_end_count = current_token_ids.count(
                self.tool_call_end_token_id)
            tool_call_portion = None
            text_portion = None

            # case: if we're generating text, OR rounding out a tool call
            if (cur_tool_start_count == cur_tool_end_count
                    and prev_tool_end_count == cur_tool_end_count
                    and self.tool_call_end_token not in delta_text):
                logger.debug("Generating text content! skipping tool parsing.")
                return DeltaMessage(content=delta_text)

            if self.tool_call_end_token in delta_text:
                logger.debug("tool_call_end_token in delta_text")
                full_text = current_text + delta_text
                tool_call_portion = full_text.split(
                    self.tool_call_start_token)[-1].split(
                        self.tool_call_end_token)[0].rstrip()
                delta_text = delta_text.split(
                    self.tool_call_end_token)[0].rstrip()
                # NOTE(review): delta_text was already truncated at the end
                # token above, so this split finds nothing to remove and
                # text_portion equals the truncated delta -- verify intent.
                text_portion = delta_text.split(
                    self.tool_call_end_token)[-1].lstrip()

            # case -- we're starting a new tool call
            if (cur_tool_start_count > cur_tool_end_count
                    and cur_tool_start_count > prev_tool_start_count):
                if len(delta_token_ids) > 1:
                    tool_call_portion = current_text.split(
                        self.tool_call_start_token)[-1]
                else:
                    tool_call_portion = None
                    delta = None

                text_portion = None

                # set cursors and state appropriately
                self.current_tool_id += 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("Starting on a new tool %s", self.current_tool_id)

            # case -- we're updating an existing tool call
            elif (cur_tool_start_count > cur_tool_end_count
                  and cur_tool_start_count == prev_tool_start_count):

                # get the portion of the text that's the tool call
                tool_call_portion = current_text.split(
                    self.tool_call_start_token)[-1]
                text_portion = None

            # case -- the current tool call is being closed.
            elif (cur_tool_start_count == cur_tool_end_count
                  and cur_tool_end_count >= prev_tool_end_count):
                if self.prev_tool_call_arr is None or len(
                        self.prev_tool_call_arr) == 0:
                    logger.debug(
                        "attempting to close tool call, but no tool call")
                    return None
                diff = self.prev_tool_call_arr[self.current_tool_id].get(
                    "arguments")
                if diff:
                    # BUG FIX: the original guarded a unicode_escape round
                    # trip with `diff is str` -- an identity test against the
                    # type object that is always False -- and its result was
                    # immediately overwritten by the rebuild below anyway;
                    # the dead branch has been removed.
                    if '"}' not in delta_text:
                        return None
                    end_loc = delta_text.rindex('"}')
                    diff = delta_text[:end_loc] + '"}'
                    logger.debug(
                        "Finishing tool and found diff that had not "
                        "been streamed yet: %s",
                        diff,
                    )
                    self.streamed_args_for_tool[self.current_tool_id] += diff
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            function=DeltaFunctionCall(
                                arguments=diff).model_dump(exclude_none=True),
                        )
                    ])

            # case -- otherwise we're just generating text
            else:
                text = delta_text.replace(self.tool_call_start_token, "")
                text = text.replace(self.tool_call_end_token, "")
                delta = DeltaMessage(tool_calls=[], content=text)
                return delta

            current_tool_call = dict()
            if tool_call_portion:
                # try to parse id + arguments; fall back to id only
                current_tool_call_matches = (
                    self.stream_tool_call_portion_regex.match(
                        tool_call_portion))
                if current_tool_call_matches:
                    tool_id, tool_args = (current_tool_call_matches.groups())
                    tool_name = tool_id.split('.')[1].split(':')[0]
                    current_tool_call['id'] = tool_id
                    current_tool_call["name"] = tool_name
                    current_tool_call["arguments"] = tool_args
                else:
                    current_tool_call_name_matches = (
                        self.stream_tool_call_name_regex.match(
                            tool_call_portion))
                    if current_tool_call_name_matches:
                        tool_id_str, = current_tool_call_name_matches.groups()
                        tool_name = tool_id_str.split('.')[1].split(':')[0]
                        current_tool_call['id'] = tool_id_str
                        current_tool_call["name"] = tool_name
                        current_tool_call["arguments"] = ""
                    else:
                        logger.debug("Not enough token")
                        return None

            # case - we haven't sent the tool name yet. If it's available, send
            #   it. otherwise, wait until it's available.
            if not self.current_tool_name_sent:
                if current_tool_call is None:
                    return None
                function_name: Union[str, None] = current_tool_call.get("name")
                tool_id = current_tool_call.get("id")
                if function_name:
                    self.current_tool_name_sent = True
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            type="function",
                            id=tool_id,
                            function=DeltaFunctionCall(
                                name=function_name).model_dump(
                                    exclude_none=True),
                        )
                    ])
                else:
                    return None

            # case -- otherwise, send the tool call delta

            # if the tool call portion is None, send the delta as text
            if tool_call_portion is None:
                # if there's text but not tool calls, send that -
                # otherwise None to skip chunk
                delta = (DeltaMessage(
                    content=delta_text) if text_portion is not None else None)
                return delta

            # now, the nitty-gritty of tool calls
            # now we have the portion to parse as tool call.

            logger.debug("Trying to parse current tool call with ID %s",
                         self.current_tool_id)

            # if we're starting a new tool call, push an empty object in as
            #   a placeholder for the arguments
            if len(self.prev_tool_call_arr) <= self.current_tool_id:
                self.prev_tool_call_arr.append({})

            # main logic for tool parsing here - compare prev. partially-parsed
            #   JSON to the current partially-parsed JSON
            prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
                "arguments")
            cur_arguments = current_tool_call.get("arguments")

            logger.debug("diffing old arguments: %s", prev_arguments)
            logger.debug("against new ones: %s", cur_arguments)

            # case -- no arguments have been created yet. skip sending a delta.
            if not cur_arguments and not prev_arguments:
                logger.debug("Skipping text %s - no arguments", delta_text)
                delta = None

            # case -- prev arguments are defined, but non are now.
            #   probably impossible, but not a fatal error - just keep going
            elif not cur_arguments and prev_arguments:
                logger.error("should be impossible to have arguments reset "
                             "mid-call. skipping streaming anything.")
                delta = None

            # case -- we now have the first info about arguments available from
            #   autocompleting the JSON
            elif cur_arguments and not prev_arguments:

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=cur_arguments).model_dump(
                                exclude_none=True),
                    )
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] = cur_arguments

            # last case -- we have an update to existing arguments.
            elif cur_arguments and prev_arguments:
                # only stream when the new arguments strictly extend the old
                # ones; anything else would emit an inconsistent diff
                if (isinstance(delta_text, str)
                        and cur_arguments != prev_arguments
                        and len(cur_arguments) > len(prev_arguments)
                        and cur_arguments.startswith(prev_arguments)):
                    delta_arguments = cur_arguments[len(prev_arguments):]
                    logger.debug("got diff %s", delta_text)

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_id,
                            function=DeltaFunctionCall(
                                arguments=delta_arguments).model_dump(
                                    exclude_none=True),
                        )
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] = cur_arguments
                else:
                    delta = None

            # handle saving the state for the current tool into
            # the "prev" list for use in diffing for the next iteration
            if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
                self.prev_tool_call_arr[
                    self.current_tool_id] = current_tool_call
            else:
                self.prev_tool_call_arr.append(current_tool_call)

            return delta

        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            return None  # do not stream a delta. skip this token ID.

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

stream_tool_call_name_regex instance-attribute

stream_tool_call_name_regex = compile(
    "(?P<tool_call_id>.+:\\d+)\\s*"
)

stream_tool_call_portion_regex instance-attribute

stream_tool_call_portion_regex = compile(
    "(?P<tool_call_id>.+:\\d+)\\s*<\\|tool_call_argument_begin\\|>\\s*(?P<function_arguments>.*)"
)

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_call_end_token instance-attribute

tool_call_end_token: str = '<|tool_call_end|>'

tool_call_end_token_id instance-attribute

tool_call_end_token_id = get(tool_call_end_token)

tool_call_regex instance-attribute

tool_call_regex = compile(
    "<\\|tool_call_begin\\|>\\s*(?P<tool_call_id>.+:\\d+)\\s*<\\|tool_call_argument_begin\\|>\\s*(?P<function_arguments>.*?)\\s*<\\|tool_call_end\\|>"
)

tool_call_start_token instance-attribute

tool_call_start_token: str = '<|tool_call_begin|>'

tool_call_start_token_id instance-attribute

tool_call_start_token_id = get(tool_call_start_token)

tool_calls_end_token instance-attribute

tool_calls_end_token: str = '<|tool_calls_section_end|>'

tool_calls_end_token_id instance-attribute

tool_calls_end_token_id = get(tool_calls_end_token)

tool_calls_start_token instance-attribute

tool_calls_start_token: str = "<|tool_calls_section_begin|>"

tool_calls_start_token_id instance-attribute

tool_calls_start_token_id = get(tool_calls_start_token)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Initialize streaming state and compile the Kimi-K2 tool-call regexes."""
    super().__init__(tokenizer)

    # Per-stream parsing state.
    self.current_tool_name_sent: bool = False
    self.prev_tool_call_arr: list[dict] = []
    self.current_tool_id: int = -1
    self.streamed_args_for_tool: list[str] = []

    # Special tokens delimiting the tool-call section and each call.
    self.tool_calls_start_token: str = "<|tool_calls_section_begin|>"
    self.tool_calls_end_token: str = "<|tool_calls_section_end|>"
    self.tool_call_start_token: str = "<|tool_call_begin|>"
    self.tool_call_end_token: str = "<|tool_call_end|>"

    # A complete call: id, argument marker, arguments, end token.
    self.tool_call_regex = re.compile(
        r"<\|tool_call_begin\|>\s*(?P<tool_call_id>.+:\d+)\s*"
        r"<\|tool_call_argument_begin\|>\s*"
        r"(?P<function_arguments>.*?)\s*<\|tool_call_end\|>")

    # Streaming variant: id plus whatever arguments have arrived.
    self.stream_tool_call_portion_regex = re.compile(
        r"(?P<tool_call_id>.+:\d+)\s*<\|tool_call_argument_begin\|>\s*"
        r"(?P<function_arguments>.*)")

    # Streaming variant: only the id is available so far.
    self.stream_tool_call_name_regex = re.compile(
        r"(?P<tool_call_id>.+:\d+)\s*")

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ToolParser "
            "constructor during construction.")

    vocab = self.vocab
    self.tool_calls_start_token_id = vocab.get(self.tool_calls_start_token)
    self.tool_calls_end_token_id = vocab.get(self.tool_calls_end_token)
    self.tool_call_start_token_id = vocab.get(self.tool_call_start_token)
    self.tool_call_end_token_id = vocab.get(self.tool_call_end_token)

    if (self.tool_calls_start_token_id is None
            or self.tool_calls_end_token_id is None):
        raise RuntimeError(
            "Kimi-K2 Tool parser could not locate tool call start/end "
            "tokens in the tokenizer!")

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Extract every tool call from a completed model response."""

    # Fast path: no section marker means the output is plain text.
    if self.tool_calls_start_token not in model_output:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        # Each match is an (id, arguments) pair captured between the
        # per-call begin/argument/end markers.
        matches = self.tool_call_regex.findall(model_output)
        logger.debug("function_call_tuples: %s", matches)

        tool_calls = []
        for tool_id, tool_args in matches:
            # tool_id looks like "functions.get_weather:0": the function
            # name sits between the first '.' and the trailing ':<index>'.
            name = tool_id.split('.')[1].split(':')[0]
            tool_calls.append(
                ToolCall(
                    id=tool_id,
                    type='function',
                    function=FunctionCall(name=name, arguments=tool_args),
                ))

        # Anything before the section marker is ordinary content.
        prefix = model_output[:model_output.
                              find(self.tool_calls_start_token)]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=prefix if prefix else None,
        )

    except Exception:
        logger.exception("Error in extracting tool call from response.")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally extract tool calls while the model streams output.

    Compares the previously parsed partial tool call against the current
    one and emits only the not-yet-streamed portion (name first, then
    argument diffs) as a DeltaMessage. Returns None when there is nothing
    new to stream for this token, or plain content when no tool call is
    in progress.
    """

    logger.debug("delta_text: %s", delta_text)
    logger.debug("delta_token_ids: %s", delta_token_ids)
    # check to see if we should be streaming a tool call - is there a
    if self.tool_calls_start_token_id not in current_token_ids:
        logger.debug("No tool call tokens found!")
        return DeltaMessage(content=delta_text)
    # Strip the section-level markers so they never leak into streamed
    # content or arguments.
    delta_text = delta_text.replace(self.tool_calls_start_token,
                                    "").replace(self.tool_calls_end_token,
                                                "")
    try:

        # figure out where we are in the parsing by counting tool call
        # start & end tags
        prev_tool_start_count = previous_token_ids.count(
            self.tool_call_start_token_id)
        prev_tool_end_count = previous_token_ids.count(
            self.tool_call_end_token_id)
        cur_tool_start_count = current_token_ids.count(
            self.tool_call_start_token_id)
        cur_tool_end_count = current_token_ids.count(
            self.tool_call_end_token_id)
        tool_call_portion = None
        text_portion = None

        # case: if we're generating text, OR rounding out a tool call
        if (cur_tool_start_count == cur_tool_end_count
                and prev_tool_end_count == cur_tool_end_count
                and self.tool_call_end_token not in delta_text):
            logger.debug("Generating text content! skipping tool parsing.")
            return DeltaMessage(content=delta_text)

        if self.tool_call_end_token in delta_text:
            logger.debug("tool_call_end_token in delta_text")
            full_text = current_text + delta_text
            tool_call_portion = full_text.split(
                self.tool_call_start_token)[-1].split(
                    self.tool_call_end_token)[0].rstrip()
            delta_text = delta_text.split(
                self.tool_call_end_token)[0].rstrip()
            # NOTE(review): delta_text was truncated at the end token on
            # the line above, so this split can never find the token and
            # text_portion always equals delta_text.lstrip() -- confirm
            # whether the original intent was to split the *untruncated*
            # delta_text here.
            text_portion = delta_text.split(
                self.tool_call_end_token)[-1].lstrip()

        # case -- we're starting a new tool call
        if (cur_tool_start_count > cur_tool_end_count
                and cur_tool_start_count > prev_tool_start_count):
            if len(delta_token_ids) > 1:
                tool_call_portion = current_text.split(
                    self.tool_call_start_token)[-1]
            else:
                tool_call_portion = None
                delta = None

            text_portion = None

            # set cursors and state appropriately
            self.current_tool_id += 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("Starting on a new tool %s", self.current_tool_id)

        # case -- we're updating an existing tool call
        elif (cur_tool_start_count > cur_tool_end_count
              and cur_tool_start_count == prev_tool_start_count):

            # get the portion of the text that's the tool call
            tool_call_portion = current_text.split(
                self.tool_call_start_token)[-1]
            text_portion = None

        # case -- the current tool call is being closed.
        elif (cur_tool_start_count == cur_tool_end_count
              and cur_tool_end_count >= prev_tool_end_count):
            if self.prev_tool_call_arr is None or len(
                    self.prev_tool_call_arr) == 0:
                logger.debug(
                    "attempting to close tool call, but no tool call")
                return None
            diff = self.prev_tool_call_arr[self.current_tool_id].get(
                "arguments")
            if diff:
                # Flush the tail of the arguments (everything in the
                # delta up to and including the closing '"}') that has
                # not been streamed to the client yet.
                # BUGFIX: removed the former
                #   diff = diff.encode(...).decode("unicode_escape")
                #          if diff is str else diff
                # statement: `diff is str` is an identity test against
                # the *type object* and is always False, and `diff` was
                # unconditionally overwritten below in any case, so the
                # statement was dead code.
                if '"}' not in delta_text:
                    return None
                end_loc = delta_text.rindex('"}')
                diff = delta_text[:end_loc] + '"}'
                logger.debug(
                    "Finishing tool and found diff that had not "
                    "been streamed yet: %s",
                    diff,
                )
                self.streamed_args_for_tool[self.current_tool_id] += diff
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=diff).model_dump(exclude_none=True),
                    )
                ])

        # case -- otherwise we're just generating text
        else:
            text = delta_text.replace(self.tool_call_start_token, "")
            text = text.replace(self.tool_call_end_token, "")
            delta = DeltaMessage(tool_calls=[], content=text)
            return delta

        # Parse the current (possibly partial) tool call text into a dict
        # of id / name / arguments.
        current_tool_call = dict()
        if tool_call_portion:
            current_tool_call_matches = (
                self.stream_tool_call_portion_regex.match(
                    tool_call_portion))
            if current_tool_call_matches:
                tool_id, tool_args = (current_tool_call_matches.groups())
                # id looks like "functions.get_weather:0"; the name sits
                # between the first '.' and the trailing ':<index>'.
                tool_name = tool_id.split('.')[1].split(':')[0]
                current_tool_call['id'] = tool_id
                current_tool_call["name"] = tool_name
                current_tool_call["arguments"] = tool_args
            else:
                # Arguments marker not generated yet -- try to match just
                # the call id so the name can be streamed early.
                current_tool_call_name_matches = (
                    self.stream_tool_call_name_regex.match(
                        tool_call_portion))
                if current_tool_call_name_matches:
                    tool_id_str, = current_tool_call_name_matches.groups()
                    tool_name = tool_id_str.split('.')[1].split(':')[0]
                    current_tool_call['id'] = tool_id_str
                    current_tool_call["name"] = tool_name
                    current_tool_call["arguments"] = ""
                else:
                    logger.debug("Not enough token")
                    return None

        # case - we haven't sent the tool name yet. If it's available, send
        #   it. otherwise, wait until it's available.
        if not self.current_tool_name_sent:
            # NOTE(review): current_tool_call is always a dict here, never
            # None; this guard is effectively dead but harmless.
            if current_tool_call is None:
                return None
            function_name: Union[str, None] = current_tool_call.get("name")
            tool_id = current_tool_call.get("id")
            if function_name:
                self.current_tool_name_sent = True
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        type="function",
                        id=tool_id,
                        function=DeltaFunctionCall(
                            name=function_name).model_dump(
                                exclude_none=True),
                    )
                ])
            else:
                return None

        # case -- otherwise, send the tool call delta

        # if the tool call portion is None, send the delta as text
        if tool_call_portion is None:
            # if there's text but not tool calls, send that -
            # otherwise None to skip chunk
            delta = (DeltaMessage(
                content=delta_text) if text_portion is not None else None)
            return delta

        # now, the nitty-gritty of tool calls
        # now we have the portion to parse as tool call.

        logger.debug("Trying to parse current tool call with ID %s",
                     self.current_tool_id)

        # if we're starting a new tool call, push an empty object in as
        #   a placeholder for the arguments
        if len(self.prev_tool_call_arr) <= self.current_tool_id:
            self.prev_tool_call_arr.append({})

        # main logic for tool parsing here - compare prev. partially-parsed
        #   JSON to the current partially-parsed JSON
        prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
            "arguments")
        cur_arguments = current_tool_call.get("arguments")

        logger.debug("diffing old arguments: %s", prev_arguments)
        logger.debug("against new ones: %s", cur_arguments)

        # case -- no arguments have been created yet. skip sending a delta.
        if not cur_arguments and not prev_arguments:
            logger.debug("Skipping text %s - no arguments", delta_text)
            delta = None

        # case -- prev arguments are defined, but non are now.
        #   probably impossible, but not a fatal error - just keep going
        elif not cur_arguments and prev_arguments:
            logger.error("should be impossible to have arguments reset "
                         "mid-call. skipping streaming anything.")
            delta = None

        # case -- we now have the first info about arguments available from
        #   autocompleting the JSON
        elif cur_arguments and not prev_arguments:

            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(
                    index=self.current_tool_id,
                    function=DeltaFunctionCall(
                        arguments=cur_arguments).model_dump(
                            exclude_none=True),
                )
            ])
            self.streamed_args_for_tool[
                self.current_tool_id] = cur_arguments

        # last case -- we have an update to existing arguments.
        elif cur_arguments and prev_arguments:
            # Only stream when the new arguments strictly extend the old
            # ones; anything else would emit a non-append diff.
            if (isinstance(delta_text, str)
                    and cur_arguments != prev_arguments
                    and len(cur_arguments) > len(prev_arguments)
                    and cur_arguments.startswith(prev_arguments)):
                delta_arguments = cur_arguments[len(prev_arguments):]
                logger.debug("got diff %s", delta_text)

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_id,
                        function=DeltaFunctionCall(
                            arguments=delta_arguments).model_dump(
                                exclude_none=True),
                    )
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] = cur_arguments
            else:
                delta = None

        # handle saving the state for the current tool into
        # the "prev" list for use in diffing for the next iteration
        if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
            self.prev_tool_call_arr[
                self.current_tool_id] = current_tool_call
        else:
            self.prev_tool_call_arr.append(current_tool_call)

        return delta

    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        return None  # do not stream a delta. skip this token ID.

Llama3JsonToolParser

Bases: ToolParser

Tool call parser for Llama 3.x and 4 models intended for use with the examples/tool_chat_template_llama.jinja template.

Used when --enable-auto-tool-choice --tool-call-parser llama3_json or llama4_json are set.

Source code in vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
@ToolParserManager.register_module("llama3_json")
@ToolParserManager.register_module("llama4_json")
class Llama3JsonToolParser(ToolParser):
    """
    Tool call parser for Llama 3.x and 4 models intended for use with the
    examples/tool_chat_template_llama.jinja template.

    Used when --enable-auto-tool-choice --tool-call-parser llama3_json or 
    llama4_json are set.
    """

    def __init__(self, tokenizer: PreTrainedTokenizerBase):
        super().__init__(tokenizer)

        # initialize properties used for state when parsing tool calls in
        # streaming mode
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        self.streamed_args_for_tool: list[str] = [
        ]  # map what has been streamed for each tool so far to a list
        # Special token Llama may emit before tool-call JSON output.
        self.bot_token = "<|python_tag|>"
        self.bot_token_id = tokenizer.encode(self.bot_token,
                                             add_special_tokens=False)[0]
        # Updated regex to match multiple JSONs separated by semicolons
        # This pattern is more robust and can handle nested JSON objects
        # (one level of brace nesting; deeper nesting will not match fully)
        self.tool_call_regex = re.compile(
            r'{[^{}]*(?:{[^{}]*}[^{}]*)*}(?:\s*;\s*{[^{}]*(?:{[^{}]*}[^{}]*)*})*',
            re.DOTALL)

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response.
        Only extracts JSON content and ignores any surrounding plain text.
        Supports both single JSON and multiple JSONs separated by semicolons.
        """
        # Quick check before running regex
        if not (self.bot_token in model_output or '{' in model_output):
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        # Find JSON object(s) in the text using regex
        # NOTE: search() stops at the first matching JSON run; any later,
        # non-adjacent JSON objects in the output are ignored.
        match = self.tool_call_regex.search(model_output)
        if not match:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        try:
            json_str = match.group(0)
            # Split by semicolon and strip whitespace
            json_objects = [obj.strip() for obj in json_str.split(';')]

            tool_calls: list[ToolCall] = []
            for json_obj in json_objects:
                if not json_obj:  # Skip empty strings
                    continue
                obj = json.loads(json_obj)
                tool_calls.append(
                    ToolCall(
                        type="function",
                        function=FunctionCall(
                            name=obj["name"],
                            # function call args are JSON but as a string
                            arguments=json.dumps(
                                obj["arguments"]
                                if "arguments" in obj else obj["parameters"],
                                ensure_ascii=False))))

            return ExtractedToolCallInformation(tools_called=True,
                                                tool_calls=tool_calls,
                                                content=None)

        except Exception:
            logger.exception("Error in extracting tool call from response.")
            # return information to just treat the tool call as regular JSON
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """
        Incrementally extract tool calls from a streaming response by
        partially parsing the accumulated JSON and emitting only the
        not-yet-streamed difference. Returns None when nothing new can be
        streamed for this chunk.
        """

        # Not a tool call unless the text starts with the bot token or a
        # JSON object; everything else streams through as plain content.
        if not (current_text.startswith(self.bot_token)
                or current_text.startswith('{')):
            return DeltaMessage(content=delta_text)

        # bit mask flags for partial JSON parsing. If the name hasn't been
        # sent yet, don't allow sending
        # an incomplete string since OpenAI only ever (as far as I have
        # seen) allows sending the entire tool/ function name at once.
        flags = Allow.ALL if self.current_tool_name_sent \
            else Allow.ALL & ~Allow.STR
        try:
            tool_call_arr = []
            is_complete = []
            try:
                # depending on the prompt format the Llama model may or may not
                # prefix the output with the <|python_tag|> token
                start_idx = len(self.bot_token) if current_text.startswith(
                    self.bot_token) else 0
                # Parse each '; '-separated JSON object in turn, recording
                # whether each one is complete yet.
                while start_idx < len(current_text):
                    (obj,
                     end_idx) = partial_json_loads(current_text[start_idx:],
                                                   flags)
                    is_complete.append(
                        is_complete_json(current_text[start_idx:start_idx +
                                                      end_idx]))
                    start_idx += end_idx + len('; ')
                    # depending on the prompt Llama can use
                    # either arguments or parameters
                    if "parameters" in obj:
                        # NOTE(review): assert is stripped under `python -O`;
                        # the outer except would catch the AssertionError
                        # anyway, so this is a soft check only.
                        assert "arguments" not in obj, \
                            "model generated both parameters and arguments"
                        obj["arguments"] = obj["parameters"]
                    tool_call_arr.append(obj)
            except partial_json_parser.core.exceptions.MalformedJSON:
                logger.debug('not enough tokens to parse into JSON yet')
                return None

            # select as the current tool call the one we're on the state at
            # (with current_tool_id still -1 before the first tool starts,
            # this indexes the last parsed call)
            current_tool_call: dict = tool_call_arr[self.current_tool_id] \
                if len(tool_call_arr) > 0 else {}

            # case -- if no tokens have been streamed for the tool, e.g.
            #   only the array brackets, stream nothing
            if len(tool_call_arr) == 0:
                return None

            # case: we are starting a new tool in the array
            #   -> array has > 0 length AND length has moved past cursor
            elif (len(tool_call_arr) > 0
                  and len(tool_call_arr) > self.current_tool_id + 1):

                # if we're moving on to a new call, first make sure we
                # haven't missed anything in the previous one that was
                # auto-generated due to JSON completions, but wasn't
                # streamed to the client yet.
                if self.current_tool_id >= 0:
                    cur_arguments = current_tool_call.get("arguments")
                    if cur_arguments:
                        cur_args_json = json.dumps(cur_arguments,
                                                   ensure_ascii=False)
                        sent = len(
                            self.streamed_args_for_tool[self.current_tool_id])
                        argument_diff = cur_args_json[sent:]

                        logger.debug("got arguments diff: %s", argument_diff)
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=argument_diff).
                                          model_dump(exclude_none=True))
                        ])
                        self.streamed_args_for_tool[
                            self.current_tool_id] += argument_diff
                    else:
                        delta = None
                else:
                    delta = None
                # re-set stuff pertaining to progress in the current tool
                self.current_tool_id = len(tool_call_arr) - 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("starting on new tool %d", self.current_tool_id)
                return delta

            # if the current tool name hasn't been sent, send if available
            # - otherwise send nothing
            elif not self.current_tool_name_sent:
                function_name = current_tool_call.get("name")
                if function_name:

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      type="function",
                                      id=make_tool_call_id(),
                                      function=DeltaFunctionCall(
                                          name=function_name).model_dump(
                                              exclude_none=True))
                    ])
                    self.current_tool_name_sent = True
                else:
                    delta = None

            # now we know we're on the same tool call and we're streaming
            # arguments
            else:
                cur_arguments = current_tool_call.get("arguments")
                delta = None

                if cur_arguments:
                    sent = len(
                        self.streamed_args_for_tool[self.current_tool_id])
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    prev_arguments = self.prev_tool_call_arr[
                        self.current_tool_id].get("arguments")

                    argument_diff = None
                    # If the JSON is complete, flush everything unsent;
                    # otherwise only stream up to the stable common prefix
                    # of the previous and current partial parses.
                    if is_complete[self.current_tool_id]:
                        argument_diff = cur_args_json[sent:]
                    elif prev_arguments:
                        prev_args_json = json.dumps(prev_arguments,
                                                    ensure_ascii=False)
                        if cur_args_json != prev_args_json:

                            prefix = find_common_prefix(
                                prev_args_json, cur_args_json)
                            argument_diff = prefix[sent:]

                    if argument_diff is not None:
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=argument_diff).
                                          model_dump(exclude_none=True))
                        ])
                        self.streamed_args_for_tool[
                            self.current_tool_id] += argument_diff

            self.prev_tool_call_arr = tool_call_arr
            return delta

        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")
            return None

bot_token instance-attribute

bot_token = '<|python_tag|>'

bot_token_id instance-attribute

bot_token_id = encode(bot_token, add_special_tokens=False)[
    0
]

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_call_regex instance-attribute

tool_call_regex = compile(
    "{[^{}]*(?:{[^{}]*}[^{}]*)*}(?:\\s*;\\s*{[^{}]*(?:{[^{}]*}[^{}]*)*})*",
    DOTALL,
)

__init__

__init__(tokenizer: PreTrainedTokenizerBase)
Source code in vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
def __init__(self, tokenizer: PreTrainedTokenizerBase):
    super().__init__(tokenizer)

    # initialize properties used for state when parsing tool calls in
    # streaming mode
    self.prev_tool_call_arr: list[dict] = []
    self.current_tool_id: int = -1
    self.current_tool_name_sent: bool = False
    self.streamed_args_for_tool: list[str] = [
    ]  # map what has been streamed for each tool so far to a list
    # Special token Llama may emit before tool-call JSON output.
    self.bot_token = "<|python_tag|>"
    self.bot_token_id = tokenizer.encode(self.bot_token,
                                         add_special_tokens=False)[0]
    # Updated regex to match multiple JSONs separated by semicolons
    # This pattern is more robust and can handle nested JSON objects
    # (one level of brace nesting; deeper nesting will not match fully)
    self.tool_call_regex = re.compile(
        r'{[^{}]*(?:{[^{}]*}[^{}]*)*}(?:\s*;\s*{[^{}]*(?:{[^{}]*}[^{}]*)*})*',
        re.DOTALL)

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract the tool calls from a complete model response. Only extracts JSON content and ignores any surrounding plain text. Supports both single JSON and multiple JSONs separated by semicolons.

Source code in vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """
    Extract the tool calls from a complete model response.
    Only extracts JSON content and ignores any surrounding plain text.
    Supports both single JSON and multiple JSONs separated by semicolons.
    """
    # Quick check before running regex
    if not (self.bot_token in model_output or '{' in model_output):
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    # Find JSON object(s) in the text using regex
    # NOTE: search() stops at the first matching JSON run; later,
    # non-adjacent JSON objects in the output are ignored.
    match = self.tool_call_regex.search(model_output)
    if not match:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        json_str = match.group(0)
        # Split by semicolon and strip whitespace
        json_objects = [obj.strip() for obj in json_str.split(';')]

        tool_calls: list[ToolCall] = []
        for json_obj in json_objects:
            if not json_obj:  # Skip empty strings
                continue
            obj = json.loads(json_obj)
            tool_calls.append(
                ToolCall(
                    type="function",
                    function=FunctionCall(
                        name=obj["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(
                            obj["arguments"]
                            if "arguments" in obj else obj["parameters"],
                            ensure_ascii=False))))

        return ExtractedToolCallInformation(tools_called=True,
                                            tool_calls=tool_calls,
                                            content=None)

    except Exception:
        logger.exception("Error in extracting tool call from response.")
        # return information to just treat the tool call as regular JSON
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally extract tool-call deltas from a streaming response.

    Re-parses ``current_text`` (the full generation so far) with a
    partial-JSON parser on every call and diffs the result against the
    parser state kept on ``self`` (``current_tool_id``,
    ``current_tool_name_sent``, ``streamed_args_for_tool``,
    ``prev_tool_call_arr``) to decide what to emit next.

    Returns a ``DeltaMessage`` carrying either plain content or
    tool-call deltas, or ``None`` when nothing new can be emitted yet.
    """

    # Not a tool call: no bot token and the text does not start as raw
    # JSON, so pass the delta through as ordinary content.
    if not (current_text.startswith(self.bot_token)
            or current_text.startswith('{')):
        return DeltaMessage(content=delta_text)

    # bit mask flags for partial JSON parsing. If the name hasn't been
    # sent yet, don't allow sending
    # an incomplete string since OpenAI only ever (as far as I have
    # seen) allows sending the entire tool/ function name at once.
    flags = Allow.ALL if self.current_tool_name_sent \
        else Allow.ALL & ~Allow.STR
    try:
        tool_call_arr = []
        is_complete = []
        try:
            # depending on the prompt format the Llama model may or may not
            # prefix the output with the <|python_tag|> token
            start_idx = len(self.bot_token) if current_text.startswith(
                self.bot_token) else 0
            # Multiple calls are separated by '; '. Parse each JSON
            # object in turn, recording whether it is complete yet.
            while start_idx < len(current_text):
                (obj,
                 end_idx) = partial_json_loads(current_text[start_idx:],
                                               flags)
                is_complete.append(
                    is_complete_json(current_text[start_idx:start_idx +
                                                  end_idx]))
                start_idx += end_idx + len('; ')
                # depending on the prompt Llama can use
                # either arguments or parameters
                if "parameters" in obj:
                    assert "arguments" not in obj, \
                        "model generated both parameters and arguments"
                    obj["arguments"] = obj["parameters"]
                tool_call_arr.append(obj)
        except partial_json_parser.core.exceptions.MalformedJSON:
            logger.debug('not enough tokens to parse into JSON yet')
            return None

        # select as the current tool call the one we're on the state at
        current_tool_call: dict = tool_call_arr[self.current_tool_id] \
            if len(tool_call_arr) > 0 else {}

        # case -- if no tokens have been streamed for the tool, e.g.
        #   only the array brackets, stream nothing
        if len(tool_call_arr) == 0:
            return None

        # case: we are starting a new tool in the array
        #   -> array has > 0 length AND length has moved past cursor
        elif (len(tool_call_arr) > 0
              and len(tool_call_arr) > self.current_tool_id + 1):

            # if we're moving on to a new call, first make sure we
            # haven't missed anything in the previous one that was
            # auto-generated due to JSON completions, but wasn't
            # streamed to the client yet.
            if self.current_tool_id >= 0:
                cur_arguments = current_tool_call.get("arguments")
                if cur_arguments:
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    sent = len(
                        self.streamed_args_for_tool[self.current_tool_id])
                    argument_diff = cur_args_json[sent:]

                    logger.debug("got arguments diff: %s", argument_diff)
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff
                else:
                    delta = None
            else:
                delta = None
            # re-set stuff pertaining to progress in the current tool
            self.current_tool_id = len(tool_call_arr) - 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("starting on new tool %d", self.current_tool_id)
            return delta

        # if the current tool name hasn't been sent, send if available
        # - otherwise send nothing
        elif not self.current_tool_name_sent:
            function_name = current_tool_call.get("name")
            if function_name:

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  type="function",
                                  id=make_tool_call_id(),
                                  function=DeltaFunctionCall(
                                      name=function_name).model_dump(
                                          exclude_none=True))
                ])
                self.current_tool_name_sent = True
            else:
                delta = None

        # now we know we're on the same tool call and we're streaming
        # arguments
        else:
            cur_arguments = current_tool_call.get("arguments")
            delta = None

            if cur_arguments:
                sent = len(
                    self.streamed_args_for_tool[self.current_tool_id])
                cur_args_json = json.dumps(cur_arguments,
                                           ensure_ascii=False)
                prev_arguments = self.prev_tool_call_arr[
                    self.current_tool_id].get("arguments")

                argument_diff = None
                # Only safe to flush everything once the JSON object is
                # known to be complete; otherwise stream just the common
                # prefix to avoid sending auto-completed JSON.
                if is_complete[self.current_tool_id]:
                    argument_diff = cur_args_json[sent:]
                elif prev_arguments:
                    prev_args_json = json.dumps(prev_arguments,
                                                ensure_ascii=False)
                    if cur_args_json != prev_args_json:

                        prefix = find_common_prefix(
                            prev_args_json, cur_args_json)
                        argument_diff = prefix[sent:]

                if argument_diff is not None:
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff

        self.prev_tool_call_arr = tool_call_arr
        return delta

    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        logger.debug(
            "Skipping chunk as a result of tool streaming extraction "
            "error")
        return None

Llama4PythonicToolParser

Bases: ToolParser

Toolcall parser for Llama4 that produces tool calls in a pythonic style. Use --enable-auto-tool-choice --tool-call-parser llama4_pythonic

Source code in vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
@ToolParserManager.register_module("llama4_pythonic")
class Llama4PythonicToolParser(ToolParser):
    """
    Tool-call parser for Llama 4 models that produce tool calls in a
    pythonic style, i.e. a Python list of function-call expressions such
    as ``[get_weather(city='SF'), get_time(tz='UTC')]``.

    Use --enable-auto-tool-choice --tool-call-parser llama4_pythonic
    """
    # TODO(mdepinet): Possible future improvements:
    #   1. Support text + tools separated by either <|python_tag|> or \n\n
    #   2. Support tools outside of a list (or separated by a semicolon).
    #      This depends on item 1 for consistent streaming.
    # Neither of these are necessary for e.g. ToolACE, but both would help make
    # Llama3.2 models more reliable.

    # Matches a bracketed list of name(kw=value, ...) call expressions.
    TOOL_CALL_REGEX = re.compile(
        r"\[([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s)?\),\s*)*([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s*)?\)\s*)+\]",
        re.DOTALL)

    def __init__(self, tokenizer: PreTrainedTokenizerBase):
        super().__init__(tokenizer)

    # Rename for readability. This is NOT a tool id.
    @property
    def current_tool_index(self) -> int:
        return self.current_tool_id

    @current_tool_index.setter
    def current_tool_index(self, value: int) -> None:
        self.current_tool_id = value

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response.

        Returns an ``ExtractedToolCallInformation`` with the parsed tool
        calls, or with the raw text as content when the output does not
        parse as a pythonic tool-call list.
        """

        # remove <|python_start|> and <|python_end|>
        # as Llama 4 model sometimes will output those tokens
        if model_output.startswith("<|python_start|>"):
            model_output = model_output[len("<|python_start|>"):]
            model_output = model_output.replace("<|python_end|>", "")

        # Cheap pattern check first; the regex match is bounded by a
        # timeout to guard against catastrophic backtracking.
        is_tool_call_pattern = False
        try:
            is_tool_call_pattern = self.TOOL_CALL_REGEX.match(
                model_output,
                timeout=envs.VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS) is not None
        except TimeoutError:
            logger.warning(
                "Regex timeout occurred when matching tool call pattern.")
            logger.debug("Regex timeout occurred when matching user input: %s",
                         model_output)

        if not is_tool_call_pattern:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        try:
            module = ast.parse(model_output)
            parsed = getattr(module.body[0], "value", None)
            if isinstance(parsed, ast.List) and all(
                    isinstance(e, ast.Call) for e in parsed.elts):
                return ExtractedToolCallInformation(
                    tools_called=True,
                    tool_calls=[
                        _handle_single_tool(e)  # type: ignore
                        for e in parsed.elts
                    ],
                    content=None)
            else:
                raise _UnexpectedAstError(
                    "Tool output must be a list of function calls")
        except Exception:
            logger.exception("Error in extracting tool call from response.")
            # Treat as regular text
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Incrementally extract tool-call deltas while streaming.

        Completes the partial pythonic list in ``current_text`` into
        valid Python, parses it, and emits argument deltas for each tool
        call while withholding any auto-completed suffix that the model
        has not actually produced yet.
        """

        if not current_text.startswith("[") and not current_text.startswith(
                "<|python_start|>"):
            return DeltaMessage(content=delta_text)

        try:
            # remove <|python_start|> and <|python_end|>
            if current_text.startswith("<|python_start|>"):
                current_text = current_text[len("<|python_start|>"):]
            if current_text.endswith("<|python_end|>"):
                current_text = current_text[:current_text.
                                            rfind("<|python_end|>")]
            # valid_text is current_text plus whatever added_text was
            # needed to make it parseable Python.
            valid_and_added_text = _make_valid_python(current_text)
            if valid_and_added_text is None:
                return None
            valid_text, added_text = valid_and_added_text

            module = ast.parse(valid_text)
            parsed = getattr(module.body[0], "value", None)
            if not isinstance(parsed, ast.List) or not all(
                    isinstance(e, ast.Call) for e in parsed.elts):
                raise _UnexpectedAstError(
                    "Tool output must be a list of function calls")
            tool_calls = [
                _handle_single_tool(e)  # type: ignore
                for e in parsed.elts
            ]

            tool_deltas = []
            for index, new_call in enumerate(tool_calls):
                # Earlier calls were fully streamed already.
                if index < self.current_tool_index:
                    continue

                self.current_tool_index = index
                if len(self.streamed_args_for_tool) == index:
                    self.streamed_args_for_tool.append("")

                # A call is complete when a later call exists, or when
                # its closing ")]" came from the model, not added_text.
                new_call_complete = index < len(
                    tool_calls) - 1 or ")]" not in added_text
                if new_call_complete:
                    self.current_tool_index += 1

                withheld_suffix = (added_text[:-2]
                                   if not new_call_complete else "")
                if not new_call_complete and added_text[-2] == ")":
                    # Function call is incomplete. Withhold the closing bracket.
                    withheld_suffix = withheld_suffix + "}"
                # Strings get single quotes in the model-produced string.
                # JSON requires double quotes.
                withheld_suffix = withheld_suffix.replace("'", '"')
                delta = _compute_tool_delta(self.streamed_args_for_tool[index],
                                            new_call, index, withheld_suffix)

                if delta is not None:
                    tool_deltas.append(delta)
                    if (delta.function is not None
                            and delta.function.arguments is not None):
                        self.streamed_args_for_tool[
                            index] += delta.function.arguments

            # HACK: serving_chat.py inspects the internal state of tool
            # parsers when determining its final streaming delta,
            # automatically adding autocompleted JSON.
            # These two lines avoid that nonsense while ensuring
            # finish_reason is set to tool_calls when at least one tool
            # is called.
            if tool_deltas and not self.prev_tool_call_arr:
                self.prev_tool_call_arr = [{"arguments": {}}]

            if tool_deltas:
                return DeltaMessage(tool_calls=tool_deltas)
            elif not added_text and self.current_tool_id > 0:
                # Return an empty DeltaMessage once the tool calls are all done
                # so that finish_reason gets set.
                return DeltaMessage(content='')
            else:
                return None
        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")
            return None

TOOL_CALL_REGEX class-attribute instance-attribute

TOOL_CALL_REGEX = compile(
    "\\[([a-zA-Z]+\\w*\\(([a-zA-Z]+\\w*=.*,\\s*)*([a-zA-Z]+\\w*=.*\\s)?\\),\\s*)*([a-zA-Z]+\\w*\\(([a-zA-Z]+\\w*=.*,\\s*)*([a-zA-Z]+\\w*=.*\\s*)?\\)\\s*)+\\]",
    DOTALL,
)

current_tool_index property writable

current_tool_index: int

__init__

__init__(tokenizer: PreTrainedTokenizerBase)
Source code in vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
def __init__(self, tokenizer: PreTrainedTokenizerBase):
    """Initialize the parser; all streaming state lives on the ToolParser base."""
    super().__init__(tokenizer)

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract the tool calls from a complete model response.

Source code in vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """
    Extract the tool calls from a complete (non-streaming) model
    response.

    Returns an ``ExtractedToolCallInformation`` carrying the parsed
    tool calls, or the raw text as content when the output does not
    look like (or fails to parse as) a pythonic tool-call list.
    """

    # Llama 4 sometimes wraps the tool-call list in
    # <|python_start|>/<|python_end|>; strip those markers first.
    start_tag = "<|python_start|>"
    if model_output.startswith(start_tag):
        model_output = model_output[len(start_tag):].replace(
            "<|python_end|>", "")

    # Cheap shape check before parsing; the regex match is bounded by
    # a timeout to guard against catastrophic backtracking.
    looks_like_tool_call = False
    try:
        looks_like_tool_call = self.TOOL_CALL_REGEX.match(
            model_output,
            timeout=envs.VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS) is not None
    except TimeoutError:
        logger.warning(
            "Regex timeout occurred when matching tool call pattern.")
        logger.debug("Regex timeout occurred when matching user input: %s",
                     model_output)

    if not looks_like_tool_call:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        expr = getattr(ast.parse(model_output).body[0], "value", None)
        if not (isinstance(expr, ast.List)
                and all(isinstance(e, ast.Call) for e in expr.elts)):
            raise _UnexpectedAstError(
                "Tool output must be a list of function calls")
        calls = [
            _handle_single_tool(e)  # type: ignore
            for e in expr.elts
        ]
        return ExtractedToolCallInformation(tools_called=True,
                                            tool_calls=calls,
                                            content=None)
    except Exception:
        logger.exception("Error in extracting tool call from response.")
        # Treat as regular text
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally extract tool-call deltas while streaming.

    Completes the partial pythonic list in ``current_text`` into valid
    Python, parses it, and emits argument deltas for each tool call
    while withholding any auto-completed suffix the model has not
    actually produced yet.
    """

    if not current_text.startswith("[") and not current_text.startswith(
            "<|python_start|>"):
        return DeltaMessage(content=delta_text)

    try:
        # remove <|python_start|> and <|python_end|>
        if current_text.startswith("<|python_start|>"):
            current_text = current_text[len("<|python_start|>"):]
        if current_text.endswith("<|python_end|>"):
            current_text = current_text[:current_text.
                                        rfind("<|python_end|>")]
        # valid_text is current_text plus whatever added_text was
        # needed to make it parseable Python.
        valid_and_added_text = _make_valid_python(current_text)
        if valid_and_added_text is None:
            return None
        valid_text, added_text = valid_and_added_text

        module = ast.parse(valid_text)
        parsed = getattr(module.body[0], "value", None)
        if not isinstance(parsed, ast.List) or not all(
                isinstance(e, ast.Call) for e in parsed.elts):
            raise _UnexpectedAstError(
                "Tool output must be a list of function calls")
        tool_calls = [
            _handle_single_tool(e)  # type: ignore
            for e in parsed.elts
        ]

        tool_deltas = []
        for index, new_call in enumerate(tool_calls):
            # Earlier calls were fully streamed already.
            if index < self.current_tool_index:
                continue

            self.current_tool_index = index
            if len(self.streamed_args_for_tool) == index:
                self.streamed_args_for_tool.append("")

            # A call is complete when a later call exists, or when its
            # closing ")]" came from the model rather than added_text.
            new_call_complete = index < len(
                tool_calls) - 1 or ")]" not in added_text
            if new_call_complete:
                self.current_tool_index += 1

            withheld_suffix = (added_text[:-2]
                               if not new_call_complete else "")
            if not new_call_complete and added_text[-2] == ")":
                # Function call is incomplete. Withhold the closing bracket.
                withheld_suffix = withheld_suffix + "}"
            # Strings get single quotes in the model-produced string.
            # JSON requires double quotes.
            withheld_suffix = withheld_suffix.replace("'", '"')
            delta = _compute_tool_delta(self.streamed_args_for_tool[index],
                                        new_call, index, withheld_suffix)

            if delta is not None:
                tool_deltas.append(delta)
                if (delta.function is not None
                        and delta.function.arguments is not None):
                    self.streamed_args_for_tool[
                        index] += delta.function.arguments

        # HACK: serving_chat.py inspects the internal state of tool
        # parsers when determining its final streaming delta,
        # automatically adding autocompleted JSON.
        # These two lines avoid that nonsense while ensuring
        # finish_reason is set to tool_calls when at least one tool is
        # called.
        if tool_deltas and not self.prev_tool_call_arr:
            self.prev_tool_call_arr = [{"arguments": {}}]

        if tool_deltas:
            return DeltaMessage(tool_calls=tool_deltas)
        elif not added_text and self.current_tool_id > 0:
            # Return an empty DeltaMessage once the tool calls are all done
            # so that finish_reason gets set.
            return DeltaMessage(content='')
        else:
            return None
    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        logger.debug(
            "Skipping chunk as a result of tool streaming extraction "
            "error")
        return None

MinimaxToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
@ToolParserManager.register_module("minimax")
class MinimaxToolParser(ToolParser):

    def __init__(self, tokenizer: AnyTokenizer):
        """
        Initialize the Minimax tool parser.

        Args:
            tokenizer: Tokenizer of the served model; required so the tool
                call start/end tags can be resolved to token IDs.

        Raises:
            ValueError: If no tokenizer is available after base-class init.
        """
        super().__init__(tokenizer)

        # Initialize streaming state for tracking tool call progress
        self.streaming_state: dict[str, Any] = {
            "current_tool_index": -1,  # Index of current tool being processed
            "tool_ids": [],  # List of tool call IDs
            "sent_tools": [],  # List of tools that have been sent
        }

        # Define tool call tokens and patterns
        self.tool_call_start_token = "<tool_calls>"
        self.tool_call_end_token = "</tool_calls>"
        # First alternative matches a closed block; second matches an
        # unterminated block that is still being generated.
        self.tool_call_regex = re.compile(
            r"<tool_calls>(.*?)</tool_calls>|<tool_calls>(.*)", re.DOTALL)
        self.thinking_tag_pattern = r"<think>(.*?)</think>"
        self.tool_name_pattern = re.compile(r'"name":\s*"([^"]+)"')
        self.tool_args_pattern = re.compile(r'"arguments":\s*')

        # Buffer for handling partial tool calls during streaming
        self.pending_buffer = ""
        self.in_thinking_tag = False

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")

        # Get token IDs for tool call start/end tokens
        # (None when the tags are not single tokens in this vocabulary).
        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

        if (self.tool_call_start_token_id is None
                or self.tool_call_end_token_id is None):
            logger.warning(
                "Minimax Tool parser could not locate tool call start/end "
                "tokens in the tokenizer. Falling back to string matching.")

    def preprocess_model_output(self, model_output: str) -> str:
        """
        Preprocess model output by removing tool calls from thinking tags.

        Args:
            model_output: Raw model output string

        Returns:
            Preprocessed model output with tool calls removed from thinking tags
        """

        def remove_tool_calls_from_think(match):
            think_content = match.group(1)
            cleaned_content = re.sub(r"<tool_calls>.*?</tool_calls>",
                                     "",
                                     think_content,
                                     flags=re.DOTALL)
            return f"<think>{cleaned_content}</think>"

        return re.sub(self.thinking_tag_pattern,
                      remove_tool_calls_from_think,
                      model_output,
                      flags=re.DOTALL)

    def _clean_duplicate_braces(self, args_text: str) -> str:
        """
        Clean duplicate closing braces from arguments text.

        Args:
            args_text: Raw arguments text

        Returns:
            Cleaned arguments text with proper JSON formatting
        """
        args_text = args_text.strip()
        if not args_text:
            return args_text

        try:
            json.loads(args_text)
            return args_text
        except json.JSONDecodeError:
            pass

        while args_text.endswith('}}'):
            candidate = args_text[:-1]
            try:
                json.loads(candidate)
                return candidate
            except json.JSONDecodeError:
                args_text = candidate

        return args_text

    def _clean_delta_braces(self, delta_text: str) -> str:
        """
        Clean delta text by removing excessive closing braces.

        Args:
            delta_text: Delta text to clean

        Returns:
            Cleaned delta text
        """
        if not delta_text:
            return delta_text

        delta_stripped = delta_text.strip()

        if delta_stripped and all(c in '}\n\r\t ' for c in delta_stripped):
            brace_count = delta_stripped.count('}')
            if brace_count > 1:
                return '}\n' if delta_text.endswith('\n') else '}'

        return delta_text

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract tool calls from model output for non-streaming mode.

        Tool calls appearing only inside <think> tags are ignored; any
        surviving <tool_calls> block is parsed as one JSON object per line.

        Args:
            model_output: Complete model output
            request: Chat completion request

        Returns:
            ExtractedToolCallInformation containing tool calls and content
        """
        # Detection runs on the preprocessed text so tool calls confined to
        # thinking regions never trigger extraction.
        processed_output = self.preprocess_model_output(model_output)

        if self.tool_call_start_token not in processed_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        try:
            function_call_tuples = self.tool_call_regex.findall(
                processed_output)

            raw_function_calls = []
            for match in function_call_tuples:
                # match[0]: closed <tool_calls> block; match[1]: unterminated.
                tool_call_content = match[0] if match[0] else match[1]
                if tool_call_content.strip():
                    lines = tool_call_content.strip().split('\n')
                    for line in lines:
                        line = line.strip()
                        if line and line.startswith('{') and line.endswith(
                                '}'):
                            try:
                                parsed_call = json.loads(line)
                                raw_function_calls.append(parsed_call)
                            except json.JSONDecodeError:
                                # Skip malformed lines rather than failing.
                                continue

            tool_calls = []
            for function_call in raw_function_calls:
                if "name" in function_call and "arguments" in function_call:
                    tool_calls.append(
                        ToolCall(type="function",
                                 function=FunctionCall(
                                     name=function_call["name"],
                                     arguments=json.dumps(
                                         function_call["arguments"],
                                         ensure_ascii=False))))

            # Reconstruct the content preceding the first tool call by
            # mapping the last non-empty preprocessed line back onto the
            # original (unpreprocessed) model output.
            processed_pos = processed_output.find(self.tool_call_start_token)
            if processed_pos != -1:
                processed_content = processed_output[:processed_pos].strip()

                if processed_content:
                    lines = processed_content.split('\n')
                    for line in reversed(lines):
                        line = line.strip()
                        if line:
                            pos = model_output.find(line)
                            if pos != -1:
                                content = model_output[:pos + len(line)]
                                break
                    else:
                        # No line could be located in the original output.
                        content = ""
                else:
                    content = ""
            else:
                content = model_output

            return ExtractedToolCallInformation(
                tools_called=len(tool_calls) > 0,
                tool_calls=tool_calls,
                content=content.strip() if content.strip() else None)

        except Exception:
            logger.exception(
                "An unexpected error occurred during tool call extraction.")
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def _update_thinking_state(self, text: str) -> None:
        """
        Update the thinking tag state based on text content.

        Args:
            text: Text to analyze for thinking tags
        """
        open_count = text.count("<think>")
        close_count = text.count("</think>")
        self.in_thinking_tag = open_count > close_count or (
            open_count == close_count and text.endswith("</think>"))

    def _is_potential_tag_start(self, text: str) -> bool:
        """
        Check if text might be the start of a tool call tag.

        Args:
            text: Text to check

        Returns:
            True if text could be the start of a tool call tag
        """
        for tag in [self.tool_call_start_token, self.tool_call_end_token]:
            if any(
                    tag.startswith(text[-i:])
                    for i in range(1, min(len(text) + 1, len(tag)))):
                return True
        return False

    def _should_buffer_content(self, delta_text: str) -> bool:
        """
        Determine if content should be buffered for later processing.

        Args:
            delta_text: Delta text to check

        Returns:
            True if content should be buffered
        """
        if self.in_thinking_tag:
            return False
        return bool(self.pending_buffer
                    or self.tool_call_start_token in delta_text
                    or self.tool_call_end_token in delta_text
                    or delta_text.startswith('<'))

    def _split_content_for_buffering(self, delta_text: str) -> tuple[str, str]:
        """
        Split delta text into safe content and potential tag content.

        Args:
            delta_text: Delta text to split

        Returns:
            Tuple of (safe_content, potential_tag_content)
        """
        if self.in_thinking_tag:
            return delta_text, ""

        for tag in [self.tool_call_start_token, self.tool_call_end_token]:
            for i in range(1, len(tag)):
                tag_prefix = tag[:i]
                pos = delta_text.rfind(tag_prefix)
                if pos != -1 and tag.startswith(delta_text[pos:]):
                    return delta_text[:pos], delta_text[pos:]
        return delta_text, ""

    def _process_buffer(self, new_content: str) -> str:
        """
        Process buffered content and return output content.

        Args:
            new_content: New content to add to buffer

        Returns:
            Processed output content
        """
        self.pending_buffer += new_content
        output_content = ""

        if self.in_thinking_tag:
            output_content = self.pending_buffer
            self.pending_buffer = ""
            return output_content

        while self.pending_buffer:
            start_pos = self.pending_buffer.find(self.tool_call_start_token)
            end_pos = self.pending_buffer.find(self.tool_call_end_token)

            if start_pos != -1 and (end_pos == -1 or start_pos < end_pos):
                tag_pos, tag_len = start_pos, len(self.tool_call_start_token)
            elif end_pos != -1:
                tag_pos, tag_len = end_pos, len(self.tool_call_end_token)
            else:
                if self._is_potential_tag_start(self.pending_buffer):
                    break
                output_content += self.pending_buffer
                self.pending_buffer = ""
                break

            output_content += self.pending_buffer[:tag_pos]
            self.pending_buffer = self.pending_buffer[tag_pos + tag_len:]

        return output_content

    def _reset_streaming_state(self) -> None:
        """Reset the streaming state to initial values."""
        self.streaming_state = {
            "current_tool_index": -1,
            "tool_ids": [],
            "sent_tools": [],
        }

    def _advance_to_next_tool(self) -> None:
        """Advance to the next tool in the streaming sequence."""
        self.streaming_state["current_tool_index"] = int(
            self.streaming_state["current_tool_index"]) + 1

    def _set_current_tool_index(self, index: int) -> None:
        """
        Set the current tool index.

        Args:
            index: Tool index to set
        """
        self.streaming_state["current_tool_index"] = index

    def _get_current_tool_index(self) -> int:
        """
        Get the current tool index.

        Returns:
            Current tool index
        """
        return int(self.streaming_state["current_tool_index"])

    def _get_next_unsent_tool_index(self, tool_count: int) -> int:
        """
        Get the index of the next unsent tool.

        Args:
            tool_count: Total number of tools

        Returns:
            Index of next unsent tool, or -1 if all tools sent
        """
        sent_tools = list(self.streaming_state["sent_tools"])
        for i in range(tool_count):
            if i < len(sent_tools):
                if not sent_tools[i]["sent_name"]:
                    return i
            else:
                return i
        return -1

    def _ensure_state_arrays(self, tool_count: int) -> None:
        """
        Grow the per-tool tracking lists to cover *tool_count* entries.

        Args:
            tool_count: Number of tools to prepare for
        """
        sent_tools = list(self.streaming_state["sent_tools"])
        tool_ids = list(self.streaming_state["tool_ids"])

        # Each new slot gets a fresh id so it is stable once streamed.
        for _ in range(tool_count - len(sent_tools)):
            sent_tools.append({
                "sent_name": False,
                "sent_arguments": "",
                "id": make_tool_call_id(),
            })

        tool_ids.extend([None] * max(0, tool_count - len(tool_ids)))

        self.streaming_state["sent_tools"] = sent_tools
        self.streaming_state["tool_ids"] = tool_ids

    def _detect_tools_in_text(self, text: str) -> int:
        """
        Detect the number of tools in text by counting name patterns.

        Args:
            text: Text to analyze

        Returns:
            Number of tools detected
        """
        matches = self.tool_name_pattern.findall(text)
        return len(matches)

    def _find_tool_boundaries(self, text: str) -> list[tuple[int, int]]:
        """
        Find the boundaries of tool calls in text.

        Complete JSON objects containing both "name" and "arguments" are
        returned as closed (start, end) spans; a trailing unbalanced object
        that at least has a "name" key is returned as a partial span so
        streaming can begin before the call is fully generated.

        Args:
            text: Text to analyze

        Returns:
            List of (start, end) positions for tool calls
        """
        boundaries = []
        i = 0
        while i < len(text):
            if text[i] == '{':
                start = i
                depth = 0
                has_name = False
                has_arguments = False

                # Walk forward tracking brace depth until the object closes.
                while i < len(text):
                    if text[i] == '{':
                        depth += 1
                    elif text[i] == '}':
                        depth -= 1
                        if depth == 0:
                            end = i + 1
                            segment = text[start:end]
                            # Only spans that look like tool calls count.
                            if '"name"' in segment and '"arguments"' in segment:
                                boundaries.append((start, end))
                            break

                    # NOTE(review): these substring scans make the walk
                    # quadratic in the worst case, and has_arguments is set
                    # but never read afterwards.
                    if not has_name and '"name"' in text[start:i + 1]:
                        has_name = True
                    if not has_arguments and '"arguments"' in text[start:i +
                                                                   1]:
                        has_arguments = True

                    i += 1

                # Unterminated object at end of text: emit a partial span.
                if depth > 0 and has_name:
                    boundaries.append((start, i))
            else:
                i += 1
        return boundaries

    def _extract_tool_args(self, tool_content: str, args_match) -> str:
        """
        Extract tool arguments from tool content.

        Args:
            tool_content: Tool call content
            args_match: Regex match for arguments pattern

        Returns:
            Extracted arguments as string
        """
        args_start_pos = args_match.end()
        remaining_content = tool_content[args_start_pos:]

        if remaining_content.strip().startswith('{'):
            depth = 0
            for i, char in enumerate(remaining_content):
                if char == '{':
                    depth += 1
                elif char == '}':
                    depth -= 1
                    if depth == 0:
                        return remaining_content[:i + 1]
        else:
            args_end = remaining_content.find('}')
            if args_end > 0:
                return remaining_content[:args_end].strip()

        return remaining_content.rstrip('}').strip()

    def _get_current_tool_content(
            self, text: str,
            tool_index: int) -> tuple[Optional[str], Optional[str]]:
        """
        Get the content of a specific tool by index.

        Args:
            text: Text containing tool calls
            tool_index: Index of tool to extract

        Returns:
            Tuple of (tool_name, tool_arguments) or (None, None) if not found
        """
        boundaries = self._find_tool_boundaries(text)

        if tool_index >= len(boundaries):
            return None, None

        start, end = boundaries[tool_index]
        tool_content = text[start:end]

        name_match = self.tool_name_pattern.search(tool_content)
        name = name_match.group(1) if name_match else None

        args_match = self.tool_args_pattern.search(tool_content)
        if args_match:
            try:
                args_text = self._extract_tool_args(tool_content, args_match)
                return name, args_text
            except Exception:
                remaining_content = tool_content[args_match.end():]
                args_text = remaining_content.rstrip('}').strip()
                return name, args_text

        return name, None

    def _handle_tool_name_streaming(
            self, tool_content: str,
            tool_count: int) -> Union[DeltaMessage, None]:
        """
        Handle streaming of tool names.

        Emits the name (and id) of the next tool whose name has not been
        sent yet, and marks it as sent in the streaming state.

        Args:
            tool_content: Content containing tool calls
            tool_count: Total number of tools

        Returns:
            DeltaMessage with tool name or None if no tool to stream
        """
        next_idx = self._get_next_unsent_tool_index(tool_count)

        if next_idx == -1:
            return None

        # Wait until this tool's JSON has actually appeared in the stream.
        boundaries = self._find_tool_boundaries(tool_content)
        if next_idx >= len(boundaries):
            return None

        tool_name, _ = self._get_current_tool_content(tool_content, next_idx)
        if not tool_name:
            return None

        # Make this the current tool and record that its name went out,
        # reusing the id allocated by _ensure_state_arrays.
        self._set_current_tool_index(next_idx)
        sent_tools = list(self.streaming_state["sent_tools"])
        tool_ids = list(self.streaming_state["tool_ids"])

        tool_id = sent_tools[next_idx]["id"]
        tool_ids[next_idx] = tool_id
        sent_tools[next_idx]["sent_name"] = True

        self.streaming_state["sent_tools"] = sent_tools
        self.streaming_state["tool_ids"] = tool_ids

        return DeltaMessage(tool_calls=[
            DeltaToolCall(index=next_idx,
                          type="function",
                          id=tool_id,
                          function=DeltaFunctionCall(
                              name=tool_name).model_dump(exclude_none=True))
        ])

    def _handle_tool_args_streaming(
            self, tool_content: str,
            tool_count: int) -> Union[DeltaMessage, None]:
        """
        Handle streaming of tool arguments.

        Emits the incremental difference between the arguments already sent
        for the current tool and the arguments now visible in the stream.

        Args:
            tool_content: Content containing tool calls
            tool_count: Total number of tools

        Returns:
            DeltaMessage with tool arguments or None if no arguments to stream
        """
        current_idx = self._get_current_tool_index()

        if current_idx < 0 or current_idx >= tool_count:
            return None

        tool_name, tool_args = self._get_current_tool_content(
            tool_content, current_idx)
        if not tool_name or tool_args is None:
            return None

        sent_tools = list(self.streaming_state["sent_tools"])

        # Arguments are only streamed after the name delta went out.
        if not sent_tools[current_idx]["sent_name"]:
            return None

        clean_args = self._clean_duplicate_braces(tool_args)
        sent_args = sent_tools[current_idx]["sent_arguments"]

        if clean_args != sent_args:
            if sent_args and clean_args.startswith(sent_args):
                # Incremental growth: send only the newly generated suffix.
                # NOTE(review): argument order of extract_intermediate_diff
                # (full text first, previously-sent second) is assumed from
                # this call site — verify against its definition.
                args_delta = extract_intermediate_diff(clean_args, sent_args)
                if args_delta:
                    args_delta = self._clean_delta_braces(args_delta)
                    sent_tools[current_idx]["sent_arguments"] = clean_args
                    self.streaming_state["sent_tools"] = sent_tools

                    # A closing brace means this tool's arguments are done.
                    if clean_args.endswith('}'):
                        self._advance_to_next_tool()

                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(index=current_idx,
                                      function=DeltaFunctionCall(
                                          arguments=args_delta).model_dump(
                                              exclude_none=True))
                    ])
            elif not sent_args and clean_args:
                # First arguments delta for this tool.
                clean_args_delta = self._clean_delta_braces(clean_args)
                sent_tools[current_idx]["sent_arguments"] = clean_args
                self.streaming_state["sent_tools"] = sent_tools

                if clean_args.endswith('}'):
                    self._advance_to_next_tool()

                return DeltaMessage(tool_calls=[
                    DeltaToolCall(index=current_idx,
                                  function=DeltaFunctionCall(
                                      arguments=clean_args_delta).model_dump(
                                          exclude_none=True))
                ])

        return None

    def _is_end_tool_calls(self, current_text: str) -> bool:
        if self.tool_call_end_token not in current_text:
            return False

        end_token_positions = []
        search_start = 0
        while True:
            pos = current_text.find(self.tool_call_end_token, search_start)
            if pos == -1:
                break
            end_token_positions.append(pos)
            search_start = pos + 1

        think_regions = []
        for match in re.finditer(self.thinking_tag_pattern,
                                 current_text,
                                 flags=re.DOTALL):
            think_regions.append((match.start(), match.end()))

        for pos in end_token_positions:
            in_think = any(pos >= t_start and pos < t_end
                           for t_start, t_end in think_regions)
            if not in_think:
                return True

        return False

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """
        Extract tool call deltas from model output in streaming mode.

        Args:
            previous_text: Text generated before this delta
            current_text: Full text generated so far
            delta_text: Newly generated text
            previous_token_ids: Token IDs generated before this delta
            current_token_ids: All token IDs generated so far
            delta_token_ids: Newly generated token IDs
            request: Chat completion request

        Returns:
            DeltaMessage carrying content and/or tool call deltas, or None
            when nothing should be emitted for this delta
        """
        self._update_thinking_state(current_text)

        # Inside <think> everything is plain content.
        if self.in_thinking_tag:
            return DeltaMessage(content=delta_text)

        # Deltas that touch tool tags pass through the pending buffer.
        if self._should_buffer_content(delta_text):
            buffered_output = self._process_buffer(delta_text)
            return DeltaMessage(
                content=buffered_output) if buffered_output else None

        # Once a closing tag has been seen outside <think>, remaining text
        # is treated as regular content.
        if self._is_end_tool_calls(current_text):
            return DeltaMessage(content=delta_text)

        # Hold back a tail that could be the start of a tool tag.
        safe_content, potential_tag = self._split_content_for_buffering(
            delta_text)
        if potential_tag:
            self.pending_buffer += potential_tag
            return DeltaMessage(content=safe_content) if safe_content else None

        # Ignore tool calls confined to thinking regions for detection.
        processed_current_text = self.preprocess_model_output(current_text)

        if self.tool_call_start_token not in processed_current_text:
            if (self.tool_call_end_token in delta_text
                    and self.tool_call_start_token in current_text):
                return None
            if delta_text.strip(
            ) == '' and self.tool_call_start_token in current_text:
                return None
            # A completed tool-call block resets the streaming state.
            if (self._get_current_tool_index() != -1
                    and self.tool_call_end_token in current_text):
                self._reset_streaming_state()
            return DeltaMessage(content=delta_text)

        # Swallow the delta that is exactly the start token itself.
        if (self.tool_call_start_token_id is not None
                and self.tool_call_start_token_id in delta_token_ids
                and len(delta_token_ids) == 1):
            return None

        original_tool_start = self._find_tool_start_outside_thinking(
            current_text)
        if original_tool_start is None:
            return None

        # Text preceding the tool-call region is emitted as content first.
        content_before_tools = self._extract_content_before_tools(
            current_text, delta_text, original_tool_start)
        if content_before_tools:
            return DeltaMessage(content=content_before_tools)

        try:
            tool_content = self._extract_tool_content(current_text,
                                                      original_tool_start)
            current_tools_count = self._detect_tools_in_text(tool_content)

            if current_tools_count == 0:
                return None

            if self._get_current_tool_index() == -1:
                self._reset_streaming_state()

            self._ensure_state_arrays(current_tools_count)

            # Names are streamed before arguments for each tool.
            return (self._handle_tool_name_streaming(tool_content,
                                                     current_tools_count)
                    or self._handle_tool_args_streaming(
                        tool_content, current_tools_count))

        except Exception:
            # Fix: the message must be a single string — passing a second
            # positional string made logging treat it as a %-format arg for
            # a format string with no placeholders, breaking the log call.
            logger.exception("An unexpected error occurred "
                             "during streaming tool call handling.")
            return None

    def _find_tool_start_outside_thinking(self,
                                          current_text: str) -> Optional[int]:
        """
        Find the start position of tool calls outside of thinking tags.

        Args:
            current_text: Current text to search

        Returns:
            Position of tool call start or None if not found
        """
        search_start = 0
        while True:
            pos = current_text.find(self.tool_call_start_token, search_start)
            if pos == -1:
                return None

            think_regions = [(m.start(), m.end()) for m in re.finditer(
                r"<think>(.*?)</think>", current_text, flags=re.DOTALL)]
            in_think = any(pos >= t_start and pos < t_end
                           for t_start, t_end in think_regions)

            if not in_think:
                return pos

            search_start = pos + 1

    def _extract_content_before_tools(self, current_text: str, delta_text: str,
                                      tool_start: int) -> Optional[str]:
        """
        Extract content that appears before tool calls.

        Args:
            current_text: Current text
            delta_text: Delta text
            tool_start: Start position of tools

        Returns:
            Content before tools or None
        """
        if tool_start > 0:
            delta_start_pos = len(current_text) - len(delta_text)
            if delta_start_pos < tool_start:
                content_part = delta_text
                if delta_start_pos + len(delta_text) > tool_start:
                    content_part = delta_text[:tool_start - delta_start_pos]
                return content_part if content_part else None
        return None

    def _extract_tool_content(self, current_text: str, tool_start: int) -> str:
        """
        Extract tool content from current text starting at tool_start.

        Args:
            current_text: Current text
            tool_start: Start position of tool calls

        Returns:
            Extracted tool content
        """
        tool_content_start = tool_start + len(self.tool_call_start_token)
        tool_content = current_text[tool_content_start:]

        end_pos = tool_content.find(self.tool_call_end_token)
        if end_pos != -1:
            tool_content = tool_content[:end_pos]

        return tool_content

in_thinking_tag instance-attribute

in_thinking_tag = False

pending_buffer instance-attribute

pending_buffer = ''

streaming_state instance-attribute

streaming_state: dict[str, Any] = {
    "current_tool_index": -1,
    "tool_ids": [],
    "sent_tools": [],
}

thinking_tag_pattern instance-attribute

thinking_tag_pattern = '<think>(.*?)</think>'

tool_args_pattern instance-attribute

tool_args_pattern = compile('"arguments":\\s*')

tool_call_end_token instance-attribute

tool_call_end_token = '</tool_calls>'

tool_call_end_token_id instance-attribute

tool_call_end_token_id = get(tool_call_end_token)

tool_call_regex instance-attribute

tool_call_regex = compile(
    "<tool_calls>(.*?)</tool_calls>|<tool_calls>(.*)",
    DOTALL,
)

tool_call_start_token instance-attribute

tool_call_start_token = '<tool_calls>'

tool_call_start_token_id instance-attribute

tool_call_start_token_id = get(tool_call_start_token)

tool_name_pattern instance-attribute

tool_name_pattern = compile('"name":\\s*"([^"]+)"')

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    super().__init__(tokenizer)

    # Initialize streaming state for tracking tool call progress
    self.streaming_state: dict[str, Any] = {
        "current_tool_index": -1,  # Index of current tool being processed
        "tool_ids": [],  # List of tool call IDs
        "sent_tools": [],  # List of tools that have been sent
    }

    # Define tool call tokens and patterns
    self.tool_call_start_token = "<tool_calls>"
    self.tool_call_end_token = "</tool_calls>"
    self.tool_call_regex = re.compile(
        r"<tool_calls>(.*?)</tool_calls>|<tool_calls>(.*)", re.DOTALL)
    self.thinking_tag_pattern = r"<think>(.*?)</think>"
    self.tool_name_pattern = re.compile(r'"name":\s*"([^"]+)"')
    self.tool_args_pattern = re.compile(r'"arguments":\s*')

    # Buffer for handling partial tool calls during streaming
    self.pending_buffer = ""
    self.in_thinking_tag = False

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ToolParser "
            "constructor during construction.")

    # Get token IDs for tool call start/end tokens
    self.tool_call_start_token_id = self.vocab.get(
        self.tool_call_start_token)
    self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

    if (self.tool_call_start_token_id is None
            or self.tool_call_end_token_id is None):
        logger.warning(
            "Minimax Tool parser could not locate tool call start/end "
            "tokens in the tokenizer. Falling back to string matching.")

_advance_to_next_tool

_advance_to_next_tool() -> None

Advance to the next tool in the streaming sequence.

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _advance_to_next_tool(self) -> None:
    """Advance to the next tool in the streaming sequence."""
    self.streaming_state["current_tool_index"] = int(
        self.streaming_state["current_tool_index"]) + 1

_clean_delta_braces

_clean_delta_braces(delta_text: str) -> str

Clean delta text by removing excessive closing braces.

Parameters:

Name Type Description Default
delta_text str

Delta text to clean

required

Returns:

Type Description
str

Cleaned delta text

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _clean_delta_braces(self, delta_text: str) -> str:
    """
    Clean delta text by removing excessive closing braces.

    Args:
        delta_text: Delta text to clean

    Returns:
        Cleaned delta text
    """
    if not delta_text:
        return delta_text

    delta_stripped = delta_text.strip()

    if delta_stripped and all(c in '}\n\r\t ' for c in delta_stripped):
        brace_count = delta_stripped.count('}')
        if brace_count > 1:
            return '}\n' if delta_text.endswith('\n') else '}'

    return delta_text

_clean_duplicate_braces

_clean_duplicate_braces(args_text: str) -> str

Clean duplicate closing braces from arguments text.

Parameters:

Name Type Description Default
args_text str

Raw arguments text

required

Returns:

Type Description
str

Cleaned arguments text with proper JSON formatting

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _clean_duplicate_braces(self, args_text: str) -> str:
    """
    Trim trailing duplicate closing braces until the arguments parse.

    Args:
        args_text: Raw arguments text

    Returns:
        Cleaned arguments text with proper JSON formatting
    """
    text = args_text.strip()
    if not text:
        return text

    def _parses(candidate: str) -> bool:
        try:
            json.loads(candidate)
            return True
        except json.JSONDecodeError:
            return False

    if _parses(text):
        return text

    # Peel one '}' at a time while a doubled brace remains; stop as
    # soon as the remainder is valid JSON.
    while text.endswith('}}'):
        text = text[:-1]
        if _parses(text):
            return text

    return text

_detect_tools_in_text

_detect_tools_in_text(text: str) -> int

Detect the number of tools in text by counting name patterns.

Parameters:

Name Type Description Default
text str

Text to analyze

required

Returns:

Type Description
int

Number of tools detected

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _detect_tools_in_text(self, text: str) -> int:
    """
    Count tool calls in *text* via occurrences of the name pattern.

    Args:
        text: Text to analyze

    Returns:
        Number of tools detected
    """
    return len(self.tool_name_pattern.findall(text))

_ensure_state_arrays

_ensure_state_arrays(tool_count: int) -> None

Ensure state arrays have sufficient capacity for tool_count tools.

Parameters:

Name Type Description Default
tool_count int

Number of tools to prepare for

required
Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _ensure_state_arrays(self, tool_count: int) -> None:
    """
    Grow per-tool bookkeeping lists to hold *tool_count* entries.

    Args:
        tool_count: Number of tools to prepare for
    """
    sent = list(self.streaming_state["sent_tools"])
    ids = list(self.streaming_state["tool_ids"])

    # Allocate fresh per-tool state (with a new call id) for any tool
    # index not seen before; a negative range is simply empty.
    for _ in range(tool_count - len(sent)):
        sent.append({
            "sent_name": False,
            "sent_arguments": "",
            "id": make_tool_call_id(),
        })
    ids.extend([None] * (tool_count - len(ids)))

    self.streaming_state["sent_tools"] = sent
    self.streaming_state["tool_ids"] = ids

_extract_content_before_tools

_extract_content_before_tools(
    current_text: str, delta_text: str, tool_start: int
) -> Optional[str]

Extract content that appears before tool calls.

Parameters:

Name Type Description Default
current_text str

Current text

required
delta_text str

Delta text

required
tool_start int

Start position of tools

required

Returns:

Type Description
Optional[str]

Content before tools or None

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _extract_content_before_tools(self, current_text: str, delta_text: str,
                                  tool_start: int) -> Optional[str]:
    """
    Return the slice of *delta_text* that precedes the tool-call region.

    Args:
        current_text: Current text
        delta_text: Delta text
        tool_start: Start position of tools

    Returns:
        Content before tools or None
    """
    if tool_start <= 0:
        return None

    delta_begin = len(current_text) - len(delta_text)
    if delta_begin >= tool_start:
        # The whole delta is already inside the tool-call region.
        return None

    piece = delta_text
    if delta_begin + len(delta_text) > tool_start:
        # Delta straddles the boundary: keep only the prefix.
        piece = delta_text[:tool_start - delta_begin]
    return piece or None

_extract_tool_args

_extract_tool_args(tool_content: str, args_match) -> str

Extract tool arguments from tool content.

Parameters:

Name Type Description Default
tool_content str

Tool call content

required
args_match

Regex match for arguments pattern

required

Returns:

Type Description
str

Extracted arguments as string

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _extract_tool_args(self, tool_content: str, args_match) -> str:
    """
    Pull the arguments payload out of a tool-call body.

    Args:
        tool_content: Tool call content
        args_match: Regex match for arguments pattern

    Returns:
        Extracted arguments as string
    """
    tail = tool_content[args_match.end():]

    if tail.strip().startswith('{'):
        # Balanced-brace scan: return the first complete JSON object.
        depth = 0
        for idx, ch in enumerate(tail):
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    return tail[:idx + 1]
    else:
        # Scalar argument value: cut at the enclosing object's '}'.
        cut = tail.find('}')
        if cut > 0:
            return tail[:cut].strip()

    # Fallback: unterminated payload, drop trailing braces/whitespace.
    return tail.rstrip('}').strip()

_extract_tool_content

_extract_tool_content(
    current_text: str, tool_start: int
) -> str

Extract tool content from current text starting at tool_start.

Parameters:

Name Type Description Default
current_text str

Current text

required
tool_start int

Start position of tool calls

required

Returns:

Type Description
str

Extracted tool content

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _extract_tool_content(self, current_text: str, tool_start: int) -> str:
    """
    Return the text between the tool-call start token and its end token.

    Args:
        current_text: Current text
        tool_start: Start position of tool calls

    Returns:
        Extracted tool content
    """
    begin = tool_start + len(self.tool_call_start_token)
    content = current_text[begin:]

    end_idx = content.find(self.tool_call_end_token)
    # If the end token has not arrived yet, keep everything after start.
    return content if end_idx == -1 else content[:end_idx]

_find_tool_boundaries

_find_tool_boundaries(text: str) -> list[tuple[int, int]]

Find the boundaries of tool calls in text.

Parameters:

Name Type Description Default
text str

Text to analyze

required

Returns:

Type Description
list[tuple[int, int]]

List of (start, end) positions for tool calls

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _find_tool_boundaries(self, text: str) -> list[tuple[int, int]]:
    """
    Find the boundaries of tool calls in text.

    Scans for '{'...'}' spans via brace-depth counting. A balanced span
    is reported only if it contains both a ``"name"`` and an
    ``"arguments"`` key; a span still open when the text ends (depth
    never returned to zero) is reported as soon as a ``"name"`` key was
    seen, so a partially streamed tool call still gets a boundary.

    Args:
        text: Text to analyze

    Returns:
        List of (start, end) positions for tool calls
    """
    boundaries = []
    i = 0
    while i < len(text):
        if text[i] == '{':
            start = i
            depth = 0
            has_name = False
            has_arguments = False

            while i < len(text):
                if text[i] == '{':
                    depth += 1
                elif text[i] == '}':
                    depth -= 1
                    if depth == 0:
                        end = i + 1
                        segment = text[start:end]
                        # Balanced object: keep it only if it looks
                        # like a tool call (both keys present).
                        if '"name"' in segment and '"arguments"' in segment:
                            boundaries.append((start, end))
                        break

                # Track keys seen so far; used for the unterminated
                # (end-of-text) case below. NOTE(review): this rescans
                # text[start:i+1] each step — O(n^2) on long spans.
                if not has_name and '"name"' in text[start:i + 1]:
                    has_name = True
                if not has_arguments and '"arguments"' in text[start:i +
                                                               1]:
                    has_arguments = True

                i += 1

            # Text ended mid-object: report the partial tool call if
            # its name was already visible.
            if depth > 0 and has_name:
                boundaries.append((start, i))
        else:
            i += 1
    return boundaries

_find_tool_start_outside_thinking

_find_tool_start_outside_thinking(
    current_text: str,
) -> Optional[int]

Find the start position of tool calls outside of thinking tags.

Parameters:

Name Type Description Default
current_text str

Current text to search

required

Returns:

Type Description
Optional[int]

Position of tool call start or None if not found

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _find_tool_start_outside_thinking(self,
                                      current_text: str) -> Optional[int]:
    """
    Find the start position of tool calls outside of thinking tags.

    Args:
        current_text: Current text to search

    Returns:
        Position of tool call start or None if not found
    """
    # The <think>...</think> regions depend only on current_text, so
    # compute them once up front instead of re-running the regex scan
    # on every candidate match (the previous version recomputed this
    # inside the loop on each iteration).
    think_regions = [(m.start(), m.end()) for m in re.finditer(
        r"<think>(.*?)</think>", current_text, flags=re.DOTALL)]

    search_start = 0
    while True:
        pos = current_text.find(self.tool_call_start_token, search_start)
        if pos == -1:
            return None

        # Accept the first start token that is not inside a completed
        # thinking block.
        if not any(t_start <= pos < t_end
                   for t_start, t_end in think_regions):
            return pos

        search_start = pos + 1

_get_current_tool_content

_get_current_tool_content(
    text: str, tool_index: int
) -> tuple[Optional[str], Optional[str]]

Get the content of a specific tool by index.

Parameters:

Name Type Description Default
text str

Text containing tool calls

required
tool_index int

Index of tool to extract

required

Returns:

Type Description
tuple[Optional[str], Optional[str]]

Tuple of (tool_name, tool_arguments) or (None, None) if not found

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _get_current_tool_content(
        self, text: str,
        tool_index: int) -> tuple[Optional[str], Optional[str]]:
    """
    Return (name, arguments) for the tool at *tool_index*, if present.

    Args:
        text: Text containing tool calls
        tool_index: Index of tool to extract

    Returns:
        Tuple of (tool_name, tool_arguments) or (None, None) if not found
    """
    spans = self._find_tool_boundaries(text)
    if tool_index >= len(spans):
        return None, None

    begin, finish = spans[tool_index]
    body = text[begin:finish]

    name_match = self.tool_name_pattern.search(body)
    name = name_match.group(1) if name_match else None

    args_match = self.tool_args_pattern.search(body)
    if not args_match:
        return name, None

    try:
        return name, self._extract_tool_args(body, args_match)
    except Exception:
        # Fall back to a crude trim when structured extraction fails.
        fallback = body[args_match.end():].rstrip('}').strip()
        return name, fallback

_get_current_tool_index

_get_current_tool_index() -> int

Get the current tool index.

Returns:

Type Description
int

Current tool index

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _get_current_tool_index(self) -> int:
    """
    Return the index of the tool currently being streamed.

    Returns:
        Current tool index
    """
    current = self.streaming_state["current_tool_index"]
    return int(current)

_get_next_unsent_tool_index

_get_next_unsent_tool_index(tool_count: int) -> int

Get the index of the next unsent tool.

Parameters:

Name Type Description Default
tool_count int

Total number of tools

required

Returns:

Type Description
int

Index of next unsent tool, or -1 if all tools sent

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _get_next_unsent_tool_index(self, tool_count: int) -> int:
    """
    Find the first tool (by index) whose name has not been emitted yet.

    Args:
        tool_count: Total number of tools

    Returns:
        Index of next unsent tool, or -1 if all tools sent
    """
    sent = list(self.streaming_state["sent_tools"])
    for idx in range(tool_count):
        # A tool with no state entry yet has certainly not been sent.
        if idx >= len(sent) or not sent[idx]["sent_name"]:
            return idx
    return -1

_handle_tool_args_streaming

_handle_tool_args_streaming(
    tool_content: str, tool_count: int
) -> Union[DeltaMessage, None]

Handle streaming of tool arguments.

Parameters:

Name Type Description Default
tool_content str

Content containing tool calls

required
tool_count int

Total number of tools

required

Returns:

Type Description
Union[DeltaMessage, None]

DeltaMessage with tool arguments or None if no arguments to stream

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _handle_tool_args_streaming(
        self, tool_content: str,
        tool_count: int) -> Union[DeltaMessage, None]:
    """
    Handle streaming of tool arguments.

    Emits only the not-yet-sent portion of the current tool's arguments
    and advances to the next tool once its arguments JSON is closed.

    Args:
        tool_content: Content containing tool calls
        tool_count: Total number of tools

    Returns:
        DeltaMessage with tool arguments or None if no arguments to stream
    """
    current_idx = self._get_current_tool_index()

    if current_idx < 0 or current_idx >= tool_count:
        return None

    tool_name, tool_args = self._get_current_tool_content(
        tool_content, current_idx)
    if not tool_name or tool_args is None:
        return None

    sent_tools = list(self.streaming_state["sent_tools"])

    # The tool's name must have been announced before its arguments.
    if not sent_tools[current_idx]["sent_name"]:
        return None

    clean_args = self._clean_duplicate_braces(tool_args)
    sent_args = sent_tools[current_idx]["sent_arguments"]

    if clean_args != sent_args:
        if sent_args and clean_args.startswith(sent_args):
            # Incremental case: emit only the newly appended suffix.
            args_delta = extract_intermediate_diff(clean_args, sent_args)
            if args_delta:
                args_delta = self._clean_delta_braces(args_delta)
                sent_tools[current_idx]["sent_arguments"] = clean_args
                self.streaming_state["sent_tools"] = sent_tools

                # A trailing '}' means this tool's arguments are done.
                if clean_args.endswith('}'):
                    self._advance_to_next_tool()

                return DeltaMessage(tool_calls=[
                    DeltaToolCall(index=current_idx,
                                  function=DeltaFunctionCall(
                                      arguments=args_delta).model_dump(
                                          exclude_none=True))
                ])
        elif not sent_args and clean_args:
            # First chunk of arguments for this tool.
            clean_args_delta = self._clean_delta_braces(clean_args)
            sent_tools[current_idx]["sent_arguments"] = clean_args
            self.streaming_state["sent_tools"] = sent_tools

            if clean_args.endswith('}'):
                self._advance_to_next_tool()

            return DeltaMessage(tool_calls=[
                DeltaToolCall(index=current_idx,
                              function=DeltaFunctionCall(
                                  arguments=clean_args_delta).model_dump(
                                      exclude_none=True))
            ])

    return None

_handle_tool_name_streaming

_handle_tool_name_streaming(
    tool_content: str, tool_count: int
) -> Union[DeltaMessage, None]

Handle streaming of tool names.

Parameters:

Name Type Description Default
tool_content str

Content containing tool calls

required
tool_count int

Total number of tools

required

Returns:

Type Description
Union[DeltaMessage, None]

DeltaMessage with tool name or None if no tool to stream

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _handle_tool_name_streaming(
        self, tool_content: str,
        tool_count: int) -> Union[DeltaMessage, None]:
    """
    Emit the next tool's name as a streaming delta, if one is ready.

    Args:
        tool_content: Content containing tool calls
        tool_count: Total number of tools

    Returns:
        DeltaMessage with tool name or None if no tool to stream
    """
    next_idx = self._get_next_unsent_tool_index(tool_count)
    if next_idx == -1:
        return None

    # Only announce a tool once its boundaries are visible in the stream.
    if next_idx >= len(self._find_tool_boundaries(tool_content)):
        return None

    tool_name, _ = self._get_current_tool_content(tool_content, next_idx)
    if not tool_name:
        return None

    self._set_current_tool_index(next_idx)

    sent_tools = list(self.streaming_state["sent_tools"])
    tool_ids = list(self.streaming_state["tool_ids"])
    tool_id = sent_tools[next_idx]["id"]
    tool_ids[next_idx] = tool_id
    sent_tools[next_idx]["sent_name"] = True
    self.streaming_state["sent_tools"] = sent_tools
    self.streaming_state["tool_ids"] = tool_ids

    return DeltaMessage(tool_calls=[
        DeltaToolCall(index=next_idx,
                      type="function",
                      id=tool_id,
                      function=DeltaFunctionCall(
                          name=tool_name).model_dump(exclude_none=True))
    ])

_is_end_tool_calls

_is_end_tool_calls(current_text: str) -> bool
Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _is_end_tool_calls(self, current_text: str) -> bool:
    """Return True if a tool-call end token occurs outside every
    <think>...</think> region of *current_text*."""
    if self.tool_call_end_token not in current_text:
        return False

    think_regions = [(m.start(), m.end())
                     for m in re.finditer(self.thinking_tag_pattern,
                                          current_text,
                                          flags=re.DOTALL)]

    # Walk the end-token occurrences lazily; the first one that falls
    # outside all thinking regions decides the answer.
    search_start = 0
    while True:
        pos = current_text.find(self.tool_call_end_token, search_start)
        if pos == -1:
            return False
        if not any(t_start <= pos < t_end
                   for t_start, t_end in think_regions):
            return True
        search_start = pos + 1

_is_potential_tag_start

_is_potential_tag_start(text: str) -> bool

Check if text might be the start of a tool call tag.

Parameters:

Name Type Description Default
text str

Text to check

required

Returns:

Type Description
bool

True if text could be the start of a tool call tag

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _is_potential_tag_start(self, text: str) -> bool:
    """
    Check whether *text* ends with a proper prefix of a tool-call tag.

    Args:
        text: Text to check

    Returns:
        True if text could be the start of a tool call tag
    """
    for tag in (self.tool_call_start_token, self.tool_call_end_token):
        # Suffixes up to len(tag) - 1 chars: a full tag is not a
        # "potential" start, it is a complete tag.
        longest = min(len(text), len(tag) - 1)
        for size in range(1, longest + 1):
            if tag.startswith(text[-size:]):
                return True
    return False

_process_buffer

_process_buffer(new_content: str) -> str

Process buffered content and return output content.

Parameters:

Name Type Description Default
new_content str

New content to add to buffer

required

Returns:

Type Description
str

Processed output content

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _process_buffer(self, new_content: str) -> str:
    """
    Append *new_content* to the pending buffer and drain emittable text.

    Tool-call tags are stripped from the output; a trailing fragment
    that might be the start of a tag is retained in the buffer.

    Args:
        new_content: New content to add to buffer

    Returns:
        Processed output content
    """
    self.pending_buffer += new_content

    # Inside <think>, everything passes through verbatim.
    if self.in_thinking_tag:
        drained, self.pending_buffer = self.pending_buffer, ""
        return drained

    output_parts = []
    while self.pending_buffer:
        start_pos = self.pending_buffer.find(self.tool_call_start_token)
        end_pos = self.pending_buffer.find(self.tool_call_end_token)

        if start_pos != -1 and (end_pos == -1 or start_pos < end_pos):
            tag_pos, tag_len = start_pos, len(self.tool_call_start_token)
        elif end_pos != -1:
            tag_pos, tag_len = end_pos, len(self.tool_call_end_token)
        else:
            # No complete tag left; hold back a possible tag prefix.
            if not self._is_potential_tag_start(self.pending_buffer):
                output_parts.append(self.pending_buffer)
                self.pending_buffer = ""
            break

        # Emit text up to the tag and drop the tag itself.
        output_parts.append(self.pending_buffer[:tag_pos])
        self.pending_buffer = self.pending_buffer[tag_pos + tag_len:]

    return "".join(output_parts)

_reset_streaming_state

_reset_streaming_state() -> None

Reset the streaming state to initial values.

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _reset_streaming_state(self) -> None:
    """Return the streaming bookkeeping to its pristine state."""
    fresh_state = {
        "current_tool_index": -1,  # no tool is being streamed yet
        "tool_ids": [],
        "sent_tools": [],
    }
    self.streaming_state = fresh_state

_set_current_tool_index

_set_current_tool_index(index: int) -> None

Set the current tool index.

Parameters:

Name Type Description Default
index int

Tool index to set

required
Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _set_current_tool_index(self, index: int) -> None:
    """
    Record *index* as the tool currently being streamed.

    Args:
        index: Tool index to set
    """
    state = self.streaming_state
    state["current_tool_index"] = index

_should_buffer_content

_should_buffer_content(delta_text: str) -> bool

Determine if content should be buffered for later processing.

Parameters:

Name Type Description Default
delta_text str

Delta text to check

required

Returns:

Type Description
bool

True if content should be buffered

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _should_buffer_content(self, delta_text: str) -> bool:
    """
    Decide whether *delta_text* must go through the tag buffer.

    Args:
        delta_text: Delta text to check

    Returns:
        True if content should be buffered
    """
    # Thinking-mode text is never buffered.
    if self.in_thinking_tag:
        return False
    if self.pending_buffer:
        return True
    if self.tool_call_start_token in delta_text:
        return True
    if self.tool_call_end_token in delta_text:
        return True
    # A leading '<' may be the start of a tag split across deltas.
    return delta_text.startswith('<')

_split_content_for_buffering

_split_content_for_buffering(
    delta_text: str,
) -> tuple[str, str]

Split delta text into safe content and potential tag content.

Parameters:

Name Type Description Default
delta_text str

Delta text to split

required

Returns:

Type Description
tuple[str, str]

Tuple of (safe_content, potential_tag_content)

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _split_content_for_buffering(self, delta_text: str) -> tuple[str, str]:
    """
    Split delta text into safe content and potential tag content.

    Checks, for each tool-call tag, whether the tail of *delta_text*
    could be a truncated occurrence of that tag; if so, that tail is
    held back so the next delta can complete (or refute) the tag.

    Args:
        delta_text: Delta text to split

    Returns:
        Tuple of (safe_content, potential_tag_content)
    """
    # Inside <think> nothing is buffered; everything is safe to emit.
    if self.in_thinking_tag:
        return delta_text, ""

    for tag in [self.tool_call_start_token, self.tool_call_end_token]:
        # Try progressively longer tag prefixes, locating their last
        # occurrence in the delta.
        for i in range(1, len(tag)):
            tag_prefix = tag[:i]
            pos = delta_text.rfind(tag_prefix)
            # The entire suffix starting at pos must itself be a
            # prefix of the tag to count as a possible truncated tag.
            if pos != -1 and tag.startswith(delta_text[pos:]):
                return delta_text[:pos], delta_text[pos:]
    return delta_text, ""

_update_thinking_state

_update_thinking_state(text: str) -> None

Update the thinking tag state based on text content.

Parameters:

Name Type Description Default
text str

Text to analyze for thinking tags

required
Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def _update_thinking_state(self, text: str) -> None:
    """
    Track whether the stream is currently inside a <think> block.

    Args:
        text: Text to analyze for thinking tags
    """
    opens = text.count("<think>")
    closes = text.count("</think>")
    if opens > closes:
        # An unclosed <think> is still in progress.
        self.in_thinking_tag = True
    else:
        # Balanced tags count as "in thinking" only when the text ends
        # exactly at a closing tag.
        self.in_thinking_tag = (opens == closes
                                and text.endswith("</think>"))

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract tool calls from model output for non-streaming mode.

Parameters:

Name Type Description Default
model_output str

Complete model output

required
request ChatCompletionRequest

Chat completion request

required

Returns:

Type Description
ExtractedToolCallInformation

ExtractedToolCallInformation containing tool calls and content

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """
    Extract tool calls from model output for non-streaming mode.

    Args:
        model_output: Complete model output
        request: Chat completion request

    Returns:
        ExtractedToolCallInformation containing tool calls and content
    """
    # Strip tool calls that appear inside <think> blocks so they are
    # not mistaken for real calls.
    processed_output = self.preprocess_model_output(model_output)

    if self.tool_call_start_token not in processed_output:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        function_call_tuples = self.tool_call_regex.findall(
            processed_output)

        raw_function_calls = []
        for match in function_call_tuples:
            # The regex yields tuples of alternative capture groups;
            # exactly one of the two is populated per match.
            tool_call_content = match[0] if match[0] else match[1]
            if tool_call_content.strip():
                lines = tool_call_content.strip().split('\n')
                for line in lines:
                    line = line.strip()
                    # Each tool call is expected to be a single-line
                    # JSON object; anything else is skipped.
                    if line and line.startswith('{') and line.endswith(
                            '}'):
                        try:
                            parsed_call = json.loads(line)
                            raw_function_calls.append(parsed_call)
                        except json.JSONDecodeError:
                            continue

        tool_calls = []
        for function_call in raw_function_calls:
            if "name" in function_call and "arguments" in function_call:
                tool_calls.append(
                    ToolCall(type="function",
                             function=FunctionCall(
                                 name=function_call["name"],
                                 arguments=json.dumps(
                                     function_call["arguments"],
                                     ensure_ascii=False))))

        # Recover content preceding the first tool call, mapped back
        # onto the ORIGINAL (unprocessed) output so the returned
        # content matches what the model actually produced.
        processed_pos = processed_output.find(self.tool_call_start_token)
        if processed_pos != -1:
            processed_content = processed_output[:processed_pos].strip()

            if processed_content:
                # Anchor on the last non-empty line of the processed
                # prefix to locate the cut point in the original text.
                lines = processed_content.split('\n')
                for line in reversed(lines):
                    line = line.strip()
                    if line:
                        pos = model_output.find(line)
                        if pos != -1:
                            content = model_output[:pos + len(line)]
                            break
                else:
                    # No anchor line found in the original output.
                    content = ""
            else:
                content = ""
        else:
            content = model_output

        return ExtractedToolCallInformation(
            tools_called=len(tool_calls) > 0,
            tool_calls=tool_calls,
            content=content.strip() if content.strip() else None)

    except Exception:
        # Best-effort: on any parsing failure fall back to returning
        # the raw output as content with no tool calls.
        logger.exception(
            "An unexpected error occurred during tool call extraction.")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """
    Extract tool call deltas from model output in streaming mode.

    Args:
        previous_text: Accumulated text before this delta
        current_text: Accumulated text including this delta
        delta_text: Newly generated text
        previous_token_ids: Token ids before this delta
        current_token_ids: Token ids including this delta
        delta_token_ids: Newly generated token ids
        request: Chat completion request

    Returns:
        DeltaMessage carrying plain content and/or tool-call deltas,
        or None when nothing should be emitted for this delta.
    """
    self._update_thinking_state(current_text)

    # Text inside <think> streams through as plain content.
    if self.in_thinking_tag:
        return DeltaMessage(content=delta_text)

    # Deltas that contain (or may complete) tool-call tags go through
    # the pending buffer so tags are never emitted as content.
    if self._should_buffer_content(delta_text):
        buffered_output = self._process_buffer(delta_text)
        return DeltaMessage(
            content=buffered_output) if buffered_output else None

    # Once the tool-call section has closed, resume plain content.
    if self._is_end_tool_calls(current_text):
        return DeltaMessage(content=delta_text)

    safe_content, potential_tag = self._split_content_for_buffering(
        delta_text)
    if potential_tag:
        self.pending_buffer += potential_tag
        return DeltaMessage(content=safe_content) if safe_content else None

    processed_current_text = self.preprocess_model_output(current_text)

    if self.tool_call_start_token not in processed_current_text:
        # Start token only appears inside <think>: suppress deltas
        # tied to those phantom tool calls, reset any stale streaming
        # state, and otherwise pass the delta through as content.
        if (self.tool_call_end_token in delta_text
                and self.tool_call_start_token in current_text):
            return None
        if delta_text.strip(
        ) == '' and self.tool_call_start_token in current_text:
            return None
        if (self._get_current_tool_index() != -1
                and self.tool_call_end_token in current_text):
            self._reset_streaming_state()
        return DeltaMessage(content=delta_text)

    # Swallow a delta that is exactly the tool-call start token.
    if (self.tool_call_start_token_id is not None
            and self.tool_call_start_token_id in delta_token_ids
            and len(delta_token_ids) == 1):
        return None

    original_tool_start = self._find_tool_start_outside_thinking(
        current_text)
    if original_tool_start is None:
        return None

    content_before_tools = self._extract_content_before_tools(
        current_text, delta_text, original_tool_start)
    if content_before_tools:
        return DeltaMessage(content=content_before_tools)

    try:
        tool_content = self._extract_tool_content(current_text,
                                                  original_tool_start)
        current_tools_count = self._detect_tools_in_text(tool_content)

        if current_tools_count == 0:
            return None

        if self._get_current_tool_index() == -1:
            self._reset_streaming_state()

        self._ensure_state_arrays(current_tools_count)

        # Stream the tool's name first; once sent, stream arguments.
        return (self._handle_tool_name_streaming(tool_content,
                                                 current_tools_count)
                or self._handle_tool_args_streaming(
                    tool_content, current_tools_count))

    except Exception:
        # BUG FIX: logger.exception was previously called with TWO
        # positional string arguments; logging treats the second as a
        # %-format argument, and since the template had no placeholder
        # the logging call itself raised "not all arguments converted".
        # Pass a single concatenated message instead.
        logger.exception("An unexpected error occurred "
                         "during streaming tool call handling.")
        return None

preprocess_model_output

preprocess_model_output(model_output: str) -> str

Preprocess model output by removing tool calls from thinking tags.

Parameters:

Name Type Description Default
model_output str

Raw model output string

required

Returns:

Type Description
str

Preprocessed model output with tool calls removed from thinking tags

Source code in vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
def preprocess_model_output(self, model_output: str) -> str:
    """
    Preprocess model output by removing tool calls from thinking tags.

    Args:
        model_output: Raw model output string

    Returns:
        Preprocessed model output with tool calls removed from thinking tags
    """

    def _scrub_think_block(match):
        # Delete any <tool_calls> sections nested inside this <think>
        # block, keeping the rest of its contents intact.
        inner = re.sub(r"<tool_calls>.*?</tool_calls>",
                       "",
                       match.group(1),
                       flags=re.DOTALL)
        return f"<think>{inner}</think>"

    return re.sub(self.thinking_tag_pattern,
                  _scrub_think_block,
                  model_output,
                  flags=re.DOTALL)

MistralToolParser

Bases: ToolParser

Tool call parser for Mistral 7B Instruct v0.3, intended for use with the mistral_common library and the examples/tool_chat_template_mistral.jinja template.

Used when --enable-auto-tool-choice --tool-call-parser mistral are all set

Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@ToolParserManager.register_module("mistral")
class MistralToolParser(ToolParser):
    """
    Tool call parser for Mistral 7B Instruct v0.3, intended for use with
    - [`mistral_common`](https://github.com/mistralai/mistral-common/)
    - the examples/tool_chat_template_mistral.jinja template.

    Used when --enable-auto-tool-choice --tool-call-parser mistral are all set
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        if not isinstance(self.model_tokenizer, MistralTokenizer):
            logger.info("Non-Mistral tokenizer detected when using a Mistral "
                        "model...")

        # initialize properties used for state when parsing tool calls in
        # streaming mode
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        self.streamed_args_for_tool: list[str] = [
        ]  # map what has been streamed for each tool so far to a list
        self.bot_token = "[TOOL_CALLS]"
        self.bot_token_id = self.vocab.get(self.bot_token)
        # Matches a whole JSON array of tool-call objects.
        self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)
        # Newer mistral_common versions emit "name{json-args}" pairs
        # instead of a single JSON array; match both parts when supported.
        if _is_fn_name_regex_support(self.model_tokenizer):
            self.fn_name_regex = re.compile(
                r'([a-zA-Z0-9_-]+)(\{[\s\S]*?\})(?=\s*$|,|\s)', re.DOTALL)
        else:
            self.fn_name_regex = None

        if self.bot_token_id is None:
            raise RuntimeError(
                "Mistral Tool Parser could not locate the tool call token in "
                "the tokenizer!")

    def adjust_request(
            self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """Disable special-token skipping for non-Mistral tokenizers so the
        [TOOL_CALLS] marker survives detokenization for tool detection."""
        if not isinstance(
                self.model_tokenizer, MistralTokenizer
        ) and request.tools and request.tool_choice != 'none':
            # Do not skip special tokens when using chat template
            # with Mistral parser as TOOL_CALL token is needed
            # for tool detection.
            # Note: we don't want skip_special_tokens=False
            # with MistralTokenizer as it is incompatible
            request.skip_special_tokens = False
        return request

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response. Requires
        find-and-replacing single quotes with double quotes for JSON parsing,
        make sure your tool call arguments don't ever include quotes!
        """

        # case -- if a tool call token is not present, return a text response
        if self.bot_token not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        # first remove the BOT token
        tool_content = model_output.replace(self.bot_token, "").strip()

        try:
            # we first try to directly load the json as parsing very nested
            # jsons is difficult
            try:
                if self.fn_name_regex:
                    matches = self.fn_name_regex.findall(tool_content)

                    function_call_arr = []
                    for match in matches:
                        fn_name = match[0]
                        args = match[1]

                        # fn_name is encoded outside serialized json dump
                        # only arguments are serialized
                        function_call_arr.append({
                            "name": fn_name,
                            "arguments": json.loads(args)
                        })
                else:
                    function_call_arr = json.loads(tool_content)
            except json.JSONDecodeError:
                # use a regex to find the part corresponding to the tool call.
                # NOTE: This use case should not happen if the model is trained
                # correctly. It's an easy possible fix so it's included, but
                # can be brittle for very complex / highly nested tool calls
                raw_tool_call = self.tool_call_regex.findall(tool_content)[0]
                function_call_arr = json.loads(raw_tool_call)

            # Tool Call
            tool_calls: list[MistralToolCall] = [
                MistralToolCall(
                    type="function",
                    function=FunctionCall(
                        name=raw_function_call["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(raw_function_call["arguments"],
                                             ensure_ascii=False)))
                for raw_function_call in function_call_arr
            ]

            # get any content before the tool call
            content = model_output.split(self.bot_token)[0]
            return ExtractedToolCallInformation(
                tools_called=True,
                tool_calls=tool_calls,
                content=content if len(content) > 0 else None)

        except Exception:
            logger.exception("Error in extracting tool call from response.")
            # return information to just treat the tool call as regular JSON
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=tool_content)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Incrementally parse tool calls as tokens stream in.

        Returns a DeltaMessage with plain content or tool-call deltas, or
        None when nothing should be emitted for this chunk. Mutates the
        per-request streaming state (current_tool_id, prev_tool_call_arr,
        streamed_args_for_tool, current_tool_name_sent).
        """

        # if the tool call token is not in the tokens generated so far, append
        # output to contents since it's not a tool
        if self.bot_token not in current_text:
            return DeltaMessage(content=delta_text)

        # if the tool call token ID IS in the tokens generated so far, that
        # means we're parsing as tool calls now

        # handle if we detected the BOT token which means the start of tool
        # calling
        if (self.bot_token_id in delta_token_ids
                and len(delta_token_ids) == 1):
            # if it's the only token, return None, so we don't send a chat
            # completion and don't send a control token
            return None

        # bit mask flags for partial JSON parsing. If the name hasn't been
        # sent yet, don't allow sending
        # an incomplete string since OpenAI only ever (as far as I have
        # seen) allows sending the entire tool/ function name at once.
        flags = Allow.ALL if self.current_tool_name_sent \
            else Allow.ALL & ~Allow.STR
        try:

            # take only the text after the BOT token; quote normalization
            # happens later on delta_text when diffing arguments
            parsable_arr = current_text.split(self.bot_token)[-1]

            # tool calls are generated in an array, so do partial JSON
            # parsing on the entire array
            try:
                tool_call_arr: list[dict] = partial_json_parser.loads(
                    parsable_arr, flags)
            except partial_json_parser.core.exceptions.MalformedJSON:
                logger.debug('not enough tokens to parse into JSON yet')
                return None

            # select the tool call we are currently parsing, if any

            current_tool_call: dict = tool_call_arr[self.current_tool_id] \
                if len(tool_call_arr) > 0 else {}

            # case -- if no tokens have been streamed for the tool, e.g.
            #   only the array brackets, stream nothing
            if len(tool_call_arr) == 0:
                return None

            # case: we are starting a new tool in the array
            #   -> array has > 0 length AND length has moved past cursor
            elif (len(tool_call_arr) > 0
                  and len(tool_call_arr) > self.current_tool_id + 1):

                # if we're moving on to a new call, first make sure we
                # haven't missed anything in the previous one that was
                # auto-generated due to JSON completions, but wasn't
                # streamed to the client yet.
                if self.current_tool_id >= 0:
                    diff: Union[str, None] = current_tool_call.get("arguments")

                    if diff:
                        diff = json.dumps(diff, ensure_ascii=False).replace(
                            self.streamed_args_for_tool[self.current_tool_id],
                            "")
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=diff).model_dump(
                                                  exclude_none=True))
                        ])
                        self.streamed_args_for_tool[
                            self.current_tool_id] += diff
                    else:
                        delta = None
                else:
                    delta = None
                # re-set stuff pertaining to progress in the current tool
                self.current_tool_id = len(tool_call_arr) - 1
                self.current_tool_name_sent = False
                self.streamed_args_for_tool.append("")
                logger.debug("starting on new tool %d", self.current_tool_id)
                return delta

            # case: update an existing tool - this is handled below

            # if the current tool name hasn't been sent, send if available
            # - otherwise send nothing
            if not self.current_tool_name_sent:
                function_name = current_tool_call.get("name")
                if function_name:

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      type="function",
                                      id=MistralToolCall.generate_random_id(),
                                      function=DeltaFunctionCall(
                                          name=function_name).model_dump(
                                              exclude_none=True))
                    ])
                    self.current_tool_name_sent = True
                else:
                    delta = None

            # now we know we're on the same tool call and we're streaming
            # arguments
            else:

                prev_arguments = self.prev_tool_call_arr[
                    self.current_tool_id].get("arguments")
                cur_arguments = current_tool_call.get("arguments")

                # normalize quotes in the delta and drop a trailing '"}'
                # (end-of-object) so it isn't streamed as argument text
                new_text = delta_text.replace("\'", "\"")
                if ('"}' in new_text):
                    new_text = new_text[:new_text.rindex('"}')]

                if not cur_arguments and not prev_arguments:

                    delta = None
                elif not cur_arguments and prev_arguments:
                    logger.error(
                        "INVARIANT - impossible to have arguments reset "
                        "mid-arguments")
                    delta = None
                elif cur_arguments and not prev_arguments:
                    cur_arguments_json = json.dumps(cur_arguments,
                                                    ensure_ascii=False)[:-2]
                    logger.debug("finding %s in %s", new_text,
                                 cur_arguments_json)

                    if (new_text not in cur_arguments_json):
                        return None
                    arguments_delta = cur_arguments_json[:cur_arguments_json.
                                                         rindex(new_text) +
                                                         len(new_text)]
                    logger.debug("First tokens in arguments received: %s",
                                 arguments_delta)
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=arguments_delta).
                                      model_dump(exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += arguments_delta

                elif cur_arguments and prev_arguments:
                    cur_args_json = json.dumps(cur_arguments,
                                               ensure_ascii=False)
                    prev_args_json = json.dumps(prev_arguments,
                                                ensure_ascii=False)
                    logger.debug("Searching for diff between \n%s\n%s",
                                 cur_args_json, prev_args_json)

                    argument_diff = extract_intermediate_diff(
                        cur_args_json, prev_args_json)
                    logger.debug("got arguments diff: %s", argument_diff)
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=argument_diff).model_dump(
                                              exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += argument_diff
                else:
                    # try parsing it with regular JSON - if it works we're
                    # at the end, and we need to send the difference between
                    # tokens streamed so far and the valid JSON
                    delta = None

            # check to see if the name is defined and has been sent. if so,
            # stream the name - otherwise keep waiting
            # finish by setting old and returning None as base case
            self.prev_tool_call_arr = tool_call_arr
            return delta

        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")
            return None

bot_token instance-attribute

bot_token = '[TOOL_CALLS]'

bot_token_id instance-attribute

bot_token_id = get(bot_token)

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

fn_name_regex instance-attribute

fn_name_regex = compile(
    "([a-zA-Z0-9_-]+)(\\{[\\s\\S]*?\\})(?=\\s*$|,|\\s)",
    DOTALL,
)

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_call_regex instance-attribute

tool_call_regex = compile('\\[{.*}\\]', DOTALL)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Set up streaming-parse state and locate the [TOOL_CALLS] token."""
    super().__init__(tokenizer)

    if not isinstance(self.model_tokenizer, MistralTokenizer):
        logger.info("Non-Mistral tokenizer detected when using a Mistral "
                    "model...")

    # Streaming-mode bookkeeping: which tool we're on, whether its name
    # has been emitted, and the argument text streamed so far per tool.
    self.prev_tool_call_arr: list[dict] = []
    self.current_tool_id: int = -1
    self.current_tool_name_sent: bool = False
    self.streamed_args_for_tool: list[str] = []

    self.bot_token = "[TOOL_CALLS]"
    self.bot_token_id = self.vocab.get(self.bot_token)
    # Matches a whole JSON array of tool-call objects.
    self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)
    # Newer format pairs a bare function name with a JSON argument
    # object; only enabled when the tokenizer version supports it.
    self.fn_name_regex = (re.compile(
        r'([a-zA-Z0-9_-]+)(\{[\s\S]*?\})(?=\s*$|,|\s)', re.DOTALL)
                          if _is_fn_name_regex_support(self.model_tokenizer)
                          else None)

    if self.bot_token_id is None:
        raise RuntimeError(
            "Mistral Tool Parser could not locate the tool call token in "
            "the tokenizer!")

adjust_request

adjust_request(
    request: ChatCompletionRequest,
) -> ChatCompletionRequest
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def adjust_request(
        self, request: ChatCompletionRequest) -> ChatCompletionRequest:
    """Keep special tokens in detokenized output for non-Mistral
    tokenizers so the [TOOL_CALLS] marker survives for tool detection.

    MistralTokenizer is incompatible with skip_special_tokens=False, so
    the flag is only flipped for other tokenizers, and only when the
    request actually enables tools.
    """
    tools_requested = request.tools and request.tool_choice != 'none'
    if not isinstance(self.model_tokenizer,
                      MistralTokenizer) and tools_requested:
        request.skip_special_tokens = False
    return request

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract the tool calls from a complete model response. Requires find-and-replacing single quotes with double quotes for JSON parsing, make sure your tool call arguments don't ever include quotes!

Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Parse a complete (non-streaming) model response for tool calls.

    Returns a plain-text result when no [TOOL_CALLS] token is present;
    otherwise parses the payload as JSON (with a regex-assisted
    fallback). Arguments must be JSON-parsable -- avoid stray quotes.
    """

    # Without the bot token this is an ordinary text response.
    if self.bot_token not in model_output:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    # Remove the bot token before parsing the call payload.
    tool_content = model_output.replace(self.bot_token, "").strip()

    try:
        # Direct JSON parsing first; regex fallback only on failure,
        # since very nested payloads are hard to regex reliably.
        try:
            if self.fn_name_regex:
                # Newer format: "<name>{json-args}" repeated per call.
                # The name lives outside the serialized JSON dump; only
                # the arguments are serialized.
                function_call_arr = [{
                    "name": fn_name,
                    "arguments": json.loads(raw_args)
                } for fn_name, raw_args in self.fn_name_regex.findall(
                    tool_content)]
            else:
                # Older format: the payload itself is a JSON array.
                function_call_arr = json.loads(tool_content)
        except json.JSONDecodeError:
            # Locate the array with a regex. Should not be needed for a
            # well-trained model; brittle for highly nested calls but a
            # cheap recovery path.
            raw_tool_call = self.tool_call_regex.findall(tool_content)[0]
            function_call_arr = json.loads(raw_tool_call)

        # Build tool-call objects; arguments are re-serialized to a
        # JSON string as the API contract requires.
        tool_calls: list[MistralToolCall] = [
            MistralToolCall(
                type="function",
                function=FunctionCall(
                    name=call["name"],
                    arguments=json.dumps(call["arguments"],
                                         ensure_ascii=False)))
            for call in function_call_arr
        ]

        # Preserve any assistant text that preceded the tool call.
        content = model_output.split(self.bot_token)[0]
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=content if len(content) > 0 else None)

    except Exception:
        logger.exception("Error in extracting tool call from response.")
        # return information to just treat the tool call as regular JSON
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=tool_content)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally parse tool calls as tokens stream in.

    Returns a DeltaMessage with plain content or tool-call deltas, or
    None when nothing should be emitted for this chunk. Mutates the
    per-request streaming state (current_tool_id, prev_tool_call_arr,
    streamed_args_for_tool, current_tool_name_sent).
    """

    # if the tool call token is not in the tokens generated so far, append
    # output to contents since it's not a tool
    if self.bot_token not in current_text:
        return DeltaMessage(content=delta_text)

    # if the tool call token ID IS in the tokens generated so far, that
    # means we're parsing as tool calls now

    # handle if we detected the BOT token which means the start of tool
    # calling
    if (self.bot_token_id in delta_token_ids
            and len(delta_token_ids) == 1):
        # if it's the only token, return None, so we don't send a chat
        # completion and don't send a control token
        return None

    # bit mask flags for partial JSON parsing. If the name hasn't been
    # sent yet, don't allow sending
    # an incomplete string since OpenAI only ever (as far as I have
    # seen) allows sending the entire tool/ function name at once.
    flags = Allow.ALL if self.current_tool_name_sent \
        else Allow.ALL & ~Allow.STR
    try:

        # take only the text after the BOT token; quote normalization
        # happens later on delta_text when diffing arguments
        parsable_arr = current_text.split(self.bot_token)[-1]

        # tool calls are generated in an array, so do partial JSON
        # parsing on the entire array
        try:
            tool_call_arr: list[dict] = partial_json_parser.loads(
                parsable_arr, flags)
        except partial_json_parser.core.exceptions.MalformedJSON:
            logger.debug('not enough tokens to parse into JSON yet')
            return None

        # select the tool call we are currently parsing, if any

        current_tool_call: dict = tool_call_arr[self.current_tool_id] \
            if len(tool_call_arr) > 0 else {}

        # case -- if no tokens have been streamed for the tool, e.g.
        #   only the array brackets, stream nothing
        if len(tool_call_arr) == 0:
            return None

        # case: we are starting a new tool in the array
        #   -> array has > 0 length AND length has moved past cursor
        elif (len(tool_call_arr) > 0
              and len(tool_call_arr) > self.current_tool_id + 1):

            # if we're moving on to a new call, first make sure we
            # haven't missed anything in the previous one that was
            # auto-generated due to JSON completions, but wasn't
            # streamed to the client yet.
            if self.current_tool_id >= 0:
                diff: Union[str, None] = current_tool_call.get("arguments")

                if diff:
                    diff = json.dumps(diff, ensure_ascii=False).replace(
                        self.streamed_args_for_tool[self.current_tool_id],
                        "")
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=diff).model_dump(
                                              exclude_none=True))
                    ])
                    self.streamed_args_for_tool[
                        self.current_tool_id] += diff
                else:
                    delta = None
            else:
                delta = None
            # re-set stuff pertaining to progress in the current tool
            self.current_tool_id = len(tool_call_arr) - 1
            self.current_tool_name_sent = False
            self.streamed_args_for_tool.append("")
            logger.debug("starting on new tool %d", self.current_tool_id)
            return delta

        # case: update an existing tool - this is handled below

        # if the current tool name hasn't been sent, send if available
        # - otherwise send nothing
        if not self.current_tool_name_sent:
            function_name = current_tool_call.get("name")
            if function_name:

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  type="function",
                                  id=MistralToolCall.generate_random_id(),
                                  function=DeltaFunctionCall(
                                      name=function_name).model_dump(
                                          exclude_none=True))
                ])
                self.current_tool_name_sent = True
            else:
                delta = None

        # now we know we're on the same tool call and we're streaming
        # arguments
        else:

            prev_arguments = self.prev_tool_call_arr[
                self.current_tool_id].get("arguments")
            cur_arguments = current_tool_call.get("arguments")

            # normalize quotes in the delta and drop a trailing '"}'
            # (end-of-object) so it isn't streamed as argument text
            new_text = delta_text.replace("\'", "\"")
            if ('"}' in new_text):
                new_text = new_text[:new_text.rindex('"}')]

            if not cur_arguments and not prev_arguments:

                delta = None
            elif not cur_arguments and prev_arguments:
                logger.error(
                    "INVARIANT - impossible to have arguments reset "
                    "mid-arguments")
                delta = None
            elif cur_arguments and not prev_arguments:
                cur_arguments_json = json.dumps(cur_arguments,
                                                ensure_ascii=False)[:-2]
                logger.debug("finding %s in %s", new_text,
                             cur_arguments_json)

                if (new_text not in cur_arguments_json):
                    return None
                arguments_delta = cur_arguments_json[:cur_arguments_json.
                                                     rindex(new_text) +
                                                     len(new_text)]
                logger.debug("First tokens in arguments received: %s",
                             arguments_delta)
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=arguments_delta).
                                  model_dump(exclude_none=True))
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] += arguments_delta

            elif cur_arguments and prev_arguments:
                cur_args_json = json.dumps(cur_arguments,
                                           ensure_ascii=False)
                prev_args_json = json.dumps(prev_arguments,
                                            ensure_ascii=False)
                logger.debug("Searching for diff between \n%s\n%s",
                             cur_args_json, prev_args_json)

                argument_diff = extract_intermediate_diff(
                    cur_args_json, prev_args_json)
                logger.debug("got arguments diff: %s", argument_diff)
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  function=DeltaFunctionCall(
                                      arguments=argument_diff).model_dump(
                                          exclude_none=True))
                ])
                self.streamed_args_for_tool[
                    self.current_tool_id] += argument_diff
            else:
                # try parsing it with regular JSON - if it works we're
                # at the end, and we need to send the difference between
                # tokens streamed so far and the valid JSON
                delta = None

        # check to see if the name is defined and has been sent. if so,
        # stream the name - otherwise keep waiting
        # finish by setting old and returning None as base case
        self.prev_tool_call_arr = tool_call_arr
        return delta

    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        logger.debug(
            "Skipping chunk as a result of tool streaming extraction "
            "error")
        return None

Phi4MiniJsonToolParser

Bases: ToolParser

Tool call parser for phi-4-mini models intended for use with the examples/tool_chat_template_llama.jinja template.

Used when --enable-auto-tool-choice --tool-call-parser phi4_mini_json
are all set

Source code in vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py
@ToolParserManager.register_module("phi4_mini_json")
class Phi4MiniJsonToolParser(ToolParser):
    """
    Tool call parser for phi-4-mini models intended for use with the
    examples/tool_chat_template_llama.jinja template.

    Used when --enable-auto-tool-choice --tool-call-parser phi4_mini_json
    are all set
    """

    def __init__(self, tokenizer: PreTrainedTokenizerBase) -> None:
        super().__init__(tokenizer)

        # initialize properties used for state when parsing tool calls in
        # streaming mode
        self.prev_tool_call_arr: list[dict[str, Any]] = []
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        # map of what has been streamed for each tool so far, one entry
        # per tool call
        self.streamed_args_for_tool: list[str] = []
        # sentinel token that introduces a tool-call block in model output
        self.bot_token: str = "functools"

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response.

        Looks for a ``functools[...]`` block; if its JSON payload parses to
        a non-empty list of calls, those are returned as tool calls,
        otherwise the whole output is returned as plain content.
        """
        logger.debug("Model output: %s", model_output)

        pattern = r'functools\[(.*?)\]'
        matches = re.search(pattern, model_output, re.DOTALL)

        if not matches:
            logger.debug("No function calls found")
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        try:
            function_call_arr: list[dict[str, Any]] = []
            try:
                # re-wrap the captured payload so it parses as a JSON array
                json_content = '[' + matches.group(1) + ']'

                function_call_arr = json.loads(json_content)
                logger.debug("Successfully extracted %d function calls",
                             len(function_call_arr))
            except json.JSONDecodeError as e:
                logger.error(
                    "Failed to parse function calls from model output. "
                    "Error: %s", str(e))

            # Bug fix: previously an unparseable (or empty) payload fell
            # through and returned tools_called=True with no tool calls and
            # content=None, silently dropping the model output. Treat that
            # case as plain content instead.
            if not function_call_arr:
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

            tool_calls: list[ToolCall] = [
                ToolCall(
                    id=make_tool_call_id(),
                    type="function",
                    function=FunctionCall(
                        name=raw_function_call["name"],
                        # function call args are JSON but as a string
                        arguments=json.dumps(
                            raw_function_call["arguments"]
                            if "arguments" in raw_function_call else
                            raw_function_call["parameters"],
                            ensure_ascii=False),
                    )) for raw_function_call in function_call_arr
            ]

            return ExtractedToolCallInformation(tools_called=True,
                                                tool_calls=tool_calls,
                                                content=None)

        except Exception:
            # Malformed call entries (e.g. a dict without "name" or
            # "parameters") are treated as regular text rather than raised.
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Optional[DeltaMessage]:
        """Streaming extraction is not implemented; always returns None."""

        return None

bot_token instance-attribute

bot_token: str = 'functools'

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict[str, Any]] = []

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

__init__

__init__(tokenizer: PreTrainedTokenizerBase) -> None
Source code in vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py
def __init__(self, tokenizer: PreTrainedTokenizerBase) -> None:
    """Initialize the parser and its streaming-mode bookkeeping."""
    super().__init__(tokenizer)

    # Sentinel token that introduces a tool-call block in model output.
    self.bot_token: str = "functools"

    # State used while parsing tool calls in streaming mode.
    self.prev_tool_call_arr: list[dict[str, Any]] = []
    self.current_tool_id: int = -1
    self.current_tool_name_sent: bool = False
    # One entry per tool call: the argument text streamed so far.
    self.streamed_args_for_tool: list[str] = []

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract the tool calls from a complete model response.

Source code in vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """
    Extract the tool calls from a complete model response.

    Looks for a ``functools[...]`` block; if its JSON payload parses to
    a non-empty list of calls, those are returned as tool calls,
    otherwise the whole output is returned as plain content.
    """
    logger.debug("Model output: %s", model_output)

    pattern = r'functools\[(.*?)\]'
    matches = re.search(pattern, model_output, re.DOTALL)

    if not matches:
        logger.debug("No function calls found")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        function_call_arr: list[dict[str, Any]] = []
        try:
            # re-wrap the captured payload so it parses as a JSON array
            json_content = '[' + matches.group(1) + ']'

            function_call_arr = json.loads(json_content)
            logger.debug("Successfully extracted %d function calls",
                         len(function_call_arr))
        except json.JSONDecodeError as e:
            logger.error(
                "Failed to parse function calls from model output. "
                "Error: %s", str(e))

        # Bug fix: previously an unparseable (or empty) payload fell
        # through and returned tools_called=True with no tool calls and
        # content=None, silently dropping the model output. Treat that
        # case as plain content instead.
        if not function_call_arr:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        tool_calls: list[ToolCall] = [
            ToolCall(
                id=make_tool_call_id(),
                type="function",
                function=FunctionCall(
                    name=raw_function_call["name"],
                    # function call args are JSON but as a string
                    arguments=json.dumps(
                        raw_function_call["arguments"]
                        if "arguments" in raw_function_call else
                        raw_function_call["parameters"],
                        ensure_ascii=False),
                )) for raw_function_call in function_call_arr
        ]

        return ExtractedToolCallInformation(tools_called=True,
                                            tool_calls=tool_calls,
                                            content=None)

    except Exception:
        # Malformed call entries (e.g. a dict without "name" or
        # "parameters") are treated as regular text rather than raised.
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Optional[DeltaMessage]
Source code in vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Optional[DeltaMessage]:
    # Streaming tool-call extraction is not implemented for this parser;
    # returning None means no delta is emitted for any chunk.

    return None

PythonicToolParser

Bases: ToolParser

Tool call parser for models that produce tool calls in a pythonic style, such as Llama 3.2 and Llama 4 models.

Used when --enable-auto-tool-choice --tool-call-parser pythonic are all set

Source code in vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
@ToolParserManager.register_module("pythonic")
class PythonicToolParser(ToolParser):
    """
    Tool call parser for models that produce tool calls in a pythonic style,
    such as Llama 3.2 and Llama 4 models.

    Used when --enable-auto-tool-choice --tool-call-parser pythonic are all set
    """
    # TODO(mdepinet): Possible future improvements:
    #   1. Support text + tools separated by either <|python_tag|> or \n\n
    #   2. Support tools outside of a list (or separated by a semicolon).
    #      This depends on item 1 for consistent streaming.
    # Neither of these are necessary for e.g. ToolACE, but both would help make
    # Llama3.2 models more reliable.

    # Matches a Python-style list of calls, e.g. [foo(a=1), bar(b="x")].
    # NOTE(review): matching below is guarded by a `timeout=` kwarg, which
    # stdlib `re` does not accept — presumably this module imports the
    # third-party `regex` package as `re`; confirm at the file's imports.
    TOOL_CALL_REGEX = re.compile(
        r"\[([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s)?\),\s*)*([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s*)?\)\s*)+\]",
        re.DOTALL)

    def __init__(self, tokenizer: PreTrainedTokenizerBase):
        super().__init__(tokenizer)

    # Rename for readability. This is NOT a tool id.
    @property
    def current_tool_index(self) -> int:
        return self.current_tool_id

    @current_tool_index.setter
    def current_tool_index(self, value: int) -> None:
        self.current_tool_id = value

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """
        Extract the tool calls from a complete model response.
        """
        # Cheap, time-bounded regex pre-check before paying for a full
        # AST parse of the model output.
        is_tool_call_pattern = False
        try:
            is_tool_call_pattern = self.TOOL_CALL_REGEX.match(
                model_output,
                timeout=envs.VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS) is not None
        except TimeoutError:
            # A timed-out match is treated as "not a tool call", so the
            # output falls through to plain content below.
            logger.warning(
                "Regex timeout occurred when matching tool call pattern.")
            logger.debug("Regex timeout occurred when matching user input: %s",
                         model_output)

        if not is_tool_call_pattern:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        try:
            # Parse as Python and require a bare list literal whose
            # elements are all function calls.
            module = ast.parse(model_output)
            parsed = getattr(module.body[0], "value", None)
            if isinstance(parsed, ast.List) and all(
                    isinstance(e, ast.Call) for e in parsed.elts):
                return ExtractedToolCallInformation(
                    tools_called=True,
                    tool_calls=[
                        _handle_single_tool(e)  # type: ignore
                        for e in parsed.elts
                    ],
                    content=None)
            else:
                raise _UnexpectedAstError(
                    "Tool output must be a list of function calls")
        except Exception:
            logger.exception("Error in extracting tool call from response.")
            # Treat as regular text
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """
        Incrementally extract tool-call deltas from a partially generated
        pythonic tool-call list; returns None when no delta can be emitted
        for this chunk.
        """

        # Output not starting as a list cannot be a tool call; pass the
        # chunk through as ordinary content.
        if not current_text.startswith("["):
            return DeltaMessage(content=delta_text)

        try:
            # Complete the partial text into syntactically valid Python,
            # remembering the appended suffix so it can be withheld below.
            valid_and_added_text = _make_valid_python(current_text)
            if valid_and_added_text is None:
                return None
            valid_text, added_text = valid_and_added_text

            module = ast.parse(valid_text)
            parsed = getattr(module.body[0], "value", None)
            if not isinstance(parsed, ast.List) or not all(
                    isinstance(e, ast.Call) for e in parsed.elts):
                raise _UnexpectedAstError(
                    "Tool output must be a list of function calls")
            tool_calls = [
                _handle_single_tool(e)  # type: ignore
                for e in parsed.elts
            ]

            tool_deltas = []
            for index, new_call in enumerate(tool_calls):
                # Calls before the current index were already fully streamed.
                if index < self.current_tool_index:
                    continue

                self.current_tool_index = index
                if len(self.streamed_args_for_tool) == index:
                    self.streamed_args_for_tool.append("")

                # The final parsed call is only complete once the model has
                # produced its own closing ")]" (nothing was auto-added).
                new_call_complete = index < len(
                    tool_calls) - 1 or ")]" not in added_text
                if new_call_complete:
                    self.current_tool_index += 1

                withheld_suffix = (added_text[:-2]
                                   if not new_call_complete else "")
                if not new_call_complete and added_text[-2] == ")":
                    # Function call is incomplete. Withhold the closing bracket.
                    withheld_suffix = withheld_suffix + "}"
                # Strings get single quotes in the model-produced string.
                # JSON requires double quotes.
                withheld_suffix = withheld_suffix.replace("'", '"')
                delta = _compute_tool_delta(self.streamed_args_for_tool[index],
                                            new_call, index, withheld_suffix)

                if delta is not None:
                    tool_deltas.append(delta)
                    if (delta.function is not None
                            and delta.function.arguments is not None):
                        # Track emitted argument text so the next chunk only
                        # streams what is genuinely new.
                        self.streamed_args_for_tool[
                            index] += delta.function.arguments

            # HACK: serving_chat.py inspects the internal state of tool parsers
            # when determining it's final streaming delta, automatically
            # adding autocompleted JSON.
            # These two lines avoid that nonsense while ensuring finish_reason
            # is set to tool_calls when at least one tool is called.
            if tool_deltas and not self.prev_tool_call_arr:
                self.prev_tool_call_arr = [{"arguments": {}}]

            if tool_deltas:
                return DeltaMessage(tool_calls=tool_deltas)
            elif not added_text and self.current_tool_id > 0:
                # Return an empty DeltaMessage once the tool calls are all done
                # so that finish_reason gets set.
                return DeltaMessage(content='')
            else:
                return None
        except Exception:
            logger.exception("Error trying to handle streaming tool call.")
            logger.debug(
                "Skipping chunk as a result of tool streaming extraction "
                "error")
            return None

TOOL_CALL_REGEX class-attribute instance-attribute

TOOL_CALL_REGEX = compile(
    "\\[([a-zA-Z]+\\w*\\(([a-zA-Z]+\\w*=.*,\\s*)*([a-zA-Z]+\\w*=.*\\s)?\\),\\s*)*([a-zA-Z]+\\w*\\(([a-zA-Z]+\\w*=.*,\\s*)*([a-zA-Z]+\\w*=.*\\s*)?\\)\\s*)+\\]",
    DOTALL,
)

current_tool_index property writable

current_tool_index: int

__init__

__init__(tokenizer: PreTrainedTokenizerBase)
Source code in vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
def __init__(self, tokenizer: PreTrainedTokenizerBase):
    """Delegate initialization to the ToolParser base class."""
    super().__init__(tokenizer)

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract the tool calls from a complete model response.

Source code in vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """
    Extract the tool calls from a complete model response.
    """
    # Cheap, time-bounded regex gate before paying for a full AST parse.
    looks_like_tool_calls = False
    try:
        match = self.TOOL_CALL_REGEX.match(
            model_output,
            timeout=envs.VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS)
        looks_like_tool_calls = match is not None
    except TimeoutError:
        # Timed-out input is treated as plain text below.
        logger.warning(
            "Regex timeout occurred when matching tool call pattern.")
        logger.debug("Regex timeout occurred when matching user input: %s",
                     model_output)

    if not looks_like_tool_calls:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        # Parse as Python and require a bare list literal whose elements
        # are all function calls.
        expr = getattr(ast.parse(model_output).body[0], "value", None)
        if not (isinstance(expr, ast.List)
                and all(isinstance(elt, ast.Call) for elt in expr.elts)):
            raise _UnexpectedAstError(
                "Tool output must be a list of function calls")
        calls = [
            _handle_single_tool(elt)  # type: ignore
            for elt in expr.elts
        ]
        return ExtractedToolCallInformation(tools_called=True,
                                            tool_calls=calls,
                                            content=None)
    except Exception:
        logger.exception("Error in extracting tool call from response.")
        # Treat as regular text
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """
    Incrementally extract tool-call deltas from a partially generated
    pythonic tool-call list; returns None when no delta can be emitted
    for this chunk.
    """

    # Output not starting as a list cannot be a tool call; pass the
    # chunk through as ordinary content.
    if not current_text.startswith("["):
        return DeltaMessage(content=delta_text)

    try:
        # Complete the partial text into syntactically valid Python,
        # remembering the appended suffix so it can be withheld below.
        valid_and_added_text = _make_valid_python(current_text)
        if valid_and_added_text is None:
            return None
        valid_text, added_text = valid_and_added_text

        module = ast.parse(valid_text)
        parsed = getattr(module.body[0], "value", None)
        if not isinstance(parsed, ast.List) or not all(
                isinstance(e, ast.Call) for e in parsed.elts):
            raise _UnexpectedAstError(
                "Tool output must be a list of function calls")
        tool_calls = [
            _handle_single_tool(e)  # type: ignore
            for e in parsed.elts
        ]

        tool_deltas = []
        for index, new_call in enumerate(tool_calls):
            # Calls before the current index were already fully streamed.
            if index < self.current_tool_index:
                continue

            self.current_tool_index = index
            if len(self.streamed_args_for_tool) == index:
                self.streamed_args_for_tool.append("")

            # The final parsed call is only complete once the model has
            # produced its own closing ")]" (nothing was auto-added).
            new_call_complete = index < len(
                tool_calls) - 1 or ")]" not in added_text
            if new_call_complete:
                self.current_tool_index += 1

            withheld_suffix = (added_text[:-2]
                               if not new_call_complete else "")
            if not new_call_complete and added_text[-2] == ")":
                # Function call is incomplete. Withhold the closing bracket.
                withheld_suffix = withheld_suffix + "}"
            # Strings get single quotes in the model-produced string.
            # JSON requires double quotes.
            withheld_suffix = withheld_suffix.replace("'", '"')
            delta = _compute_tool_delta(self.streamed_args_for_tool[index],
                                        new_call, index, withheld_suffix)

            if delta is not None:
                tool_deltas.append(delta)
                if (delta.function is not None
                        and delta.function.arguments is not None):
                    # Track emitted argument text so the next chunk only
                    # streams what is genuinely new.
                    self.streamed_args_for_tool[
                        index] += delta.function.arguments

        # HACK: serving_chat.py inspects the internal state of tool parsers
        # when determining it's final streaming delta, automatically
        # adding autocompleted JSON.
        # These two lines avoid that nonsense while ensuring finish_reason
        # is set to tool_calls when at least one tool is called.
        if tool_deltas and not self.prev_tool_call_arr:
            self.prev_tool_call_arr = [{"arguments": {}}]

        if tool_deltas:
            return DeltaMessage(tool_calls=tool_deltas)
        elif not added_text and self.current_tool_id > 0:
            # Return an empty DeltaMessage once the tool calls are all done
            # so that finish_reason gets set.
            return DeltaMessage(content='')
        else:
            return None
    except Exception:
        logger.exception("Error trying to handle streaming tool call.")
        logger.debug(
            "Skipping chunk as a result of tool streaming extraction "
            "error")
        return None

Qwen3CoderToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
[Rendering artifact: the documentation page's line-number gutter (source lines 25–664 of qwen3coder_tool_parser.py) spilled into the text here; see the source file for the full Qwen3CoderToolParser listing.]
@ToolParserManager.register_module(["qwen3_coder"])
class Qwen3CoderToolParser(ToolParser):
    """Tool parser for Qwen3-Coder style output.

    The model emits tool calls as XML-like blocks::

        <tool_call>
        <function=NAME>
        <parameter=KEY>
        VALUE
        </parameter>
        ...
        </function>
        </tool_call>

    Supports both one-shot extraction (``extract_tool_calls``) and
    incremental streaming extraction (``extract_tool_calls_streaming``).
    """

    def __init__(self, tokenizer: AnyTokenizer):
        """Initialize parser state and resolve sentinel token ids.

        Raises:
            ValueError: if no tokenizer was supplied.
            RuntimeError: if the tool-call start/end sentinels are not in
                the tokenizer vocabulary.
        """
        super().__init__(tokenizer)

        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: list[dict] = []
        self.streamed_args_for_tool: list[str] = []

        # Sentinel tokens for streaming mode
        self.tool_call_start_token: str = "<tool_call>"
        self.tool_call_end_token: str = "</tool_call>"
        self.tool_call_prefix: str = "<function="
        self.function_end_token: str = "</function>"
        self.parameter_prefix: str = "<parameter="
        self.parameter_end_token: str = "</parameter>"
        self.is_tool_call_started: bool = False
        self.failed_count: int = 0

        # Streaming state variables
        self.current_tool_index: int = 0
        self.header_sent: bool = False
        self.current_tool_string_id: Optional[str] = None
        self.current_function_name: Optional[str] = None
        self.current_param_name: Optional[str] = None
        self.current_param_value: str = ""
        self.param_count: int = 0
        self.in_param: bool = False
        self.in_function: bool = False
        self.accumulated_text: str = ""
        self.json_started: bool = False
        self.json_closed: bool = False

        # Enhanced streaming state - reset for each new message
        self._reset_streaming_state()

        # Regex patterns
        # Each pattern has two alternatives: a complete (closed) span and a
        # trailing unclosed span, so streaming prefixes still match.
        self.tool_call_complete_regex = re.compile(
            r"<tool_call>(.*?)</tool_call>", re.DOTALL)
        self.tool_call_regex = re.compile(
            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL)
        self.tool_call_function_regex = re.compile(
            r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
        self.tool_call_parameter_regex = re.compile(
            r"<parameter=(.*?)</parameter>|<parameter=(.*?)$", re.DOTALL)

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")

        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

        if (self.tool_call_start_token_id is None
                or self.tool_call_end_token_id is None):
            raise RuntimeError(
                "Qwen3 XML Tool parser could not locate tool call start/end "
                "tokens in the tokenizer!")

        logger.debug("vLLM Successfully import tool parser %s !",
                     self.__class__.__name__)

    def _generate_tool_call_id(self) -> str:
        """Generate a unique tool call ID."""
        return f"call_{uuid.uuid4().hex[:24]}"

    def _reset_streaming_state(self):
        """Reset all streaming state."""
        self.current_tool_index = 0
        self.is_tool_call_started = False
        self.header_sent = False
        self.current_tool_string_id = None
        self.current_function_name = None
        self.current_param_name = None
        self.current_param_value = ""
        self.param_count = 0
        self.in_param = False
        self.in_function = False
        self.accumulated_text = ""
        self.json_started = False
        self.json_closed = False

    def _parse_xml_function_call(
            self, function_call_str: str,
            tools: Optional[list[ChatCompletionToolsParam]]
    ) -> Optional[ToolCall]:
        """Parse one ``NAME>...<parameter=...>...`` body into a ToolCall.

        ``function_call_str`` is the text captured after ``<function=`` and
        before ``</function>`` (so it starts with the function name followed
        by ``>``). Parameter values are converted to the JSON-schema type
        declared for the tool in ``tools``; unknown parameters or types fall
        back to the raw string.

        Returns:
            A ``ToolCall`` whose arguments are a JSON object string.
        """

        def get_arguments_config(func_name: str) -> dict:
            # Locate the declared parameter schema ("properties") for this
            # function; {} when the tool list is absent or has no schema.
            if tools is None:
                return {}
            for config in tools:
                if not hasattr(config, "type") or not (
                        hasattr(config, "function")
                        and hasattr(config.function, "name")):
                    continue
                if (config.type == "function"
                        and config.function.name == func_name):
                    if not hasattr(config.function, "parameters"):
                        return {}
                    params = config.function.parameters
                    if isinstance(params, dict) and "properties" in params:
                        return params["properties"]
                    elif isinstance(params, dict):
                        return params
                    else:
                        return {}
            logger.warning("Tool '%s' is not defined in the tools list.",
                           func_name)
            return {}

        def convert_param_value(param_value: str, param_name: str,
                                param_config: dict, func_name: str) -> Any:
            # Handle null value for any type
            if param_value.lower() == "null":
                return None

            converted_value: Any

            # Unknown parameter: keep the raw string (warn only when a schema
            # was actually provided).
            if param_name not in param_config:
                if param_config != {}:
                    logger.warning(
                        "Parsed parameter '%s' is not defined in the tool "
                        "parameters for tool '%s', directly returning the "
                        "string value.", param_name, func_name)
                return param_value

            if (isinstance(param_config[param_name], dict)
                    and "type" in param_config[param_name]):
                param_type = str(
                    param_config[param_name]["type"]).strip().lower()
            else:
                param_type = "string"
            if param_type in [
                    "string", "str", "text", "varchar", "char", "enum"
            ]:
                return param_value
            elif (param_type.startswith("int") or param_type.startswith("uint")
                  or param_type.startswith("long")
                  or param_type.startswith("short")
                  or param_type.startswith("unsigned")):
                try:
                    converted_value = int(param_value)
                    return converted_value
                except ValueError:
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' is not an "
                        "integer in tool '%s', degenerating to string.",
                        param_value, param_name, func_name)
                return param_value
            elif (param_type.startswith("num")
                  or param_type.startswith("float")):
                try:
                    float_param_value = float(param_value)
                    # Emit an int when the float has no fractional part.
                    converted_value = (float_param_value if float_param_value -
                                       int(float_param_value) != 0 else
                                       int(float_param_value))
                    return converted_value
                except ValueError:
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' is not a float "
                        "in tool '%s', degenerating to string.", param_value,
                        param_name, func_name)
                return param_value
            elif param_type in ["boolean", "bool", "binary"]:
                param_value = param_value.lower()
                if param_value not in ["true", "false"]:
                    # Fixed typo in the warning: "`true` of `false`" ->
                    # "`true` or `false`".
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' is not a "
                        "boolean (`true` or `false`) in tool '%s', "
                        "degenerating to false.", param_value, param_name,
                        func_name)
                return param_value == "true"
            else:
                if param_type == "object" or param_type.startswith("dict"):
                    try:
                        converted_value = json.loads(param_value)
                        return converted_value
                    except json.JSONDecodeError:
                        logger.warning(
                            "Parsed value '%s' of parameter '%s' is not a "
                            "valid JSON object in tool '%s', will try other "
                            "methods to parse it.", param_value, param_name,
                            func_name)
                logger.warning(
                    "Parameter '%s' has unknown type '%s'. "
                    "The value will be treated as a string.", param_name,
                    param_type)
                return param_value

        # Extract function name
        end_index = function_call_str.index(">")
        function_name = function_call_str[:end_index]
        param_config = get_arguments_config(function_name)
        parameters = function_call_str[end_index + 1:]
        param_dict = {}
        for match in self.tool_call_parameter_regex.findall(parameters):
            # findall on an alternation yields tuples; exactly one group
            # is non-empty per match.
            match_text = match[0] if match[0] else match[1]
            idx = match_text.index(">")
            param_name = match_text[:idx]
            param_value = str(match_text[idx + 1:])
            # Remove prefix and trailing \n
            if param_value.startswith("\n"):
                param_value = param_value[1:]
            if param_value.endswith("\n"):
                param_value = param_value[:-1]

            param_dict[param_name] = convert_param_value(
                param_value, param_name, param_config, function_name)
        return ToolCall(
            type="function",
            function=FunctionCall(name=function_name,
                                  arguments=json.dumps(param_dict,
                                                       ensure_ascii=False)),
        )

    def _get_function_calls(self, model_output: str) -> list[str]:
        # Find all tool calls
        matched_ranges = self.tool_call_regex.findall(model_output)
        raw_tool_calls = [
            match[0] if match[0] else match[1] for match in matched_ranges
        ]

        # Back-off strategy if no tool_call tags found
        if len(raw_tool_calls) == 0:
            raw_tool_calls = [model_output]

        raw_function_calls = []
        for tool_call in raw_tool_calls:
            raw_function_calls.extend(
                self.tool_call_function_regex.findall(tool_call))

        function_calls = [
            match[0] if match[0] else match[1] for match in raw_function_calls
        ]
        return function_calls

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Parse tool calls from a complete (non-streaming) model response."""
        # Fast path: nothing that even looks like a function call.
        if self.tool_call_prefix not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        try:
            raw_calls = self._get_function_calls(model_output)
            if not raw_calls:
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

            parsed_calls = [
                self._parse_xml_function_call(raw, request.tools)
                for raw in raw_calls
            ]

            # Record the parsed calls so the serving layer can set
            # finish_reason="tool_calls".
            self.prev_tool_call_arr.clear()
            self.prev_tool_call_arr.extend({
                "name": call.function.name,
                "arguments": call.function.arguments,
            } for call in parsed_calls if call)

            # Anything before the first tool-call marker is plain content.
            marker_pos = model_output.find(self.tool_call_start_token)
            if marker_pos < 0:
                marker_pos = model_output.find(self.tool_call_prefix)
            leading_content = model_output[:marker_pos]

            return ExtractedToolCallInformation(
                tools_called=bool(parsed_calls),
                tool_calls=parsed_calls,
                content=leading_content if leading_content else None,
            )

        except Exception:
            logger.exception("Error in extracting tool call from response.")
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Incrementally extract tool-call deltas from streamed output.

        Walks ``current_text`` with the state initialized by
        ``_reset_streaming_state`` and returns at most one ``DeltaMessage``
        per call: plain content, a tool-call header (id + name), a JSON
        argument fragment, or the closing ``}``. Returns ``None`` when there
        is nothing to emit for this delta.
        """
        # If no delta text, return None unless it's an EOS token after tool
        # calls
        if not delta_text:
            # Check if this is an EOS token after all tool calls are complete
            # We check for tool calls in the text even if is_tool_call_started
            # is False because it might have been reset after processing all
            # tools
            if (delta_token_ids
                    and self.tool_call_end_token_id not in delta_token_ids):
                # Count complete tool calls
                complete_calls = len(
                    self.tool_call_complete_regex.findall(current_text))

                # If we have completed tool calls and populated
                # prev_tool_call_arr
                if (complete_calls > 0 and len(self.prev_tool_call_arr) > 0):
                    # Check if all tool calls are closed
                    open_calls = (
                        current_text.count(self.tool_call_start_token) -
                        current_text.count(self.tool_call_end_token))
                    if open_calls == 0:
                        # Return empty delta message to allow finish_reason
                        # processing
                        return DeltaMessage(content="")
                elif not self.is_tool_call_started and current_text:
                    # This is a regular content response that's now complete
                    return DeltaMessage(content="")
            return None

        # Check if this is the first call (reset state if needed)
        if not previous_text:
            self._reset_streaming_state()

        # Update accumulated text
        self.accumulated_text = current_text

        # Check if we need to advance to next tool
        if self.json_closed and not self.in_function:
            # Check if this tool call has ended
            tool_ends = current_text.count(self.tool_call_end_token)
            if tool_ends > self.current_tool_index:
                # This tool has ended, advance to next
                self.current_tool_index += 1
                self.header_sent = False
                self.param_count = 0
                self.json_started = False
                self.json_closed = False

                # Check if there are more tool calls
                tool_starts_count = current_text.count(
                    self.tool_call_start_token)
                if self.current_tool_index >= tool_starts_count:
                    # No more tool calls
                    self.is_tool_call_started = False
                # Continue processing next tool
                return None

        # Handle normal content before tool calls
        if not self.is_tool_call_started:
            # Check if tool call is starting
            if (self.tool_call_start_token_id in delta_token_ids
                    or self.tool_call_start_token in delta_text):
                self.is_tool_call_started = True
                # Return any content before the tool call
                if self.tool_call_start_token in delta_text:
                    content_before = delta_text[:delta_text.index(
                        self.tool_call_start_token)]
                    if content_before:
                        return DeltaMessage(content=content_before)
                return None
            else:
                # Check if we're between tool calls - skip whitespace
                if (current_text.rstrip().endswith(self.tool_call_end_token)
                        and delta_text.strip() == ""):
                    # We just ended a tool call, skip whitespace
                    return None
                # Normal content, no tool call
                return DeltaMessage(content=delta_text)

        # Check if we're between tool calls (waiting for next one)
        # Count tool calls we've seen vs processed
        tool_starts_count = current_text.count(self.tool_call_start_token)
        if self.current_tool_index >= tool_starts_count:
            # We're past all tool calls, shouldn't be here
            return None

        # We're in a tool call, find the current tool call portion
        # Need to find the correct tool call based on current_tool_index
        tool_starts: list[int] = []
        idx = 0
        while True:
            idx = current_text.find(self.tool_call_start_token, idx)
            if idx == -1:
                break
            tool_starts.append(idx)
            idx += len(self.tool_call_start_token)

        if self.current_tool_index >= len(tool_starts):
            # No more tool calls to process yet
            return None

        tool_start_idx = tool_starts[self.current_tool_index]
        # Find where this tool call ends (or current position if not ended yet)
        tool_end_idx = current_text.find(self.tool_call_end_token,
                                         tool_start_idx)
        if tool_end_idx == -1:
            tool_text = current_text[tool_start_idx:]
        else:
            tool_text = current_text[tool_start_idx:tool_end_idx +
                                     len(self.tool_call_end_token)]

        # Looking for function header
        if not self.header_sent:
            if self.tool_call_prefix in tool_text:
                func_start = (tool_text.find(self.tool_call_prefix) +
                              len(self.tool_call_prefix))
                func_end = tool_text.find(">", func_start)

                if func_end != -1:
                    # Found complete function name
                    self.current_function_name = tool_text[func_start:func_end]
                    self.current_tool_string_id = self._generate_tool_call_id()
                    self.header_sent = True
                    self.in_function = True

                    # IMPORTANT: Add to prev_tool_call_arr immediately when we
                    # detect a tool call. This ensures
                    # finish_reason="tool_calls" even if parsing isn't complete
                    # NOTE(review): de-duplication is by function name only, so
                    # two calls to the same function share one entry — confirm
                    # this is intended.
                    already_added = any(
                        tool.get("name") == self.current_function_name
                        for tool in self.prev_tool_call_arr)
                    if not already_added:
                        self.prev_tool_call_arr.append({
                            "name": self.current_function_name,
                            "arguments":
                            "{}",  # Placeholder, will be updated later
                        })

                    # Send header with function info
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            id=self.current_tool_string_id,
                            function=DeltaFunctionCall(
                                name=self.current_function_name, arguments=""),
                            type="function",
                        )
                    ])
            return None

        # We've sent header, now handle function body
        if self.in_function:
            # Send opening brace if not sent yet
            if (not self.json_started
                    and self.parameter_prefix not in delta_text):
                self.json_started = True
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="{"),
                    )
                ])

            # Make sure json_started is set if we're processing parameters
            if not self.json_started:
                self.json_started = True

            # Check for function end in accumulated text
            if not self.json_closed and self.function_end_token in tool_text:
                # Close JSON
                self.json_closed = True

                # Extract the complete tool call to update prev_tool_call_arr
                # with final arguments. Find the function content
                func_start = (tool_text.find(self.tool_call_prefix) +
                              len(self.tool_call_prefix))
                func_content_end = tool_text.find(self.function_end_token,
                                                  func_start)
                if func_content_end != -1:
                    func_content = tool_text[func_start:func_content_end]
                    # Parse to get the complete arguments
                    try:
                        parsed_tool = self._parse_xml_function_call(
                            func_content, request.tools if request else None)
                        if parsed_tool:
                            # Update existing entry in prev_tool_call_arr with
                            # complete arguments
                            for i, tool in enumerate(self.prev_tool_call_arr):
                                if (tool.get("name") ==
                                        parsed_tool.function.name):
                                    self.prev_tool_call_arr[i]["arguments"] = (
                                        parsed_tool.function.arguments)
                                    break
                    except Exception:
                        pass  # Ignore parsing errors during streaming

                result = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="}"),
                    )
                ])

                # Reset state for next tool
                # NOTE(review): json_closed was already set True above; this
                # second assignment is redundant.
                self.in_function = False
                self.json_closed = True

                return result

            # Look for parameters
            # Count how many complete parameters we have processed
            complete_params = tool_text.count(self.parameter_end_token)

            # Check if we should start a new parameter
            if not self.in_param and self.param_count < complete_params:
                # Find the unprocessed parameter
                # Count parameter starts
                param_starts = []
                idx = 0
                while True:
                    idx = tool_text.find(self.parameter_prefix, idx)
                    if idx == -1:
                        break
                    param_starts.append(idx)
                    idx += len(self.parameter_prefix)

                if len(param_starts) > self.param_count:
                    # Process the next parameter
                    param_idx = param_starts[self.param_count]
                    param_start = param_idx + len(self.parameter_prefix)
                    remaining = tool_text[param_start:]

                    if ">" in remaining:
                        # We have the complete parameter name
                        name_end = remaining.find(">")
                        self.current_param_name = remaining[:name_end]

                        # Find the parameter value
                        value_start = param_start + name_end + 1
                        value_text = tool_text[value_start:]
                        if value_text.startswith("\n"):
                            value_text = value_text[1:]

                        # Find where this parameter ends
                        param_end_idx = value_text.find(
                            self.parameter_end_token)
                        if param_end_idx != -1:
                            # Complete parameter found
                            param_value = value_text[:param_end_idx]
                            if param_value.endswith("\n"):
                                param_value = param_value[:-1]

                            # Build complete JSON fragment for this parameter
                            # NOTE(review): streamed fragments always quote the
                            # value as a JSON string; the typed conversion in
                            # _parse_xml_function_call is applied only to the
                            # arguments stored in prev_tool_call_arr above.
                            if self.param_count == 0:
                                json_fragment = (
                                    '"' + self.current_param_name + '": "' +
                                    json.dumps(param_value)[1:-1] + '"')
                            else:
                                json_fragment = (
                                    ', "' + self.current_param_name + '": "' +
                                    json.dumps(param_value)[1:-1] + '"')

                            self.param_count += 1

                            return DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=json_fragment),
                                )
                            ])

            # Continue parameter value
            # NOTE(review): in_param is never assigned True anywhere in this
            # class, so this branch appears unreachable — confirm whether it
            # is dead code or set by a subclass/caller.
            if self.in_param:
                if self.parameter_end_token in delta_text:
                    # End of parameter
                    end_idx = delta_text.find(self.parameter_end_token)
                    value_chunk = delta_text[:end_idx]

                    # Skip past > if at start
                    if not self.current_param_value and ">" in value_chunk:
                        gt_idx = value_chunk.find(">")
                        value_chunk = value_chunk[gt_idx + 1:]

                    if (not self.current_param_value
                            and value_chunk.startswith("\n")):
                        value_chunk = value_chunk[1:]

                    # Calculate incremental JSON
                    full_value = self.current_param_value + value_chunk
                    prev_escaped = (json.dumps(self.current_param_value)[1:-1]
                                    if self.current_param_value else "")
                    full_escaped = json.dumps(full_value)[1:-1]
                    delta_escaped = full_escaped[len(prev_escaped):]

                    self.in_param = False
                    self.current_param_value = ""

                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            function=DeltaFunctionCall(
                                arguments=delta_escaped + '"'),
                        )
                    ])
                else:
                    # Continue accumulating value
                    value_chunk = delta_text

                    # Handle first chunk after param name
                    if not self.current_param_value and ">" in value_chunk:
                        gt_idx = value_chunk.find(">")
                        value_chunk = value_chunk[gt_idx + 1:]

                    if (not self.current_param_value
                            and value_chunk.startswith("\n")):
                        value_chunk = value_chunk[1:]

                    if value_chunk:
                        # Stream the escaped delta
                        prev_escaped = (json.dumps(
                            self.current_param_value)[1:-1]
                                        if self.current_param_value else "")
                        self.current_param_value += value_chunk
                        full_escaped = json.dumps(
                            self.current_param_value)[1:-1]
                        delta_escaped = full_escaped[len(prev_escaped):]

                        if delta_escaped:
                            return DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=delta_escaped),
                                )
                            ])

        return None

accumulated_text instance-attribute

accumulated_text: str = ''

current_function_name instance-attribute

current_function_name: Optional[str] = None

current_param_name instance-attribute

current_param_name: Optional[str] = None

current_param_value instance-attribute

current_param_value: str = ''

current_tool_index instance-attribute

current_tool_index: int = 0

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

current_tool_string_id instance-attribute

current_tool_string_id: Optional[str] = None

failed_count instance-attribute

failed_count: int = 0

function_end_token instance-attribute

function_end_token: str = '</function>'

header_sent instance-attribute

header_sent: bool = False

in_function instance-attribute

in_function: bool = False

in_param instance-attribute

in_param: bool = False

is_tool_call_started instance-attribute

is_tool_call_started: bool = False

json_closed instance-attribute

json_closed: bool = False

json_started instance-attribute

json_started: bool = False

param_count instance-attribute

param_count: int = 0

parameter_end_token instance-attribute

parameter_end_token: str = '</parameter>'

parameter_prefix instance-attribute

parameter_prefix: str = '<parameter='

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

tool_call_complete_regex instance-attribute

tool_call_complete_regex = compile(
    "<tool_call>(.*?)</tool_call>", DOTALL
)

tool_call_end_token instance-attribute

tool_call_end_token: str = '</tool_call>'

tool_call_end_token_id instance-attribute

tool_call_end_token_id = get(tool_call_end_token)

tool_call_function_regex instance-attribute

tool_call_function_regex = compile(
    "<function=(.*?)</function>|<function=(.*)$", DOTALL
)

tool_call_parameter_regex instance-attribute

tool_call_parameter_regex = compile(
    "<parameter=(.*?)</parameter>|<parameter=(.*?)$", DOTALL
)

tool_call_prefix instance-attribute

tool_call_prefix: str = '<function='

tool_call_regex instance-attribute

tool_call_regex = compile(
    "<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", DOTALL
)

tool_call_start_token instance-attribute

tool_call_start_token: str = '<tool_call>'

tool_call_start_token_id instance-attribute

tool_call_start_token_id = get(tool_call_start_token)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Initialize the Qwen3-coder XML tool parser.

    Sets up the sentinel tokens that delimit tool calls in the model
    output, compiles the regexes used for complete and partial
    (streaming) matches, and validates that the tokenizer vocabulary
    contains the tool-call sentinel tokens.

    Raises:
        ValueError: if no tokenizer was provided.
        RuntimeError: if the tool-call start/end tokens are missing
            from the tokenizer vocabulary.
    """
    super().__init__(tokenizer)

    self.current_tool_name_sent: bool = False
    self.prev_tool_call_arr: list[dict] = []
    self.streamed_args_for_tool: list[str] = []

    # Sentinel tokens for streaming mode
    self.tool_call_start_token: str = "<tool_call>"
    self.tool_call_end_token: str = "</tool_call>"
    self.tool_call_prefix: str = "<function="
    self.function_end_token: str = "</function>"
    self.parameter_prefix: str = "<parameter="
    self.parameter_end_token: str = "</parameter>"
    self.failed_count: int = 0

    # Streaming state variables - reset for each new message.
    # _reset_streaming_state() is the single source of truth for these
    # defaults (current_tool_index, header_sent, param accumulators,
    # JSON flags, ...); previously every one of them was also assigned
    # inline here, which duplicated the helper line-for-line.
    self._reset_streaming_state()

    # Regex patterns
    self.tool_call_complete_regex = re.compile(
        r"<tool_call>(.*?)</tool_call>", re.DOTALL)
    self.tool_call_regex = re.compile(
        r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL)
    self.tool_call_function_regex = re.compile(
        r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
    self.tool_call_parameter_regex = re.compile(
        r"<parameter=(.*?)</parameter>|<parameter=(.*?)$", re.DOTALL)

    if not self.model_tokenizer:
        raise ValueError(
            "The model tokenizer must be passed to the ToolParser "
            "constructor during construction.")

    self.tool_call_start_token_id = self.vocab.get(
        self.tool_call_start_token)
    self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

    if (self.tool_call_start_token_id is None
            or self.tool_call_end_token_id is None):
        raise RuntimeError(
            "Qwen3 XML Tool parser could not locate tool call start/end "
            "tokens in the tokenizer!")

    logger.debug("vLLM Successfully import tool parser %s !",
                 self.__class__.__name__)

_generate_tool_call_id

_generate_tool_call_id() -> str

Generate a unique tool call ID.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
def _generate_tool_call_id(self) -> str:
    """Generate a unique tool call ID."""
    return f"call_{uuid.uuid4().hex[:24]}"

_get_function_calls

_get_function_calls(model_output: str) -> list[str]
Source code in vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
def _get_function_calls(self, model_output: str) -> list[str]:
    # Find all tool calls
    matched_ranges = self.tool_call_regex.findall(model_output)
    raw_tool_calls = [
        match[0] if match[0] else match[1] for match in matched_ranges
    ]

    # Back-off strategy if no tool_call tags found
    if len(raw_tool_calls) == 0:
        raw_tool_calls = [model_output]

    raw_function_calls = []
    for tool_call in raw_tool_calls:
        raw_function_calls.extend(
            self.tool_call_function_regex.findall(tool_call))

    function_calls = [
        match[0] if match[0] else match[1] for match in raw_function_calls
    ]
    return function_calls

_parse_xml_function_call

_parse_xml_function_call(
    function_call_str: str,
    tools: Optional[list[ChatCompletionToolsParam]],
) -> Optional[ToolCall]
Source code in vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
def _parse_xml_function_call(
        self, function_call_str: str,
        tools: Optional[list[ChatCompletionToolsParam]]
) -> Optional[ToolCall]:
    """Parse one ``<function=...>`` body into a ToolCall.

    *function_call_str* holds the function name, a ``>``, then zero or
    more ``<parameter=NAME>VALUE</parameter>`` entries.  Parameter
    values are coerced to the types declared in *tools* when
    available; unknown or unparseable values degrade to plain strings.
    """

    def get_arguments_config(func_name: str) -> dict:
        # Look up the declared parameter schema for func_name in the
        # request's tool list; returns {} when unavailable.
        if tools is None:
            return {}
        for config in tools:
            if not hasattr(config, "type") or not (
                    hasattr(config, "function")
                    and hasattr(config.function, "name")):
                continue
            if (config.type == "function"
                    and config.function.name == func_name):
                if not hasattr(config.function, "parameters"):
                    return {}
                params = config.function.parameters
                if isinstance(params, dict) and "properties" in params:
                    return params["properties"]
                elif isinstance(params, dict):
                    return params
                else:
                    return {}
        logger.warning("Tool '%s' is not defined in the tools list.",
                       func_name)
        return {}

    def convert_param_value(param_value: str, param_name: str,
                            param_config: dict, func_name: str) -> Any:
        # Handle null value for any type
        if param_value.lower() == "null":
            return None

        converted_value: Any

        if param_name not in param_config:
            if param_config != {}:
                logger.warning(
                    "Parsed parameter '%s' is not defined in the tool "
                    "parameters for tool '%s', directly returning the "
                    "string value.", param_name, func_name)
            return param_value

        if (isinstance(param_config[param_name], dict)
                and "type" in param_config[param_name]):
            param_type = str(
                param_config[param_name]["type"]).strip().lower()
        else:
            param_type = "string"
        if param_type in [
                "string", "str", "text", "varchar", "char", "enum"
        ]:
            return param_value
        elif (param_type.startswith("int") or param_type.startswith("uint")
              or param_type.startswith("long")
              or param_type.startswith("short")
              or param_type.startswith("unsigned")):
            try:
                converted_value = int(param_value)
                return converted_value
            except ValueError:
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not an "
                    "integer in tool '%s', degenerating to string.",
                    param_value, param_name, func_name)
            return param_value
        elif (param_type.startswith("num")
              or param_type.startswith("float")):
            try:
                float_param_value = float(param_value)
                # Collapse integral floats (e.g. "3.0") to int.
                converted_value = (float_param_value if float_param_value -
                                   int(float_param_value) != 0 else
                                   int(float_param_value))
                return converted_value
            except ValueError:
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not a float "
                    "in tool '%s', degenerating to string.", param_value,
                    param_name, func_name)
            return param_value
        elif param_type in ["boolean", "bool", "binary"]:
            param_value = param_value.lower()
            if param_value not in ["true", "false"]:
                # Fixed typo in the warning text: "`true` of `false`"
                # -> "`true` or `false`".
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not a "
                    "boolean (`true` or `false`) in tool '%s', "
                    "degenerating to false.", param_value, param_name,
                    func_name)
            return param_value == "true"
        else:
            if param_type == "object" or param_type.startswith("dict"):
                try:
                    converted_value = json.loads(param_value)
                    return converted_value
                except json.JSONDecodeError:
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' is not a "
                        "valid JSON object in tool '%s', will try other "
                        "methods to parse it.", param_value, param_name,
                        func_name)
            logger.warning(
                "Parameter '%s' has unknown type '%s'. "
                "The value will be treated as a string.", param_name,
                param_type)
            return param_value

    # Extract function name.
    # NOTE(review): assumes '>' is present; the '<function=(.*)$'
    # regex alternative can yield a header without one, in which case
    # .index() raises ValueError — callers catch it. Confirm upstream.
    end_index = function_call_str.index(">")
    function_name = function_call_str[:end_index]
    param_config = get_arguments_config(function_name)
    parameters = function_call_str[end_index + 1:]
    param_dict = {}
    for match in self.tool_call_parameter_regex.findall(parameters):
        match_text = match[0] if match[0] else match[1]
        idx = match_text.index(">")
        param_name = match_text[:idx]
        param_value = str(match_text[idx + 1:])
        # Remove prefix and trailing \n
        if param_value.startswith("\n"):
            param_value = param_value[1:]
        if param_value.endswith("\n"):
            param_value = param_value[:-1]

        param_dict[param_name] = convert_param_value(
            param_value, param_name, param_config, function_name)
    return ToolCall(
        type="function",
        function=FunctionCall(name=function_name,
                              arguments=json.dumps(param_dict,
                                                   ensure_ascii=False)),
    )

_reset_streaming_state

_reset_streaming_state()

Reset all streaming state.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
def _reset_streaming_state(self):
    """Reset all streaming state."""
    self.current_tool_index = 0
    self.is_tool_call_started = False
    self.header_sent = False
    self.current_tool_string_id = None
    self.current_function_name = None
    self.current_param_name = None
    self.current_param_value = ""
    self.param_count = 0
    self.in_param = False
    self.in_function = False
    self.accumulated_text = ""
    self.json_started = False
    self.json_closed = False

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Parse a complete (non-streaming) model response for tool calls.

    Returns an ExtractedToolCallInformation with the parsed tool
    calls and any content that preceded them; on any parsing error
    the whole output is returned as plain content.
    """
    # Quick check to avoid unnecessary processing
    if self.tool_call_prefix not in model_output:
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    try:
        function_calls = self._get_function_calls(model_output)
        if len(function_calls) == 0:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        tool_calls = [
            self._parse_xml_function_call(function_call_str, request.tools)
            for function_call_str in function_calls
        ]

        # Populate prev_tool_call_arr for serving layer to set
        # finish_reason
        self.prev_tool_call_arr.clear()  # Clear previous calls
        for tool_call in tool_calls:
            if tool_call:
                self.prev_tool_call_arr.append({
                    "name":
                    tool_call.function.name,
                    "arguments":
                    tool_call.function.arguments,
                })

        # Extract content before tool calls.
        # Prefer the <tool_call> tag; fall back to the bare
        # <function= prefix when the wrapper tag is absent.
        content_index = model_output.find(self.tool_call_start_token)
        content_index = (content_index if content_index >= 0 else
                         model_output.find(self.tool_call_prefix))
        content = model_output[:content_index]  # .rstrip()

        return ExtractedToolCallInformation(
            tools_called=(len(tool_calls) > 0),
            tool_calls=tool_calls,
            content=content if content else None,
        )

    except Exception:
        # Best-effort: never fail the response over a parse error.
        logger.exception("Error in extracting tool call from response.")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    # If no delta text, return None unless it's an EOS token after tool
    # calls
    if not delta_text:
        # Check if this is an EOS token after all tool calls are complete
        # We check for tool calls in the text even if is_tool_call_started
        # is False because it might have been reset after processing all
        # tools
        if (delta_token_ids
                and self.tool_call_end_token_id not in delta_token_ids):
            # Count complete tool calls
            complete_calls = len(
                self.tool_call_complete_regex.findall(current_text))

            # If we have completed tool calls and populated
            # prev_tool_call_arr
            if (complete_calls > 0 and len(self.prev_tool_call_arr) > 0):
                # Check if all tool calls are closed
                open_calls = (
                    current_text.count(self.tool_call_start_token) -
                    current_text.count(self.tool_call_end_token))
                if open_calls == 0:
                    # Return empty delta message to allow finish_reason
                    # processing
                    return DeltaMessage(content="")
            elif not self.is_tool_call_started and current_text:
                # This is a regular content response that's now complete
                return DeltaMessage(content="")
        return None

    # Check if this is the first call (reset state if needed)
    if not previous_text:
        self._reset_streaming_state()

    # Update accumulated text
    self.accumulated_text = current_text

    # Check if we need to advance to next tool
    if self.json_closed and not self.in_function:
        # Check if this tool call has ended
        tool_ends = current_text.count(self.tool_call_end_token)
        if tool_ends > self.current_tool_index:
            # This tool has ended, advance to next
            self.current_tool_index += 1
            self.header_sent = False
            self.param_count = 0
            self.json_started = False
            self.json_closed = False

            # Check if there are more tool calls
            tool_starts_count = current_text.count(
                self.tool_call_start_token)
            if self.current_tool_index >= tool_starts_count:
                # No more tool calls
                self.is_tool_call_started = False
            # Continue processing next tool
            return None

    # Handle normal content before tool calls
    if not self.is_tool_call_started:
        # Check if tool call is starting
        if (self.tool_call_start_token_id in delta_token_ids
                or self.tool_call_start_token in delta_text):
            self.is_tool_call_started = True
            # Return any content before the tool call
            if self.tool_call_start_token in delta_text:
                content_before = delta_text[:delta_text.index(
                    self.tool_call_start_token)]
                if content_before:
                    return DeltaMessage(content=content_before)
            return None
        else:
            # Check if we're between tool calls - skip whitespace
            if (current_text.rstrip().endswith(self.tool_call_end_token)
                    and delta_text.strip() == ""):
                # We just ended a tool call, skip whitespace
                return None
            # Normal content, no tool call
            return DeltaMessage(content=delta_text)

    # Check if we're between tool calls (waiting for next one)
    # Count tool calls we've seen vs processed
    tool_starts_count = current_text.count(self.tool_call_start_token)
    if self.current_tool_index >= tool_starts_count:
        # We're past all tool calls, shouldn't be here
        return None

    # We're in a tool call, find the current tool call portion
    # Need to find the correct tool call based on current_tool_index
    tool_starts: list[int] = []
    idx = 0
    while True:
        idx = current_text.find(self.tool_call_start_token, idx)
        if idx == -1:
            break
        tool_starts.append(idx)
        idx += len(self.tool_call_start_token)

    if self.current_tool_index >= len(tool_starts):
        # No more tool calls to process yet
        return None

    tool_start_idx = tool_starts[self.current_tool_index]
    # Find where this tool call ends (or current position if not ended yet)
    tool_end_idx = current_text.find(self.tool_call_end_token,
                                     tool_start_idx)
    if tool_end_idx == -1:
        tool_text = current_text[tool_start_idx:]
    else:
        tool_text = current_text[tool_start_idx:tool_end_idx +
                                 len(self.tool_call_end_token)]

    # Looking for function header
    if not self.header_sent:
        if self.tool_call_prefix in tool_text:
            func_start = (tool_text.find(self.tool_call_prefix) +
                          len(self.tool_call_prefix))
            func_end = tool_text.find(">", func_start)

            if func_end != -1:
                # Found complete function name
                self.current_function_name = tool_text[func_start:func_end]
                self.current_tool_string_id = self._generate_tool_call_id()
                self.header_sent = True
                self.in_function = True

                # IMPORTANT: Add to prev_tool_call_arr immediately when we
                # detect a tool call. This ensures
                # finish_reason="tool_calls" even if parsing isn't complete
                already_added = any(
                    tool.get("name") == self.current_function_name
                    for tool in self.prev_tool_call_arr)
                if not already_added:
                    self.prev_tool_call_arr.append({
                        "name": self.current_function_name,
                        "arguments":
                        "{}",  # Placeholder, will be updated later
                    })

                # Send header with function info
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        id=self.current_tool_string_id,
                        function=DeltaFunctionCall(
                            name=self.current_function_name, arguments=""),
                        type="function",
                    )
                ])
        return None

    # We've sent header, now handle function body
    if self.in_function:
        # Send opening brace if not sent yet
        if (not self.json_started
                and self.parameter_prefix not in delta_text):
            self.json_started = True
            return DeltaMessage(tool_calls=[
                DeltaToolCall(
                    index=self.current_tool_index,
                    function=DeltaFunctionCall(arguments="{"),
                )
            ])

        # Make sure json_started is set if we're processing parameters
        if not self.json_started:
            self.json_started = True

        # Check for function end in accumulated text
        if not self.json_closed and self.function_end_token in tool_text:
            # Close JSON
            self.json_closed = True

            # Extract the complete tool call to update prev_tool_call_arr
            # with final arguments. Find the function content
            func_start = (tool_text.find(self.tool_call_prefix) +
                          len(self.tool_call_prefix))
            func_content_end = tool_text.find(self.function_end_token,
                                              func_start)
            if func_content_end != -1:
                func_content = tool_text[func_start:func_content_end]
                # Parse to get the complete arguments
                try:
                    parsed_tool = self._parse_xml_function_call(
                        func_content, request.tools if request else None)
                    if parsed_tool:
                        # Update existing entry in prev_tool_call_arr with
                        # complete arguments
                        for i, tool in enumerate(self.prev_tool_call_arr):
                            if (tool.get("name") ==
                                    parsed_tool.function.name):
                                self.prev_tool_call_arr[i]["arguments"] = (
                                    parsed_tool.function.arguments)
                                break
                except Exception:
                    pass  # Ignore parsing errors during streaming

            result = DeltaMessage(tool_calls=[
                DeltaToolCall(
                    index=self.current_tool_index,
                    function=DeltaFunctionCall(arguments="}"),
                )
            ])

            # Reset state for next tool
            self.in_function = False
            self.json_closed = True

            return result

        # Look for parameters
        # Count how many complete parameters we have processed
        complete_params = tool_text.count(self.parameter_end_token)

        # Check if we should start a new parameter
        if not self.in_param and self.param_count < complete_params:
            # Find the unprocessed parameter
            # Count parameter starts
            param_starts = []
            idx = 0
            while True:
                idx = tool_text.find(self.parameter_prefix, idx)
                if idx == -1:
                    break
                param_starts.append(idx)
                idx += len(self.parameter_prefix)

            if len(param_starts) > self.param_count:
                # Process the next parameter
                param_idx = param_starts[self.param_count]
                param_start = param_idx + len(self.parameter_prefix)
                remaining = tool_text[param_start:]

                if ">" in remaining:
                    # We have the complete parameter name
                    name_end = remaining.find(">")
                    self.current_param_name = remaining[:name_end]

                    # Find the parameter value
                    value_start = param_start + name_end + 1
                    value_text = tool_text[value_start:]
                    if value_text.startswith("\n"):
                        value_text = value_text[1:]

                    # Find where this parameter ends
                    param_end_idx = value_text.find(
                        self.parameter_end_token)
                    if param_end_idx != -1:
                        # Complete parameter found
                        param_value = value_text[:param_end_idx]
                        if param_value.endswith("\n"):
                            param_value = param_value[:-1]

                        # Build complete JSON fragment for this parameter
                        if self.param_count == 0:
                            json_fragment = (
                                '"' + self.current_param_name + '": "' +
                                json.dumps(param_value)[1:-1] + '"')
                        else:
                            json_fragment = (
                                ', "' + self.current_param_name + '": "' +
                                json.dumps(param_value)[1:-1] + '"')

                        self.param_count += 1

                        return DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=self.current_tool_index,
                                function=DeltaFunctionCall(
                                    arguments=json_fragment),
                            )
                        ])

        # Continue parameter value
        if self.in_param:
            if self.parameter_end_token in delta_text:
                # End of parameter
                end_idx = delta_text.find(self.parameter_end_token)
                value_chunk = delta_text[:end_idx]

                # Skip past > if at start
                if not self.current_param_value and ">" in value_chunk:
                    gt_idx = value_chunk.find(">")
                    value_chunk = value_chunk[gt_idx + 1:]

                if (not self.current_param_value
                        and value_chunk.startswith("\n")):
                    value_chunk = value_chunk[1:]

                # Calculate incremental JSON
                full_value = self.current_param_value + value_chunk
                prev_escaped = (json.dumps(self.current_param_value)[1:-1]
                                if self.current_param_value else "")
                full_escaped = json.dumps(full_value)[1:-1]
                delta_escaped = full_escaped[len(prev_escaped):]

                self.in_param = False
                self.current_param_value = ""

                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(
                            arguments=delta_escaped + '"'),
                    )
                ])
            else:
                # Continue accumulating value
                value_chunk = delta_text

                # Handle first chunk after param name
                if not self.current_param_value and ">" in value_chunk:
                    gt_idx = value_chunk.find(">")
                    value_chunk = value_chunk[gt_idx + 1:]

                if (not self.current_param_value
                        and value_chunk.startswith("\n")):
                    value_chunk = value_chunk[1:]

                if value_chunk:
                    # Stream the escaped delta
                    prev_escaped = (json.dumps(
                        self.current_param_value)[1:-1]
                                    if self.current_param_value else "")
                    self.current_param_value += value_chunk
                    full_escaped = json.dumps(
                        self.current_param_value)[1:-1]
                    delta_escaped = full_escaped[len(prev_escaped):]

                    if delta_escaped:
                        return DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=self.current_tool_index,
                                function=DeltaFunctionCall(
                                    arguments=delta_escaped),
                            )
                        ])

    return None

SeedOssToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
@ToolParserManager.register_module("seed_oss")
class SeedOssToolParser(ToolParser):
    """Tool-call parser for Seed-OSS models.

    Seed-OSS emits tool invocations as XML-like markup::

        <seed:tool_call>
            <function=NAME>
                <parameter=KEY>VALUE</parameter>
                ...
            </function>
        </seed:tool_call>

    optionally preceded by a reasoning span delimited by ``<seed:think>``
    and ``</seed:think>``.  The class supports one-shot extraction
    (``extract_tool_calls``) and incremental streaming extraction
    (``extract_tool_calls_streaming``).
    """

    TOOL_CALL_START = "<seed:tool_call>"
    TOOL_CALL_END = "</seed:tool_call>"

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        # --- streaming state ---
        self._reset_streaming_state()
        self.prev_tool_call_arr: list[dict] = []

        self.tool_call_start_token: str = self.TOOL_CALL_START
        self.tool_call_end_token: str = self.TOOL_CALL_END
        # Sentinel tokens for streaming mode
        self.tool_call_prefix: str = "<function="
        self.function_end_token: str = "</function>"
        self.parameter_prefix: str = "<parameter="
        self.parameter_end_token: str = "</parameter>"
        self.think_start_token: str = "<seed:think>"
        self.think_end_token: str = "</seed:think>"
        self.is_tool_call_started: bool = False
        self.is_thinking_end: bool = False
        self.failed_count: int = 0
        # (A second, redundant _reset_streaming_state() call that used to
        # sit here was removed; the call at the top of __init__ already
        # initializes all streaming state.)

        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
        self.think_end_token_id = self.vocab.get(self.think_end_token)

        # Streaming detection keys off these sentinel token ids, so the
        # tokenizer must expose both tags as vocabulary entries.
        if (self.tool_call_start_token_id is None
                or self.tool_call_end_token_id is None):
            raise RuntimeError(
                "Seed_Oss XML parser: tokenizer did not include "
                "<seed:tool_call> or its closing tag.")

        tool_start_re = re.escape(self.tool_call_start_token)
        tool_end_re = re.escape(self.tool_call_end_token)

        # Matches only fully closed tool-call spans.
        self.tool_call_complete_regex = re.compile(
            rf"{tool_start_re}(.*?){tool_end_re}", re.DOTALL)
        # Matches closed spans OR a trailing span that is still open.
        self.tool_call_regex = re.compile(
            rf"{tool_start_re}(.*?){tool_end_re}|{tool_start_re}(.*?)$",
            re.DOTALL)

        self.tool_call_function_regex = re.compile(
            r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
        self.tool_call_parameter_regex = re.compile(
            r"<parameter=(.*?)</parameter>|<parameter=(.*?)$", re.DOTALL)

        logger.info("vLLM Seed-Oss XML tool parser loaded (%s).",
                    self.__class__.__name__)

    def _generate_tool_call_id(self) -> str:
        """Generate a unique tool call ID."""
        return f"call_{uuid.uuid4().hex[:24]}"

    def _reset_streaming_state(self):
        """Reset all streaming state."""
        self.current_tool_index = 0
        self.is_tool_call_started = False
        self.header_sent = False
        # Holds -1 until a header is emitted, after which it is replaced
        # by the generated string id from _generate_tool_call_id().
        self.current_tool_id = -1
        self.current_function_name = None
        self.current_param_name = None
        self.current_param_value = ""
        self.param_count = 0
        self.in_param = False
        self.in_function = False
        self.accumulated_text = ""
        self.json_started = False
        self.json_closed = False

    def _parse_xml_function_call(
            self, function_call_str: str,
            tools: Optional[list[ChatCompletionToolsParam]]
    ) -> Optional[ToolCall]:
        """Parse one function fragment (the text following ``<function=``,
        i.e. ``NAME><parameter=...>...``) into a ToolCall.

        Parameter values are coerced to the types declared in *tools*
        when a matching declaration exists; unknown parameters fall back
        to their raw string value.
        """

        def get_arguments_config(func_name: str) -> dict:
            # Locate the declared parameter schema ("properties") for
            # func_name; returns {} when nothing usable is declared.
            if tools is None:
                return {}
            for config in tools:
                if not hasattr(config, "type") or not (
                        hasattr(config, "function")
                        and hasattr(config.function, "name")):
                    continue
                if (config.type == "function"
                        and config.function.name == func_name):
                    if not hasattr(config.function, "parameters"):
                        return {}
                    params = config.function.parameters
                    if isinstance(params, dict) and "properties" in params:
                        return params["properties"]
                    elif isinstance(params, dict):
                        return params
                    else:
                        return {}
            logger.warning("Tool '%s' is not defined in the tools list.",
                           func_name)
            return {}

        def convert_param_value(param_value: str, param_name: str,
                                param_config: dict, func_name: str) -> Any:
            """Coerce the raw string *param_value* to the declared type;
            degrade to the string form (with a warning) on failure."""
            # Handle null value for any type
            if param_value.lower() == "null":
                return None

            if param_name not in param_config:
                if param_config != {}:
                    logger.warning(
                        "Parsed parameter '%s' is not defined in "
                        "the tool parameters for tool '%s', "
                        "directly returning the string value.", param_name,
                        func_name)
                return param_value

            if (isinstance(param_config[param_name], dict)
                    and "type" in param_config[param_name]):
                param_type = str(
                    param_config[param_name]["type"]).strip().lower()
            else:
                param_type = "string"
            if param_type in [
                    "string", "str", "text", "varchar", "char", "enum"
            ]:
                return param_value
            elif (param_type.startswith("int") or param_type.startswith("uint")
                  or param_type.startswith("long")
                  or param_type.startswith("short")
                  or param_type.startswith("unsigned")):
                try:
                    param_value = int(param_value)  # type: ignore
                except (ValueError, TypeError):
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' is not an integer in tool "
                        "'%s', degenerating to string.", param_value,
                        param_name, func_name)
                return param_value
            elif param_type.startswith("num") or param_type.startswith(
                    "float"):
                try:
                    # Whole-valued floats are narrowed to int.
                    float_param_value = float(param_value)
                    param_value = float_param_value if float_param_value - int(
                        float_param_value) != 0 else int(
                            float_param_value)  # type: ignore
                except (ValueError, TypeError):
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' is not a float in tool "
                        "'%s', degenerating to string.", param_value,
                        param_name, func_name)
                return param_value
            elif param_type in ["boolean", "bool", "binary"]:
                param_value = param_value.lower()
                if param_value not in ["true", "false"]:
                    # Fixed typo in this warning: "of" -> "or".
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' is not a boolean "
                        "(`true` or `false`) in tool '%s', degenerating to false.",
                        param_value, param_name, func_name)
                return param_value == "true"
            else:
                # Object/dict types: try JSON first, then Python literal.
                if param_type == "object" or param_type.startswith("dict"):
                    try:
                        param_value = json.loads(param_value)
                        return param_value
                    except (ValueError, TypeError, json.JSONDecodeError):
                        logger.warning(
                            "Parsed value '%s' of parameter '%s' is not a valid JSON "
                            "object in tool '%s', will try other methods to parse it.",
                            param_value, param_name, func_name)
                try:
                    param_value = ast.literal_eval(param_value)
                except (ValueError, SyntaxError):
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' cannot be converted via "
                        "Python `ast.literal_eval()` in tool '%s', degenerating to string.",
                        param_value, param_name, func_name)
                return param_value

        # Extract function name
        # NOTE(review): raises ValueError if no ">" is present; callers
        # wrap this in a try/except at the extract_tool_calls level.
        end_index = function_call_str.index(">")
        function_name = function_call_str[:end_index]
        param_config = get_arguments_config(function_name)
        parameters = function_call_str[end_index + 1:]
        param_dict = {}
        for match in self.tool_call_parameter_regex.findall(parameters):
            match_text = match[0] if match[0] else match[1]
            idx = match_text.index(">")
            param_name = match_text[:idx]
            param_value = str(match_text[idx + 1:])
            # Remove prefix and trailing \n
            if param_value.startswith("\n"):
                param_value = param_value[1:]
            if param_value.endswith("\n"):
                param_value = param_value[:-1]

            param_dict[param_name] = convert_param_value(
                param_value, param_name, param_config, function_name)
        return ToolCall(
            type="function",
            function=FunctionCall(name=function_name,
                                  arguments=json.dumps(param_dict,
                                                       ensure_ascii=False)),
        )

    def _get_function_calls(self, model_output: str) -> list[str]:
        """Return the raw ``<function=...`` fragments found in
        *model_output*, searching inside tool-call tags when present and
        falling back to the whole output otherwise."""
        # Find all tool calls
        matched_ranges = self.tool_call_regex.findall(model_output)
        raw_tool_calls = [
            match[0] if match[0] else match[1] for match in matched_ranges
        ]

        # Back-off strategy if no tool_call tags found
        if len(raw_tool_calls) == 0:
            raw_tool_calls = [model_output]

        raw_function_calls = []
        for tool_call in raw_tool_calls:
            raw_function_calls.extend(
                self.tool_call_function_regex.findall(tool_call))

        function_calls = [
            match[0] if match[0] else match[1] for match in raw_function_calls
        ]
        return function_calls

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """One-shot (non-streaming) extraction of tool calls from a
        complete model response.

        Returns the parsed tool calls plus any content (including the
        thinking span) that preceded the first tool call; on any parse
        error the entire output is returned as plain content.
        """
        # Quick check to avoid unnecessary processing
        if self.tool_call_prefix not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        # Check if both think start and end tokens are present
        if (self.think_start_token in model_output
                and self.think_end_token in model_output):
            # Find the position of think end token
            think_end_index = model_output.find(self.think_end_token) + len(
                self.think_end_token)
            # Extract content after think end token
            result_content = model_output[think_end_index:]
            thinking_content = model_output[:think_end_index]
        else:
            thinking_content = ""
            result_content = model_output

        try:
            function_calls = self._get_function_calls(result_content)
            if len(function_calls) == 0:
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

            tool_calls = [
                self._parse_xml_function_call(function_call_str, request.tools)
                for function_call_str in function_calls
            ]

            # Populate prev_tool_call_arr for serving layer to set finish_reason
            self.prev_tool_call_arr.clear()  # Clear previous calls
            for tool_call in tool_calls:
                if tool_call:
                    self.prev_tool_call_arr.append({
                        "name":
                        tool_call.function.name,
                        "arguments":
                        tool_call.function.arguments,
                    })

            # Extract content before tool calls
            tool_call_start_index = result_content.find(
                self.tool_call_start_token)
            tool_call_start_index = (
                tool_call_start_index if tool_call_start_index >= 0 else
                result_content.find(self.tool_call_prefix))
            content = thinking_content + result_content[:tool_call_start_index]

            return ExtractedToolCallInformation(
                tools_called=(len(tool_calls) > 0),
                tool_calls=tool_calls,
                content=content if content else None,
            )

        except Exception:
            logger.exception("Error in extracting tool call from response.")
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Incrementally convert streamed model text into tool-call deltas.

        Per tool call this emits: a header delta (function name + id),
        an opening ``{``, one JSON fragment per completed parameter, and
        a closing ``}`` when ``</function>`` arrives.  Returns ``None``
        when the current delta yields nothing to forward.

        NOTE(review): streamed argument fragments always quote parameter
        values as JSON strings, while the final arguments recorded in
        prev_tool_call_arr are type-coerced — the two may disagree for
        non-string parameters; confirm downstream consumers tolerate this.
        """
        # If no delta text, return None unless
        # it's an EOS token after tool calls
        if not delta_text:
            # Check if this is an EOS token after all tool calls are complete
            # We check for tool calls in the text even if is_tool_call_started
            # is False because it might have been reset after processing all tools
            if (delta_token_ids
                    and self.tool_call_end_token_id not in delta_token_ids):
                # Count complete tool calls
                complete_calls = len(
                    self.tool_call_complete_regex.findall(current_text))

                # If we have completed tool calls and populated prev_tool_call_arr
                if complete_calls > 0 and len(self.prev_tool_call_arr) > 0:
                    # Check if all tool calls are closed
                    open_calls = current_text.count(
                        self.tool_call_start_token) - current_text.count(
                            self.tool_call_end_token)
                    if open_calls == 0:
                        # Return empty delta message to allow finish_reason processing
                        return DeltaMessage(content="")
                elif not self.is_tool_call_started and current_text:
                    # This is a regular content response that's now complete
                    return DeltaMessage(content="")
            return None

        # Check if this is the first call (reset state if needed)
        if not previous_text:
            self._reset_streaming_state()

        # Update accumulated text
        self.accumulated_text = current_text

        # Check if we need to advance to next tool
        if self.json_closed and not self.in_function:
            # Check if this tool call has ended
            tool_ends = current_text.count(self.tool_call_end_token)
            if tool_ends > self.current_tool_index:
                # This tool has ended, advance to next
                self.current_tool_index += 1
                self.header_sent = False
                self.param_count = 0
                self.json_started = False
                self.json_closed = False

                # Check if there are more tool calls
                if self.current_tool_index >= current_text.count(
                        self.tool_call_start_token):
                    # No more tool calls
                    self.is_tool_call_started = False
                # Continue processing next tool
                return None

        # Check if end thinking
        if (not self.is_thinking_end
                and (self.think_end_token_id in delta_token_ids
                     or self.think_end_token in delta_text)):
            self.is_thinking_end = True

        # If thinking hasn't ended yet, don't process any tool calls
        if not self.is_thinking_end:
            return DeltaMessage(content=delta_text)

        # Handle normal content before tool calls
        if not self.is_tool_call_started:
            # Check if tool call is starting
            if (self.tool_call_start_token_id in delta_token_ids
                    or self.tool_call_start_token in delta_text):
                self.is_tool_call_started = True
                # Return any content before the tool call
                if self.tool_call_start_token in delta_text:
                    content_before = delta_text[:delta_text.index(
                        self.tool_call_start_token)]
                    if content_before:
                        return DeltaMessage(content=content_before)
                return None
            else:
                # Check if we're between tool calls - skip whitespace
                if (current_text.rstrip().endswith(self.tool_call_end_token)
                        and delta_text.strip() == ""):
                    # We just ended a tool call, skip whitespace
                    return None
                # Normal content, no tool call
                return DeltaMessage(content=delta_text)

        # Check if we're between tool calls (waiting for next one)
        # Count tool calls we've seen vs processed
        tool_starts_count = current_text.count(self.tool_call_start_token)
        if self.current_tool_index >= tool_starts_count:
            # We're past all tool calls, shouldn't be here
            return None

        # We're in a tool call, find the current tool call portion
        # Need to find the correct tool call based on current_tool_index
        # Only process tool calls after think_end_token
        think_end_index = current_text.find(self.think_end_token) + len(
            self.think_end_token
        ) if self.think_end_token in current_text else 0
        tool_starts: list[int] = []
        idx = think_end_index
        while True:
            idx = current_text.find(self.tool_call_start_token, idx)
            if idx == -1:
                break
            tool_starts.append(idx)
            idx += len(self.tool_call_start_token)

        if self.current_tool_index >= len(tool_starts):
            # No more tool calls to process yet
            return None

        tool_start_idx = tool_starts[self.current_tool_index]
        # Find where this tool call ends (or current position if not ended yet)
        tool_end_idx = current_text.find(self.tool_call_end_token,
                                         tool_start_idx)
        if tool_end_idx == -1:
            tool_text = current_text[tool_start_idx:]
        else:
            tool_text = current_text[tool_start_idx:tool_end_idx +
                                     len(self.tool_call_end_token)]

        # Looking for function header
        if not self.header_sent:
            if self.tool_call_prefix in tool_text:
                func_start = tool_text.find(self.tool_call_prefix) + len(
                    self.tool_call_prefix)
                func_end = tool_text.find(">", func_start)

                if func_end != -1:
                    # Found complete function name
                    self.current_function_name = tool_text[func_start:func_end]
                    self.current_tool_id = self._generate_tool_call_id(
                    )  # type: ignore
                    self.header_sent = True
                    self.in_function = True

                    # IMPORTANT: Add to prev_tool_call_arr immediately when we detect a tool call
                    # This ensures finish_reason="tool_calls" even if parsing isn't complete
                    already_added = any(
                        tool.get("name") == self.current_function_name
                        for tool in self.prev_tool_call_arr)
                    if not already_added:
                        self.prev_tool_call_arr.append({
                            "name": self.current_function_name,
                            "arguments":
                            "{}",  # Placeholder, will be updated later
                        })

                    # Send header with function info
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            id=self.current_tool_id,
                            function=DeltaFunctionCall(
                                name=self.current_function_name, arguments=""),
                            type="function",
                        )
                    ])
            return None

        # We've sent header, now handle function body
        if self.in_function:
            # Send opening brace if not sent yet
            if (not self.json_started
                    and self.parameter_prefix not in delta_text):
                self.json_started = True
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="{"),
                    )
                ])

            # Make sure json_started is set if we're processing parameters
            if not self.json_started:
                self.json_started = True

            # Check for function end in accumulated text
            if not self.json_closed and self.function_end_token in tool_text:
                # Close JSON
                self.json_closed = True

                # Extract the complete tool call to update prev_tool_call_arr with final arguments
                # Find the function content
                func_start = tool_text.find(self.tool_call_prefix) + len(
                    self.tool_call_prefix)
                func_content_end = tool_text.find(self.function_end_token,
                                                  func_start)
                if func_content_end != -1:
                    func_content = tool_text[func_start:func_content_end]
                    # Parse to get the complete arguments
                    try:
                        parsed_tool = self._parse_xml_function_call(
                            func_content, request.tools if request else None)
                        if parsed_tool:
                            # Update existing entry in prev_tool_call_arr with complete arguments
                            for i, tool in enumerate(self.prev_tool_call_arr):
                                if tool.get(
                                        "name") == parsed_tool.function.name:
                                    self.prev_tool_call_arr[i]["arguments"] = (
                                        parsed_tool.function.arguments)
                                    break
                    except Exception:
                        logger.warning(
                            "Failed to parse tool arguments during streaming.",
                            exc_info=True)

                result = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="}"),
                    )
                ])

                # Reset state for next tool
                self.in_function = False
                # json_closed is already True at this point; the assignment
                # is kept as part of the explicit reset group.
                self.json_closed = True

                return result

            # Look for parameters
            # Count how many complete parameters we have processed
            complete_params = tool_text.count(self.parameter_end_token)

            # Check if we should start a new parameter
            if not self.in_param and self.param_count < complete_params:
                # Find the unprocessed parameter
                # Count parameter starts
                param_starts = []
                idx = 0
                while True:
                    idx = tool_text.find(self.parameter_prefix, idx)
                    if idx == -1:
                        break
                    param_starts.append(idx)
                    idx += len(self.parameter_prefix)

                if len(param_starts) > self.param_count:
                    # Process the next parameter
                    param_idx = param_starts[self.param_count]
                    param_start = param_idx + len(self.parameter_prefix)
                    remaining = tool_text[param_start:]

                    if ">" in remaining:
                        # We have the complete parameter name
                        name_end = remaining.find(">")
                        self.current_param_name = remaining[:name_end]

                        # Find the parameter value
                        value_start = param_start + name_end + 1
                        value_text = tool_text[value_start:]
                        if value_text.startswith("\n"):
                            value_text = value_text[1:]

                        # Find where this parameter ends
                        param_end_idx = value_text.find(
                            self.parameter_end_token)
                        if param_end_idx != -1:
                            # Complete parameter found
                            param_value = value_text[:param_end_idx]
                            if param_value.endswith("\n"):
                                param_value = param_value[:-1]

                            # Build complete JSON fragment for this parameter
                            # (first parameter omits the leading comma; the
                            # opening "{" was emitted separately above)
                            if self.param_count == 0:
                                json_fragment = (
                                    '"' + self.current_param_name + '": "' +
                                    json.dumps(param_value)[1:-1] + '"')
                            else:
                                json_fragment = (
                                    ', "' + self.current_param_name + '": "' +
                                    json.dumps(param_value)[1:-1] + '"')

                            self.param_count += 1

                            return DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=json_fragment),
                                )
                            ])

            # Continue parameter value
            # NOTE(review): self.in_param is never set to True anywhere in
            # this class (only to False), so this branch appears to be
            # unreachable dead code; kept as-is pending confirmation.
            if self.in_param:
                if self.parameter_end_token in delta_text:
                    # End of parameter
                    end_idx = delta_text.find(self.parameter_end_token)
                    value_chunk = delta_text[:end_idx]

                    # Skip past > if at start
                    if not self.current_param_value and ">" in value_chunk:
                        gt_idx = value_chunk.find(">")
                        value_chunk = value_chunk[gt_idx + 1:]

                    if not self.current_param_value and value_chunk.startswith(
                            "\n"):
                        value_chunk = value_chunk[1:]

                    # Calculate incremental JSON
                    full_value = self.current_param_value + value_chunk
                    prev_escaped = (json.dumps(self.current_param_value)[1:-1]
                                    if self.current_param_value else "")
                    full_escaped = json.dumps(full_value)[1:-1]
                    delta_escaped = full_escaped[len(prev_escaped):]

                    self.in_param = False
                    self.current_param_value = ""

                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            function=DeltaFunctionCall(
                                arguments=delta_escaped + '"'),
                        )
                    ])
                else:
                    # Continue accumulating value
                    value_chunk = delta_text

                    # Handle first chunk after param name
                    if not self.current_param_value and ">" in value_chunk:
                        gt_idx = value_chunk.find(">")
                        value_chunk = value_chunk[gt_idx + 1:]

                    if not self.current_param_value and value_chunk.startswith(
                            "\n"):
                        value_chunk = value_chunk[1:]

                    if value_chunk:
                        # Stream the escaped delta
                        prev_escaped = (json.dumps(
                            self.current_param_value)[1:-1]
                                        if self.current_param_value else "")
                        self.current_param_value += value_chunk
                        full_escaped = json.dumps(
                            self.current_param_value)[1:-1]
                        delta_escaped = full_escaped[len(prev_escaped):]

                        if delta_escaped:
                            return DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=delta_escaped),
                                )
                            ])

        return None

TOOL_CALL_END class-attribute instance-attribute

TOOL_CALL_END = '</seed:tool_call>'

TOOL_CALL_START class-attribute instance-attribute

TOOL_CALL_START = '<seed:tool_call>'

failed_count instance-attribute

failed_count: int = 0

function_end_token instance-attribute

function_end_token: str = '</function>'

is_thinking_end instance-attribute

is_thinking_end: bool = False

is_tool_call_started instance-attribute

is_tool_call_started: bool = False

parameter_end_token instance-attribute

parameter_end_token: str = '</parameter>'

parameter_prefix instance-attribute

parameter_prefix: str = '<parameter='

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

think_end_token instance-attribute

think_end_token: str = '</seed:think>'

think_end_token_id instance-attribute

think_end_token_id = get(think_end_token)

think_start_token instance-attribute

think_start_token: str = '<seed:think>'

tool_call_complete_regex instance-attribute

tool_call_complete_regex = compile(
    f"{tool_start_re}(.*?){tool_end_re}", DOTALL
)

tool_call_end_token instance-attribute

tool_call_end_token: str = TOOL_CALL_END

tool_call_end_token_id instance-attribute

tool_call_end_token_id = get(tool_call_end_token)

tool_call_function_regex instance-attribute

tool_call_function_regex = compile(
    "<function=(.*?)</function>|<function=(.*)$", DOTALL
)

tool_call_parameter_regex instance-attribute

tool_call_parameter_regex = compile(
    "<parameter=(.*?)</parameter>|<parameter=(.*?)$", DOTALL
)

tool_call_prefix instance-attribute

tool_call_prefix: str = '<function='

tool_call_regex instance-attribute

tool_call_regex = compile(
    f"{tool_start_re}(.*?){tool_end_re}|{tool_start_re}(.*?)$",
    DOTALL,
)

tool_call_start_token instance-attribute

tool_call_start_token: str = TOOL_CALL_START

tool_call_start_token_id instance-attribute

tool_call_start_token_id = get(tool_call_start_token)

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Initialize the Seed-OSS XML tool parser.

    Caches the sentinel strings of the model's XML-like tool-call
    format, resolves their token ids from the tokenizer vocabulary,
    and pre-compiles the regexes used by both the batch and the
    streaming extraction paths.

    Raises:
        RuntimeError: if the tokenizer vocabulary does not contain the
            tool-call begin/end sentinel tokens.
    """
    super().__init__(tokenizer)

    # --- streaming state ---
    # NOTE: the original code invoked _reset_streaming_state() a second
    # time further below; one call fully initializes the streaming
    # bookkeeping, so the duplicate was dropped.
    self._reset_streaming_state()
    self.prev_tool_call_arr: list[dict] = []

    self.tool_call_start_token: str = self.TOOL_CALL_START
    self.tool_call_end_token: str = self.TOOL_CALL_END
    # Sentinel tokens for streaming mode
    self.tool_call_prefix: str = "<function="
    self.function_end_token: str = "</function>"
    self.parameter_prefix: str = "<parameter="
    self.parameter_end_token: str = "</parameter>"
    self.think_start_token: str = "<seed:think>"
    self.think_end_token: str = "</seed:think>"
    self.is_tool_call_started: bool = False
    self.is_thinking_end: bool = False
    self.failed_count: int = 0

    self.tool_call_start_token_id = self.vocab.get(
        self.tool_call_start_token)
    self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
    self.think_end_token_id = self.vocab.get(self.think_end_token)

    # Without the sentinel token ids the fast-path checks on
    # delta_token_ids in streaming mode cannot work, so fail loudly.
    if (self.tool_call_start_token_id is None
            or self.tool_call_end_token_id is None):
        raise RuntimeError(
            "Seed_Oss XML parser: tokenizer did not include "
            "<seed:tool_call> or its closing tag.")

    tool_start_re = re.escape(self.tool_call_start_token)
    tool_end_re = re.escape(self.tool_call_end_token)

    # Complete tool call: begin and end sentinels both present.
    self.tool_call_complete_regex = re.compile(
        rf"{tool_start_re}(.*?){tool_end_re}", re.DOTALL)
    # Complete call, or a trailing partial call still being generated.
    self.tool_call_regex = re.compile(
        rf"{tool_start_re}(.*?){tool_end_re}|{tool_start_re}(.*?)$",
        re.DOTALL)

    # <function=...> / <parameter=...> blocks, complete or trailing.
    self.tool_call_function_regex = re.compile(
        r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
    self.tool_call_parameter_regex = re.compile(
        r"<parameter=(.*?)</parameter>|<parameter=(.*?)$", re.DOTALL)

    logger.info("vLLM Seed-Oss XML tool parser loaded (%s).",
                self.__class__.__name__)

_generate_tool_call_id

_generate_tool_call_id() -> str

Generate a unique tool call ID.

Source code in vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
def _generate_tool_call_id(self) -> str:
    """Produce a fresh, unique OpenAI-style ``call_<hex>`` identifier."""
    suffix = uuid.uuid4().hex[:24]
    return "call_" + suffix

_get_function_calls

_get_function_calls(model_output: str) -> list[str]
Source code in vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
def _get_function_calls(self, model_output: str) -> list[str]:
    # Find all tool calls
    matched_ranges = self.tool_call_regex.findall(model_output)
    raw_tool_calls = [
        match[0] if match[0] else match[1] for match in matched_ranges
    ]

    # Back-off strategy if no tool_call tags found
    if len(raw_tool_calls) == 0:
        raw_tool_calls = [model_output]

    raw_function_calls = []
    for tool_call in raw_tool_calls:
        raw_function_calls.extend(
            self.tool_call_function_regex.findall(tool_call))

    function_calls = [
        match[0] if match[0] else match[1] for match in raw_function_calls
    ]
    return function_calls

_parse_xml_function_call

_parse_xml_function_call(
    function_call_str: str,
    tools: Optional[list[ChatCompletionToolsParam]],
) -> Optional[ToolCall]
Source code in vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
def _parse_xml_function_call(
        self, function_call_str: str,
        tools: Optional[list[ChatCompletionToolsParam]]
) -> Optional[ToolCall]:
    """Parse a single function body captured after ``<function=``.

    Everything up to the first ``>`` is the function name; the
    remainder holds ``<parameter=name>value</parameter>`` blocks.
    Parameter values are coerced to Python types using the JSON-schema
    ``type`` of the matching tool definition in *tools*, degenerating
    to the raw string when no schema or conversion applies.  Returns a
    ``ToolCall`` whose arguments are serialized as JSON.
    """

    # Look up the "properties" schema of func_name in the request
    # tools; empty dict when the tool or its parameters are absent.
    def get_arguments_config(func_name: str) -> dict:
        if tools is None:
            return {}
        for config in tools:
            if not hasattr(config, "type") or not (
                    hasattr(config, "function")
                    and hasattr(config.function, "name")):
                continue
            if (config.type == "function"
                    and config.function.name == func_name):
                if not hasattr(config.function, "parameters"):
                    return {}
                params = config.function.parameters
                if isinstance(params, dict) and "properties" in params:
                    return params["properties"]
                elif isinstance(params, dict):
                    return params
                else:
                    return {}
        logger.warning("Tool '%s' is not defined in the tools list.",
                       func_name)
        return {}

    # Best-effort coercion of the raw string value to the schema type;
    # on any failure the raw string is kept and a warning is logged.
    def convert_param_value(param_value: str, param_name: str,
                            param_config: dict, func_name: str) -> Any:
        # Handle null value for any type
        if param_value.lower() == "null":
            return None

        if param_name not in param_config:
            if param_config != {}:
                logger.warning(
                    "Parsed parameter '%s' is not defined in "
                    "the tool parameters for tool '%s', "
                    "directly returning the string value.", param_name,
                    func_name)
            return param_value

        if (isinstance(param_config[param_name], dict)
                and "type" in param_config[param_name]):
            param_type = str(
                param_config[param_name]["type"]).strip().lower()
        else:
            param_type = "string"
        if param_type in [
                "string", "str", "text", "varchar", "char", "enum"
        ]:
            return param_value
        elif (param_type.startswith("int") or param_type.startswith("uint")
              or param_type.startswith("long")
              or param_type.startswith("short")
              or param_type.startswith("unsigned")):
            try:
                param_value = int(param_value)  # type: ignore
            except (ValueError, TypeError):
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not an integer in tool "
                    "'%s', degenerating to string.", param_value,
                    param_name, func_name)
            return param_value
        elif param_type.startswith("num") or param_type.startswith(
                "float"):
            try:
                float_param_value = float(param_value)
                # Collapse whole-valued floats (e.g. "3.0") to int.
                param_value = float_param_value if float_param_value - int(
                    float_param_value) != 0 else int(
                        float_param_value)  # type: ignore
            except (ValueError, TypeError):
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not a float in tool "
                    "'%s', degenerating to string.", param_value,
                    param_name, func_name)
            return param_value
        elif param_type in ["boolean", "bool", "binary"]:
            param_value = param_value.lower()
            if param_value not in ["true", "false"]:
                # Fixed typo in the warning text: "of" -> "or".
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not a boolean "
                    "(`true` or `false`) in tool '%s', degenerating to false.",
                    param_value, param_name, func_name)
            return param_value == "true"
        else:
            # Objects/dicts: try JSON first, then a Python literal.
            if param_type == "object" or param_type.startswith("dict"):
                try:
                    param_value = json.loads(param_value)
                    return param_value
                except (ValueError, TypeError, json.JSONDecodeError):
                    logger.warning(
                        "Parsed value '%s' of parameter '%s' is not a valid JSON "
                        "object in tool '%s', will try other methods to parse it.",
                        param_value, param_name, func_name)
            try:
                param_value = ast.literal_eval(param_value)
            except (ValueError, SyntaxError):
                logger.warning(
                    "Parsed value '%s' of parameter '%s' cannot be converted via "
                    "Python `ast.literal_eval()` in tool '%s', degenerating to string.",
                    param_value, param_name, func_name)
            return param_value

    # Extract function name
    end_index = function_call_str.index(">")
    function_name = function_call_str[:end_index]
    param_config = get_arguments_config(function_name)
    parameters = function_call_str[end_index + 1:]
    param_dict = {}
    for match in self.tool_call_parameter_regex.findall(parameters):
        # Non-empty group: complete block or trailing partial block.
        match_text = match[0] if match[0] else match[1]
        idx = match_text.index(">")
        param_name = match_text[:idx]
        param_value = str(match_text[idx + 1:])
        # Remove prefix and trailing \n
        if param_value.startswith("\n"):
            param_value = param_value[1:]
        if param_value.endswith("\n"):
            param_value = param_value[:-1]

        param_dict[param_name] = convert_param_value(
            param_value, param_name, param_config, function_name)
    return ToolCall(
        type="function",
        function=FunctionCall(name=function_name,
                              arguments=json.dumps(param_dict,
                                                   ensure_ascii=False)),
    )

_reset_streaming_state

_reset_streaming_state()

Reset all streaming state.

Source code in vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
def _reset_streaming_state(self):
    """Reset all per-request streaming bookkeeping to its initial state."""
    initial_state = {
        "current_tool_index": 0,
        "is_tool_call_started": False,
        "header_sent": False,
        "current_tool_id": -1,
        "current_function_name": None,
        "current_param_name": None,
        "current_param_value": "",
        "param_count": 0,
        "in_param": False,
        "in_function": False,
        "accumulated_text": "",
        "json_started": False,
        "json_closed": False,
    }
    for attribute, value in initial_state.items():
        setattr(self, attribute, value)

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Parse all tool calls out of a complete (non-streaming) response.

    Returns the parsed tool calls plus any content preceding them; when
    no ``<function=`` block is present, the whole output is returned as
    plain content.  Also records the parsed calls in
    ``prev_tool_call_arr`` so the serving layer can set
    ``finish_reason="tool_calls"``.
    """
    no_tools = ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

    # Fast path: nothing that even resembles a function block.
    if self.tool_call_prefix not in model_output:
        return no_tools

    # Split off a leading reasoning section when a complete
    # <seed:think>...</seed:think> pair is present.
    thinking_part = ""
    body = model_output
    if (self.think_start_token in model_output
            and self.think_end_token in model_output):
        split_at = (model_output.find(self.think_end_token)
                    + len(self.think_end_token))
        thinking_part, body = model_output[:split_at], model_output[split_at:]

    try:
        raw_calls = self._get_function_calls(body)
        if not raw_calls:
            return no_tools

        parsed_calls = [
            self._parse_xml_function_call(raw, request.tools)
            for raw in raw_calls
        ]

        # Record the parsed calls for the serving layer's
        # finish_reason handling.
        self.prev_tool_call_arr.clear()
        for call in parsed_calls:
            if call:
                self.prev_tool_call_arr.append({
                    "name": call.function.name,
                    "arguments": call.function.arguments,
                })

        # Everything before the first tool-call marker is plain
        # content; fall back to the <function= prefix when the
        # <seed:tool_call> sentinel is absent.
        first_marker = body.find(self.tool_call_start_token)
        if first_marker < 0:
            first_marker = body.find(self.tool_call_prefix)
        leading = thinking_part + body[:first_marker]

        return ExtractedToolCallInformation(
            tools_called=(len(parsed_calls) > 0),
            tool_calls=parsed_calls,
            content=leading if leading else None,
        )

    except Exception:
        logger.exception("Error in extracting tool call from response.")
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    # If no delta text, return None unless
    # it's an EOS token after tool calls
    if not delta_text:
        # Check if this is an EOS token after all tool calls are complete
        # We check for tool calls in the text even if is_tool_call_started
        # is False because it might have been reset after processing all tools
        if (delta_token_ids
                and self.tool_call_end_token_id not in delta_token_ids):
            # Count complete tool calls
            complete_calls = len(
                self.tool_call_complete_regex.findall(current_text))

            # If we have completed tool calls and populated prev_tool_call_arr
            if complete_calls > 0 and len(self.prev_tool_call_arr) > 0:
                # Check if all tool calls are closed
                open_calls = current_text.count(
                    self.tool_call_start_token) - current_text.count(
                        self.tool_call_end_token)
                if open_calls == 0:
                    # Return empty delta message to allow finish_reason processing
                    return DeltaMessage(content="")
            elif not self.is_tool_call_started and current_text:
                # This is a regular content response that's now complete
                return DeltaMessage(content="")
        return None

    # Check if this is the first call (reset state if needed)
    if not previous_text:
        self._reset_streaming_state()

    # Update accumulated text
    self.accumulated_text = current_text

    # Check if we need to advance to next tool
    if self.json_closed and not self.in_function:
        # Check if this tool call has ended
        tool_ends = current_text.count(self.tool_call_end_token)
        if tool_ends > self.current_tool_index:
            # This tool has ended, advance to next
            self.current_tool_index += 1
            self.header_sent = False
            self.param_count = 0
            self.json_started = False
            self.json_closed = False

            # Check if there are more tool calls
            if self.current_tool_index >= current_text.count(
                    self.tool_call_start_token):
                # No more tool calls
                self.is_tool_call_started = False
            # Continue processing next tool
            return None

    # Check if end thinking
    if (not self.is_thinking_end
            and (self.think_end_token_id in delta_token_ids
                 or self.think_end_token in delta_text)):
        self.is_thinking_end = True

    # If thinking hasn't ended yet, don't process any tool calls
    if not self.is_thinking_end:
        return DeltaMessage(content=delta_text)

    # Handle normal content before tool calls
    if not self.is_tool_call_started:
        # Check if tool call is starting
        if (self.tool_call_start_token_id in delta_token_ids
                or self.tool_call_start_token in delta_text):
            self.is_tool_call_started = True
            # Return any content before the tool call
            if self.tool_call_start_token in delta_text:
                content_before = delta_text[:delta_text.index(
                    self.tool_call_start_token)]
                if content_before:
                    return DeltaMessage(content=content_before)
            return None
        else:
            # Check if we're between tool calls - skip whitespace
            if (current_text.rstrip().endswith(self.tool_call_end_token)
                    and delta_text.strip() == ""):
                # We just ended a tool call, skip whitespace
                return None
            # Normal content, no tool call
            return DeltaMessage(content=delta_text)

    # Check if we're between tool calls (waiting for next one)
    # Count tool calls we've seen vs processed
    tool_starts_count = current_text.count(self.tool_call_start_token)
    if self.current_tool_index >= tool_starts_count:
        # We're past all tool calls, shouldn't be here
        return None

    # We're in a tool call, find the current tool call portion
    # Need to find the correct tool call based on current_tool_index
    # Only process tool calls after think_end_token
    think_end_index = current_text.find(self.think_end_token) + len(
        self.think_end_token
    ) if self.think_end_token in current_text else 0
    tool_starts: list[int] = []
    idx = think_end_index
    while True:
        idx = current_text.find(self.tool_call_start_token, idx)
        if idx == -1:
            break
        tool_starts.append(idx)
        idx += len(self.tool_call_start_token)

    if self.current_tool_index >= len(tool_starts):
        # No more tool calls to process yet
        return None

    tool_start_idx = tool_starts[self.current_tool_index]
    # Find where this tool call ends (or current position if not ended yet)
    tool_end_idx = current_text.find(self.tool_call_end_token,
                                     tool_start_idx)
    if tool_end_idx == -1:
        tool_text = current_text[tool_start_idx:]
    else:
        tool_text = current_text[tool_start_idx:tool_end_idx +
                                 len(self.tool_call_end_token)]

    # Looking for function header
    if not self.header_sent:
        if self.tool_call_prefix in tool_text:
            func_start = tool_text.find(self.tool_call_prefix) + len(
                self.tool_call_prefix)
            func_end = tool_text.find(">", func_start)

            if func_end != -1:
                # Found complete function name
                self.current_function_name = tool_text[func_start:func_end]
                self.current_tool_id = self._generate_tool_call_id(
                )  # type: ignore
                self.header_sent = True
                self.in_function = True

                # IMPORTANT: Add to prev_tool_call_arr immediately when we detect a tool call
                # This ensures finish_reason="tool_calls" even if parsing isn't complete
                already_added = any(
                    tool.get("name") == self.current_function_name
                    for tool in self.prev_tool_call_arr)
                if not already_added:
                    self.prev_tool_call_arr.append({
                        "name": self.current_function_name,
                        "arguments":
                        "{}",  # Placeholder, will be updated later
                    })

                # Send header with function info
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        id=self.current_tool_id,
                        function=DeltaFunctionCall(
                            name=self.current_function_name, arguments=""),
                        type="function",
                    )
                ])
        return None

    # We've sent header, now handle function body
    if self.in_function:
        # Send opening brace if not sent yet
        if (not self.json_started
                and self.parameter_prefix not in delta_text):
            self.json_started = True
            return DeltaMessage(tool_calls=[
                DeltaToolCall(
                    index=self.current_tool_index,
                    function=DeltaFunctionCall(arguments="{"),
                )
            ])

        # Make sure json_started is set if we're processing parameters
        if not self.json_started:
            self.json_started = True

        # Check for function end in accumulated text
        if not self.json_closed and self.function_end_token in tool_text:
            # Close JSON
            self.json_closed = True

            # Extract the complete tool call to update prev_tool_call_arr with final arguments
            # Find the function content
            func_start = tool_text.find(self.tool_call_prefix) + len(
                self.tool_call_prefix)
            func_content_end = tool_text.find(self.function_end_token,
                                              func_start)
            if func_content_end != -1:
                func_content = tool_text[func_start:func_content_end]
                # Parse to get the complete arguments
                try:
                    parsed_tool = self._parse_xml_function_call(
                        func_content, request.tools if request else None)
                    if parsed_tool:
                        # Update existing entry in prev_tool_call_arr with complete arguments
                        for i, tool in enumerate(self.prev_tool_call_arr):
                            if tool.get(
                                    "name") == parsed_tool.function.name:
                                self.prev_tool_call_arr[i]["arguments"] = (
                                    parsed_tool.function.arguments)
                                break
                except Exception:
                    logger.warning(
                        "Failed to parse tool arguments during streaming.",
                        exc_info=True)

            result = DeltaMessage(tool_calls=[
                DeltaToolCall(
                    index=self.current_tool_index,
                    function=DeltaFunctionCall(arguments="}"),
                )
            ])

            # Reset state for next tool
            self.in_function = False
            self.json_closed = True

            return result

        # Look for parameters
        # Count how many complete parameters we have processed
        complete_params = tool_text.count(self.parameter_end_token)

        # Check if we should start a new parameter
        if not self.in_param and self.param_count < complete_params:
            # Find the unprocessed parameter
            # Count parameter starts
            param_starts = []
            idx = 0
            while True:
                idx = tool_text.find(self.parameter_prefix, idx)
                if idx == -1:
                    break
                param_starts.append(idx)
                idx += len(self.parameter_prefix)

            if len(param_starts) > self.param_count:
                # Process the next parameter
                param_idx = param_starts[self.param_count]
                param_start = param_idx + len(self.parameter_prefix)
                remaining = tool_text[param_start:]

                if ">" in remaining:
                    # We have the complete parameter name
                    name_end = remaining.find(">")
                    self.current_param_name = remaining[:name_end]

                    # Find the parameter value
                    value_start = param_start + name_end + 1
                    value_text = tool_text[value_start:]
                    if value_text.startswith("\n"):
                        value_text = value_text[1:]

                    # Find where this parameter ends
                    param_end_idx = value_text.find(
                        self.parameter_end_token)
                    if param_end_idx != -1:
                        # Complete parameter found
                        param_value = value_text[:param_end_idx]
                        if param_value.endswith("\n"):
                            param_value = param_value[:-1]

                        # Build complete JSON fragment for this parameter
                        if self.param_count == 0:
                            json_fragment = (
                                '"' + self.current_param_name + '": "' +
                                json.dumps(param_value)[1:-1] + '"')
                        else:
                            json_fragment = (
                                ', "' + self.current_param_name + '": "' +
                                json.dumps(param_value)[1:-1] + '"')

                        self.param_count += 1

                        return DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=self.current_tool_index,
                                function=DeltaFunctionCall(
                                    arguments=json_fragment),
                            )
                        ])

        # Continue parameter value
        if self.in_param:
            if self.parameter_end_token in delta_text:
                # End of parameter
                end_idx = delta_text.find(self.parameter_end_token)
                value_chunk = delta_text[:end_idx]

                # Skip past > if at start
                if not self.current_param_value and ">" in value_chunk:
                    gt_idx = value_chunk.find(">")
                    value_chunk = value_chunk[gt_idx + 1:]

                if not self.current_param_value and value_chunk.startswith(
                        "\n"):
                    value_chunk = value_chunk[1:]

                # Calculate incremental JSON
                full_value = self.current_param_value + value_chunk
                prev_escaped = (json.dumps(self.current_param_value)[1:-1]
                                if self.current_param_value else "")
                full_escaped = json.dumps(full_value)[1:-1]
                delta_escaped = full_escaped[len(prev_escaped):]

                self.in_param = False
                self.current_param_value = ""

                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(
                            arguments=delta_escaped + '"'),
                    )
                ])
            else:
                # Continue accumulating value
                value_chunk = delta_text

                # Handle first chunk after param name
                if not self.current_param_value and ">" in value_chunk:
                    gt_idx = value_chunk.find(">")
                    value_chunk = value_chunk[gt_idx + 1:]

                if not self.current_param_value and value_chunk.startswith(
                        "\n"):
                    value_chunk = value_chunk[1:]

                if value_chunk:
                    # Stream the escaped delta
                    prev_escaped = (json.dumps(
                        self.current_param_value)[1:-1]
                                    if self.current_param_value else "")
                    self.current_param_value += value_chunk
                    full_escaped = json.dumps(
                        self.current_param_value)[1:-1]
                    delta_escaped = full_escaped[len(prev_escaped):]

                    if delta_escaped:
                        return DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=self.current_tool_index,
                                function=DeltaFunctionCall(
                                    arguments=delta_escaped),
                            )
                        ])

    return None

Step3ToolParser

Bases: ToolParser

Tool parser for a model that uses a specific XML-like format for tool calls. This version uses a robust, stateful, cursor-based streaming parser and consolidates tool arguments into a single message.

Source code in vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
@ToolParserManager.register_module(["step3"])
class Step3ToolParser(ToolParser):
    """
    Tool parser for a model that uses a specific XML-like format for tool calls.
    This version uses a robust, stateful, cursor-based streaming parser and
    consolidates tool arguments into a single message.
    """

    # Special tokens emitted by the model to delimit the tool-call block
    # and the individual calls inside it.
    TOOL_CALLS_BEGIN = "<|tool_calls_begin|>"
    TOOL_CALLS_END = "<|tool_calls_end|>"
    TOOL_CALL_BEGIN = "<|tool_call_begin|>"
    TOOL_CALL_END = "<|tool_call_end|>"
    # Separates the call type (e.g. "function") from the steptml invocation.
    TOOL_SEP = "<|tool_sep|>"
    SPECIAL_TOKENS = [
        TOOL_CALLS_BEGIN, TOOL_CALLS_END, TOOL_CALL_BEGIN, TOOL_CALL_END
    ]

    def __init__(self, tokenizer: AnyTokenizer):
        """Set up the per-response cursor and state flags for streaming."""
        super().__init__(tokenizer)
        # Cursor into current_text; everything before it has been consumed.
        self.position = 0
        # Explicit state flags for robust streaming
        self.tool_block_started = False
        self.tool_block_finished = False

    def adjust_request(
            self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """Keep special tokens in the detokenized output whenever tools may
        be called, so this parser can see the tool-call delimiter tokens."""
        if request.tools and request.tool_choice != 'none':
            request.skip_special_tokens = False
        return request

    @staticmethod
    def _parse_steptml_invoke(
            action_text: str
    ) -> tuple[Optional[str], Optional[dict[str, str]]]:
        """Parse a steptml invocation, returning (function_name, params),
        or (None, None) when no <steptml:invoke> tag is present."""
        func_name_match = re.search(r'<steptml:invoke name="([^"]+)">',
                                    action_text)
        if not func_name_match:
            return None, None
        func_name = func_name_match.group(1)

        params: dict[str, str] = {}
        # NOTE: [^<]* means parameter values containing '<' will not match.
        param_matches = re.findall(
            r'<steptml:parameter name="([^"]+)">([^<]*)</steptml:parameter>',
            action_text)
        for name, value in param_matches:
            params[name] = value.strip()
        return func_name, params

    def _cast_arguments(
        self,
        func_name: str,
        params: dict[str, Any],
        request: ChatCompletionRequest,
    ) -> dict[str, Any]:
        """Coerce string parameter values to the JSON-schema types declared
        for `func_name` in the request's tool list. Values that fail to
        convert are left as strings. Mutates and returns `params`."""
        for tool in request.tools or []:
            if tool.function.name == func_name:
                schema = tool.function.parameters or {}
                properties = schema.get("properties", {})
                for key, value in params.items():
                    if not isinstance(value, str):
                        continue
                    prop = properties.get(key, {})
                    typ = prop.get("type")
                    if typ == "string":
                        params[key] = value.strip()
                    elif typ == "integer":
                        with contextlib.suppress(ValueError):
                            params[key] = int(value)
                    elif typ == "number":
                        with contextlib.suppress(ValueError):
                            params[key] = float(value)
                    elif typ == "boolean":
                        lower_val = value.lower()
                        params[key] = lower_val == "true" if lower_val in (
                            "true", "false") else value
                    elif typ == "null":
                        params[key] = None if value.lower(
                        ) == "null" else value
                break
        return params

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Streaming extraction driven by a persistent cursor (self.position).

        Emits plain-content deltas outside the tool block, each function
        name as soon as it is identified, and a call's full JSON arguments
        once its end token arrives. Returns None to wait for more tokens.
        """

        # The main loop processes the stream from the last known position.
        while True:
            if self.position >= len(current_text):
                return None  # We've processed the entire stream.

            unprocessed_text = current_text[self.position:]

            # STATE: After all tools are done, all subsequent text is content.
            if self.tool_block_finished:
                self.position = len(current_text)
                return DeltaMessage(content=unprocessed_text)

            # STATE: Before the tool block has started.
            if not self.tool_block_started:
                if unprocessed_text.startswith(self.TOOL_CALLS_BEGIN):
                    self.position += len(self.TOOL_CALLS_BEGIN)
                    self.tool_block_started = True
                    continue  # Token consumed, re-loop.

                start_pos = unprocessed_text.find(self.TOOL_CALLS_BEGIN)
                if start_pos == -1:
                    if self.TOOL_CALLS_BEGIN.startswith(
                            unprocessed_text.strip()) and unprocessed_text:
                        return None  # It's a prefix, wait.
                    self.position = len(current_text)
                    return DeltaMessage(content=unprocessed_text)
                else:
                    content = unprocessed_text[:start_pos]
                    self.position += len(content)
                    return DeltaMessage(content=content)

            # STATE: Inside the main tool block.
            # Skip whitespace between tokens, advancing the cursor past it.
            offset = len(unprocessed_text) - len(unprocessed_text.lstrip())
            unprocessed_text = unprocessed_text.lstrip()
            self.position += offset

            if unprocessed_text.startswith(self.TOOL_CALLS_END):
                self.position += len(self.TOOL_CALLS_END)
                self.tool_block_finished = True
                self.current_tool_id = -1
                continue

            # Check if we are between tool calls.
            tool_finished = (
                self.current_tool_id != -1 and
                self.prev_tool_call_arr[self.current_tool_id].get("finished"))
            if self.current_tool_id == -1 or tool_finished:
                if unprocessed_text.startswith(self.TOOL_CALL_BEGIN):
                    self.position += len(self.TOOL_CALL_BEGIN)
                    if self.current_tool_id == -1:
                        self.current_tool_id = 0
                    else:
                        self.current_tool_id += 1
                    self.current_tool_name_sent = False
                    while len(self.prev_tool_call_arr) <= self.current_tool_id:
                        self.prev_tool_call_arr.append({})
                    self.prev_tool_call_arr[
                        self.current_tool_id]["finished"] = False
                    continue

                # Partial TOOL_CALL_BEGIN token: wait for more characters.
                if self.TOOL_CALL_BEGIN.startswith(unprocessed_text):
                    return None

            # STATE: Parsing an active tool call.
            if self.current_tool_id != -1 and not self.prev_tool_call_arr[
                    self.current_tool_id].get("finished", False):
                end_tool_pos = unprocessed_text.find(self.TOOL_CALL_END)
                if end_tool_pos == -1:
                    tool_body = unprocessed_text
                else:
                    tool_body = unprocessed_text[:end_tool_pos]

                # Partial TOOL_CALL_END token: wait for more characters.
                if end_tool_pos == -1 and self.TOOL_CALL_END.startswith(
                        tool_body):
                    return None

                function_name, arguments = self._parse_steptml_invoke(
                    tool_body)
                if not function_name:
                    return None

                tool_call_arr = {
                    "name": function_name,
                    "parameters": arguments or {}
                }

                # Send the function name as soon as it's parsed.
                if not self.current_tool_name_sent:
                    self.current_tool_name_sent = True
                    self.prev_tool_call_arr[self.current_tool_id].update(
                        tool_call_arr)
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      type="function",
                                      id=f"chatcmpl-tool-{random_uuid()}",
                                      function=DeltaFunctionCall(
                                          name=function_name))
                    ])

                # Update our internal state with the latest parsed arguments.
                self.prev_tool_call_arr[
                    self.current_tool_id].update(  # noqa: E501
                        tool_call_arr)

                # Only send arguments when the tool call is complete.
                if end_tool_pos != -1:
                    self.position += end_tool_pos + len(self.TOOL_CALL_END)
                    self.prev_tool_call_arr[
                        self.current_tool_id]["finished"] = True

                    final_args = self._cast_arguments(
                        function_name,
                        tool_call_arr.get("parameters", {}),  # type: ignore
                        request)
                    if final_args:
                        final_args_json = json.dumps(final_args,
                                                     ensure_ascii=False)
                        return DeltaMessage(tool_calls=[
                            DeltaToolCall(index=self.current_tool_id,
                                          function=DeltaFunctionCall(
                                              arguments=final_args_json))
                        ])

                # If tool is not finished, return None to wait for more tokens.
                return None

            return None

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Non-streaming extraction: parse every complete tool call out of
        `model_output` and return them plus any surrounding free text."""
        if self.TOOL_CALLS_BEGIN not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        pre_text, rest = model_output.split(self.TOOL_CALLS_BEGIN, 1)
        if self.TOOL_CALLS_END not in rest:
            # Tool block never closed; treat everything as plain content.
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        tool_block, post_text = rest.split(self.TOOL_CALLS_END, 1)
        content = (pre_text + post_text).strip()

        tool_calls: list[ToolCall] = []
        call_parts = tool_block.split(self.TOOL_CALL_BEGIN)

        for part in call_parts:
            if not part or self.TOOL_CALL_END not in part:
                continue

            call_content = part.split(self.TOOL_CALL_END, 1)[0]
            if self.TOOL_SEP not in call_content:
                continue

            type_part, invoke_part = call_content.split(self.TOOL_SEP, 1)
            if type_part.strip() != "function":
                # Only "function"-typed tool calls are supported.
                continue

            function_name, params_dict = self._parse_steptml_invoke(
                invoke_part)

            if function_name and params_dict is not None:
                params_dict = self._cast_arguments(function_name, params_dict,
                                                   request)
                params_str = json.dumps(params_dict, ensure_ascii=False)
                tool_calls.append(
                    ToolCall(function=FunctionCall(name=function_name,
                                                   arguments=params_str)))
        if tool_calls:
            return ExtractedToolCallInformation(
                tools_called=True,
                tool_calls=tool_calls,
                content=content if content else None)
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

SPECIAL_TOKENS class-attribute instance-attribute

TOOL_CALLS_BEGIN class-attribute instance-attribute

TOOL_CALLS_BEGIN = '<|tool_calls_begin|>'

TOOL_CALLS_END class-attribute instance-attribute

TOOL_CALLS_END = '<|tool_calls_end|>'

TOOL_CALL_BEGIN class-attribute instance-attribute

TOOL_CALL_BEGIN = '<|tool_call_begin|>'

TOOL_CALL_END class-attribute instance-attribute

TOOL_CALL_END = '<|tool_call_end|>'

TOOL_SEP class-attribute instance-attribute

TOOL_SEP = '<|tool_sep|>'

position instance-attribute

position = 0

tool_block_finished instance-attribute

tool_block_finished = False

tool_block_started instance-attribute

tool_block_started = False

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Set up per-response streaming state for the step3 parser."""
    super().__init__(tokenizer)
    self.tool_block_started = False   # seen TOOL_CALLS_BEGIN yet?
    self.tool_block_finished = False  # seen TOOL_CALLS_END yet?
    self.position = 0  # cursor: prefix of current_text already consumed

_cast_arguments

_cast_arguments(
    func_name: str,
    params: dict[str, Any],
    request: ChatCompletionRequest,
) -> dict[str, Any]
Source code in vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
def _cast_arguments(
    self,
    func_name: str,
    params: dict[str, Any],
    request: ChatCompletionRequest,
) -> dict[str, Any]:
    """Coerce string parameter values to the JSON-schema types declared
    for `func_name` in the request's tool list.

    Values that fail to convert are left unchanged. `params` is mutated
    in place and returned.
    """
    # Find the declared properties for the first tool matching func_name.
    schema_props: dict[str, Any] = {}
    matched = False
    for tool in request.tools or []:
        if tool.function.name == func_name:
            matched = True
            schema_props = (tool.function.parameters or {}).get(
                "properties", {})
            break
    if not matched:
        return params

    for key, raw in params.items():
        if not isinstance(raw, str):
            continue
        declared = schema_props.get(key, {}).get("type")
        if declared == "string":
            params[key] = raw.strip()
        elif declared == "integer":
            try:
                params[key] = int(raw)
            except ValueError:
                pass  # keep the original string
        elif declared == "number":
            try:
                params[key] = float(raw)
            except ValueError:
                pass  # keep the original string
        elif declared == "boolean":
            lowered = raw.lower()
            if lowered in ("true", "false"):
                params[key] = lowered == "true"
        elif declared == "null":
            if raw.lower() == "null":
                params[key] = None
    return params

_parse_steptml_invoke staticmethod

_parse_steptml_invoke(
    action_text: str,
) -> tuple[Optional[str], Optional[dict[str, str]]]
Source code in vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
@staticmethod
def _parse_steptml_invoke(
        action_text: str
) -> tuple[Optional[str], Optional[dict[str, str]]]:
    """Extract the function name and stripped parameter values from a
    steptml invocation body.

    Returns (None, None) when no <steptml:invoke> tag is found. Note the
    value pattern [^<]* cannot match values containing '<'.
    """
    name_match = re.search(r'<steptml:invoke name="([^"]+)">', action_text)
    if name_match is None:
        return None, None

    args = {
        pname: pval.strip()
        for pname, pval in re.findall(
            r'<steptml:parameter name="([^"]+)">([^<]*)</steptml:parameter>',
            action_text)
    }
    return name_match.group(1), args

adjust_request

adjust_request(
    request: ChatCompletionRequest,
) -> ChatCompletionRequest
Source code in vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
def adjust_request(
        self, request: ChatCompletionRequest) -> ChatCompletionRequest:
    """Keep special tokens in the output when tools may be invoked, so
    the streaming parser can observe the tool-call delimiter tokens."""
    tools_active = bool(request.tools) and request.tool_choice != 'none'
    if tools_active:
        request.skip_special_tokens = False
    return request

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Parse all complete tool calls out of a finished model response.

    Returns the parsed calls with any text surrounding the tool block as
    content, or the raw output as content when no complete, well-formed
    tool block is present.
    """
    no_tools = ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)
    if self.TOOL_CALLS_BEGIN not in model_output:
        return no_tools

    pre_text, rest = model_output.split(self.TOOL_CALLS_BEGIN, 1)
    if self.TOOL_CALLS_END not in rest:
        # Tool block never closed; treat everything as plain content.
        return no_tools

    tool_block, post_text = rest.split(self.TOOL_CALLS_END, 1)
    content = (pre_text + post_text).strip()

    tool_calls: list[ToolCall] = []
    for segment in tool_block.split(self.TOOL_CALL_BEGIN):
        if not segment or self.TOOL_CALL_END not in segment:
            continue

        call_text, _, _ = segment.partition(self.TOOL_CALL_END)
        if self.TOOL_SEP not in call_text:
            continue

        kind, _, invocation = call_text.partition(self.TOOL_SEP)
        if kind.strip() != "function":
            # Only "function"-typed tool calls are supported.
            continue

        name, args = self._parse_steptml_invoke(invocation)
        if not name or args is None:
            continue

        args = self._cast_arguments(name, args, request)
        tool_calls.append(
            ToolCall(function=FunctionCall(
                name=name,
                arguments=json.dumps(args, ensure_ascii=False))))

    if tool_calls:
        return ExtractedToolCallInformation(
            tools_called=True,
            tool_calls=tool_calls,
            content=content if content else None)
    return no_tools

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Streaming extraction driven by a persistent cursor (self.position).

    Emits plain-content deltas outside the tool block, each function
    name as soon as it is identified, and a call's full JSON arguments
    once its end token arrives. Returns None to wait for more tokens.
    """

    # The main loop processes the stream from the last known position.
    while True:
        if self.position >= len(current_text):
            return None  # We've processed the entire stream.

        unprocessed_text = current_text[self.position:]

        # STATE: After all tools are done, all subsequent text is content.
        if self.tool_block_finished:
            self.position = len(current_text)
            return DeltaMessage(content=unprocessed_text)

        # STATE: Before the tool block has started.
        if not self.tool_block_started:
            if unprocessed_text.startswith(self.TOOL_CALLS_BEGIN):
                self.position += len(self.TOOL_CALLS_BEGIN)
                self.tool_block_started = True
                continue  # Token consumed, re-loop.

            start_pos = unprocessed_text.find(self.TOOL_CALLS_BEGIN)
            if start_pos == -1:
                if self.TOOL_CALLS_BEGIN.startswith(
                        unprocessed_text.strip()) and unprocessed_text:
                    return None  # It's a prefix, wait.
                self.position = len(current_text)
                return DeltaMessage(content=unprocessed_text)
            else:
                content = unprocessed_text[:start_pos]
                self.position += len(content)
                return DeltaMessage(content=content)

        # STATE: Inside the main tool block.
        # Skip whitespace between tokens, advancing the cursor past it.
        offset = len(unprocessed_text) - len(unprocessed_text.lstrip())
        unprocessed_text = unprocessed_text.lstrip()
        self.position += offset

        if unprocessed_text.startswith(self.TOOL_CALLS_END):
            self.position += len(self.TOOL_CALLS_END)
            self.tool_block_finished = True
            self.current_tool_id = -1
            continue

        # Check if we are between tool calls.
        tool_finished = (
            self.current_tool_id != -1 and
            self.prev_tool_call_arr[self.current_tool_id].get("finished"))
        if self.current_tool_id == -1 or tool_finished:
            if unprocessed_text.startswith(self.TOOL_CALL_BEGIN):
                self.position += len(self.TOOL_CALL_BEGIN)
                if self.current_tool_id == -1:
                    self.current_tool_id = 0
                else:
                    self.current_tool_id += 1
                self.current_tool_name_sent = False
                while len(self.prev_tool_call_arr) <= self.current_tool_id:
                    self.prev_tool_call_arr.append({})
                self.prev_tool_call_arr[
                    self.current_tool_id]["finished"] = False
                continue

            # Partial TOOL_CALL_BEGIN token: wait for more characters.
            if self.TOOL_CALL_BEGIN.startswith(unprocessed_text):
                return None

        # STATE: Parsing an active tool call.
        if self.current_tool_id != -1 and not self.prev_tool_call_arr[
                self.current_tool_id].get("finished", False):
            end_tool_pos = unprocessed_text.find(self.TOOL_CALL_END)
            if end_tool_pos == -1:
                tool_body = unprocessed_text
            else:
                tool_body = unprocessed_text[:end_tool_pos]

            # Partial TOOL_CALL_END token: wait for more characters.
            if end_tool_pos == -1 and self.TOOL_CALL_END.startswith(
                    tool_body):
                return None

            function_name, arguments = self._parse_steptml_invoke(
                tool_body)
            if not function_name:
                return None

            tool_call_arr = {
                "name": function_name,
                "parameters": arguments or {}
            }

            # Send the function name as soon as it's parsed.
            if not self.current_tool_name_sent:
                self.current_tool_name_sent = True
                self.prev_tool_call_arr[self.current_tool_id].update(
                    tool_call_arr)
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.current_tool_id,
                                  type="function",
                                  id=f"chatcmpl-tool-{random_uuid()}",
                                  function=DeltaFunctionCall(
                                      name=function_name))
                ])

            # Update our internal state with the latest parsed arguments.
            self.prev_tool_call_arr[
                self.current_tool_id].update(  # noqa: E501
                    tool_call_arr)

            # Only send arguments when the tool call is complete.
            if end_tool_pos != -1:
                self.position += end_tool_pos + len(self.TOOL_CALL_END)
                self.prev_tool_call_arr[
                    self.current_tool_id]["finished"] = True

                final_args = self._cast_arguments(
                    function_name,
                    tool_call_arr.get("parameters", {}),  # type: ignore
                    request)
                if final_args:
                    final_args_json = json.dumps(final_args,
                                                 ensure_ascii=False)
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.current_tool_id,
                                      function=DeltaFunctionCall(
                                          arguments=final_args_json))
                    ])

            # If tool is not finished, return None to wait for more tokens.
            return None

        return None

ToolParser

Abstract ToolParser class that should not be used directly. Provided properties and methods should be used in derived classes.

Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
class ToolParser:
    """
    Base class for tool parsers.

    Not intended for direct use; subclasses inherit the shared streaming
    state set up here and override the extraction hooks below.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        self.model_tokenizer = tokenizer

        # Streaming state shared by all parser implementations.
        self.prev_tool_call_arr: list[dict] = []
        # the index of the tool call that is currently being parsed
        self.current_tool_id: int = -1
        self.current_tool_name_sent: bool = False
        self.streamed_args_for_tool: list[str] = []

    @cached_property
    def vocab(self) -> dict[str, int]:
        # NOTE: Only PreTrainedTokenizerFast is guaranteed to have .vocab
        # whereas all tokenizers have .get_vocab()
        return self.model_tokenizer.get_vocab()

    def adjust_request(
            self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """
        Hook used to adjust the request parameters; the base
        implementation returns the request unchanged.
        """
        return request

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete model-generated string, for
        non-streaming responses where the entire model response is
        available before sending to the client. Effectively stateless;
        subclasses must override, the base implementation always raises.
        """
        raise NotImplementedError(
            "AbstractToolParser.extract_tool_calls has not been implemented!")

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """
        Extract tool calls from an incomplete response while streaming.
        An instance method because it relies on state: the current
        tokens/diffs plus what has previously been parsed and extracted
        (see constructor). Subclasses must override; the base
        implementation always raises.
        """
        raise NotImplementedError(
            "AbstractToolParser.extract_tool_calls_streaming has not been "
            "implemented!")

current_tool_id instance-attribute

current_tool_id: int = -1

current_tool_name_sent instance-attribute

current_tool_name_sent: bool = False

model_tokenizer instance-attribute

model_tokenizer = tokenizer

prev_tool_call_arr instance-attribute

prev_tool_call_arr: list[dict] = []

streamed_args_for_tool instance-attribute

streamed_args_for_tool: list[str] = []

vocab cached property

vocab: dict[str, int]

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Initialize the shared streaming-parser state."""
    self.model_tokenizer = tokenizer

    # Accumulated view of each tool call parsed so far.
    self.prev_tool_call_arr: list[dict] = []
    # Index of the tool call currently being parsed (-1: none yet).
    self.current_tool_id: int = -1
    # Whether the current call's function name has been emitted.
    self.current_tool_name_sent: bool = False
    # Argument text already streamed to the client, per tool.
    self.streamed_args_for_tool: list[str] = []

adjust_request

adjust_request(
    request: ChatCompletionRequest,
) -> ChatCompletionRequest

Static method that is used to adjust the request parameters.

Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
def adjust_request(
        self, request: ChatCompletionRequest) -> ChatCompletionRequest:
    """
    Hook used to adjust the request parameters before generation;
    the base implementation returns the request unchanged.
    """
    return request

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Static method that should be implemented for extracting tool calls from a complete model-generated string. Used for non-streaming responses where we have the entire model response available before sending to the client. Static because it's stateless.

Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """
    Extract tool calls from a complete model-generated string.
    Used for non-streaming responses where we have the entire model
    response available before sending to the client. Effectively
    stateless; subclasses must override this method, the base
    implementation always raises.
    """
    raise NotImplementedError(
        "AbstractToolParser.extract_tool_calls has not been implemented!")

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]

Instance method that should be implemented for extracting tool calls from an incomplete response; for use when handling tool calls and streaming. Has to be an instance method because it requires state - the current tokens/diffs, but also the information about what has previously been parsed and extracted (see constructor)

Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """
    Extract tool calls from an incomplete response; for use when handling
    tool calls during streaming. Has to be an instance method because it
    requires state - the current tokens/diffs, but also the information
    about what has previously been parsed and extracted (see constructor).
    Subclasses must override this; the base implementation always raises.
    """
    raise NotImplementedError(
        "AbstractToolParser.extract_tool_calls_streaming has not been "
        "implemented!")

ToolParserManager

Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
class ToolParserManager:
    """Registry of ToolParser subclasses, keyed by parser name."""

    # Maps registered parser name -> ToolParser subclass.
    tool_parsers: dict[str, type] = {}

    @classmethod
    def get_tool_parser(cls, name) -> type:
        """
        Get tool parser by name which is registered by `register_module`.

        Raise a KeyError exception if the name is not registered.
        """
        if name in cls.tool_parsers:
            return cls.tool_parsers[name]

        raise KeyError(f"tool helper: '{name}' not found in tool_parsers")

    @classmethod
    def _register_module(cls,
                         module: type,
                         module_name: Optional[Union[str, list[str]]] = None,
                         force: bool = True) -> None:
        """Register `module` under one or more names; when `force` is
        False, refuse to overwrite an existing registration."""
        if not issubclass(module, ToolParser):
            raise TypeError(
                f'module must be subclass of ToolParser, but got {type(module)}'
            )
        if module_name is None:
            # Default to the class's own name.
            module_name = module.__name__
        if isinstance(module_name, str):
            module_name = [module_name]
        for name in module_name:
            if not force and name in cls.tool_parsers:
                existed_module = cls.tool_parsers[name]
                raise KeyError(f'{name} is already registered '
                               f'at {existed_module.__module__}')
            cls.tool_parsers[name] = module

    @classmethod
    def register_module(
            cls,
            name: Optional[Union[str, list[str]]] = None,
            force: bool = True,
            module: Union[type, None] = None) -> Union[type, Callable]:
        """
        Register module with the given name or name list. It can be used
        either as a decorator (with `module` left as None) or as a normal
        function (with `module` provided).
        """
        if not isinstance(force, bool):
            raise TypeError(f'force must be a boolean, but got {type(force)}')

        # raise the error ahead of time
        if not (name is None or isinstance(name, str)
                or is_list_of(name, str)):
            raise TypeError(
                'name must be None, an instance of str, or a sequence of str, '
                f'but got {type(name)}')

        # use it as a normal method: x.register_module(module=SomeClass)
        if module is not None:
            cls._register_module(module=module, module_name=name, force=force)
            return module

        # use it as a decorator: @x.register_module()
        def _register(module):
            cls._register_module(module=module, module_name=name, force=force)
            return module

        return _register

    @classmethod
    def import_tool_parser(cls, plugin_path: str) -> None:
        """
        Import a user-defined tool parser from the tool parser definition
        file at `plugin_path`.
        """
        module_name = os.path.splitext(os.path.basename(plugin_path))[0]

        try:
            import_from_path(module_name, plugin_path)
        except Exception:
            # Log and continue: a broken plugin must not crash the server.
            logger.exception("Failed to load module '%s' from %s.",
                             module_name, plugin_path)
            return

tool_parsers class-attribute instance-attribute

tool_parsers: dict[str, type] = {}

_register_module classmethod

_register_module(
    module: type,
    module_name: Optional[Union[str, list[str]]] = None,
    force: bool = True,
) -> None
Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@classmethod
def _register_module(cls,
                     module: type,
                     module_name: Optional[Union[str, list[str]]] = None,
                     force: bool = True) -> None:
    """
    Insert *module* into ``cls.tool_parsers`` under one or more names.

    Falls back to the class's own name when none is given. With
    ``force=False`` an already-registered name raises ``KeyError`` instead
    of being overwritten.
    """
    if not issubclass(module, ToolParser):
        raise TypeError(
            f'module must be subclass of ToolParser, but got {type(module)}'
        )
    # Normalize the requested name(s) to a list of strings.
    names = [module.__name__] if module_name is None else module_name
    if isinstance(names, str):
        names = [names]
    for name in names:
        already_taken = name in cls.tool_parsers
        if already_taken and not force:
            existed_module = cls.tool_parsers[name]
            raise KeyError(f'{name} is already registered '
                           f'at {existed_module.__module__}')
        cls.tool_parsers[name] = module

get_tool_parser classmethod

get_tool_parser(name) -> type

Get tool parser by name which is registered by register_module.

Raise a KeyError exception if the name is not registered.

Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@classmethod
def get_tool_parser(cls, name) -> type:
    """
    Look up a tool parser class previously registered via `register_module`.

    Raises:
        KeyError: if *name* was never registered.
    """
    # Guard clause: fail fast on an unknown name.
    if name not in cls.tool_parsers:
        raise KeyError(f"tool helper: '{name}' not found in tool_parsers")
    return cls.tool_parsers[name]

import_tool_parser classmethod

import_tool_parser(plugin_path: str) -> None

Import a user-defined tool parser by the path of the tool parser definition file.

Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@classmethod
def import_tool_parser(cls, plugin_path: str) -> None:
    """
    Import a user-defined tool parser by the path of the tool parser
    definition file.

    The module name is derived from the plugin file's base name. Import
    errors are logged (with traceback) and suppressed so that a broken
    plugin cannot crash the caller.
    """
    # e.g. "/plugins/my_parser.py" -> "my_parser"
    module_name = os.path.splitext(os.path.basename(plugin_path))[0]

    try:
        import_from_path(module_name, plugin_path)
    except Exception:
        # Best-effort load: log and return instead of propagating.
        logger.exception("Failed to load module '%s' from %s.",
                         module_name, plugin_path)
        return

register_module classmethod

register_module(
    name: Optional[Union[str, list[str]]] = None,
    force: bool = True,
    module: Union[type, None] = None,
) -> Union[type, Callable]

Register module with the given name or name list. It can be used as a decorator (with module as None) or as a normal function (with module not None).

Source code in vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@classmethod
def register_module(
        cls,
        name: Optional[Union[str, list[str]]] = None,
        force: bool = True,
        module: Union[type, None] = None) -> Union[type, Callable]:
    """
    Register a module under the given name or name list.

    Can be used either as a decorator (``module`` left as None) or as a
    normal function call (``module`` passed explicitly).
    """
    if not isinstance(force, bool):
        raise TypeError(f'force must be a boolean, but got {type(force)}')

    # Validate `name` eagerly so misuse fails before any registration.
    name_is_valid = (name is None or isinstance(name, str)
                     or is_list_of(name, str))
    if not name_is_valid:
        raise TypeError(
            'name must be None, an instance of str, or a sequence of str, '
            f'but got {type(name)}')

    # Direct-call form: x.register_module(module=SomeClass)
    if module is not None:
        cls._register_module(module=module, module_name=name, force=force)
        return module

    # Decorator form: @x.register_module()
    def _decorator(target):
        cls._register_module(module=target, module_name=name, force=force)
        return target

    return _decorator

xLAMToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
@ToolParserManager.register_module("xlam")
class xLAMToolParser(ToolParser):
    """
    Tool parser for xLAM-style model output.

    The model emits tool calls as a JSON array of
    ``{"name": ..., "arguments": ...}`` objects, optionally wrapped in a
    JSON code fence, a ``[TOOL_CALLS]`` prefix, ``<tool_call>`` tags, or
    placed after a ``</think>`` block. Supports both one-shot extraction
    and incremental (streaming) extraction.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        # Initialize state for streaming mode
        self.prev_tool_calls: list[dict] = []
        self.current_tool_id = -1
        self.current_tool_name_sent = False
        self.streamed_args: list[str] = [
        ]  # Track arguments sent for each tool

        # For backward compatibility with tests
        self.current_tools_sent: list[bool] = []

        # For backward compatibility with serving code
        self.prev_tool_call_arr = []

        # Regex patterns for preprocessing: each captures a candidate JSON
        # payload from a fenced/tagged wrapper.
        self.json_code_block_patterns = [
            r"```(?:json)?\s*([\s\S]*?)```",
            r"\[TOOL_CALLS\]([\s\S]*?)(?=\n|$)",
            r"<tool_call>([\s\S]*?)</tool_call>",
        ]
        self.thinking_tag_pattern = r"</think>([\s\S]*)"

        # Define streaming state type to be initialized later
        self.streaming_state: dict[str, Any] = {
            "current_tool_index": -1,
            "tool_ids": [],
            "sent_tools": [],
        }

    def preprocess_model_output(
            self, model_output: str) -> tuple[Optional[str], Optional[str]]:
        """
        Preprocess the model output to extract content and potential tool
        calls.

        Returns:
            Tuple of (content, potential_tool_calls_json); either element
            may be None.
        """
        # Check for thinking tag
        thinking_match = re.search(self.thinking_tag_pattern, model_output)
        if thinking_match:
            # NOTE(review): `content` keeps the closing "</think>" tag itself
            # (slice ends at start + len("</think>")) — confirm intended.
            content = model_output[:thinking_match.start() +
                                   len("</think>")].strip()
            thinking_content = thinking_match.group(1).strip()

            # Try to parse the thinking content as JSON
            try:
                json.loads(thinking_content)
                return content, thinking_content
            except json.JSONDecodeError:
                # If can't parse as JSON, look for JSON code blocks
                for json_pattern in self.json_code_block_patterns:
                    json_matches = re.findall(json_pattern, thinking_content)
                    if json_matches:
                        for json_str in json_matches:
                            try:
                                json.loads(json_str)
                                return content, json_str
                            except json.JSONDecodeError:
                                continue

        # Check for JSON code blocks in the entire output
        for json_pattern in self.json_code_block_patterns:
            json_matches = re.findall(json_pattern, model_output)
            if json_matches:
                for json_str in json_matches:
                    try:
                        json.loads(json_str)
                        # Extract content by removing the JSON code block
                        content = re.sub(json_pattern, "",
                                         model_output).strip()
                        return content, json_str
                    except json.JSONDecodeError:
                        continue

        # If the entire output is a valid JSON array or looks like one, treat it as tool calls
        if model_output.strip().startswith("["):
            try:
                json.loads(model_output)
                return None, model_output
            except json.JSONDecodeError:
                # Even if it's not valid JSON yet, it might be a tool call in progress
                if ("{" in model_output and "name" in model_output
                        and "arguments" in model_output):
                    return None, model_output

        # If no tool calls found, return the original output as content
        return model_output, None

    def extract_tool_calls(
            self, model_output: str,
            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete model output.

        Malformed entries are skipped; on any unexpected error the raw
        model output is returned as plain content with no tool calls.
        """
        try:
            # Preprocess the model output
            content, potential_tool_calls = self.preprocess_model_output(
                model_output)

            if not potential_tool_calls:
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=content)

            # Parse the potential tool calls as JSON
            tool_calls_data = json.loads(potential_tool_calls)

            # Ensure it's an array
            if not isinstance(tool_calls_data, list):
                logger.debug("Tool calls data is not an array")
                return ExtractedToolCallInformation(
                    tools_called=False,
                    tool_calls=[],
                    content=content or model_output,
                )

            tool_calls: list[ToolCall] = []

            for idx, call in enumerate(tool_calls_data):
                # Each call must be an object carrying both "name" and
                # "arguments"; anything else is skipped, not fatal.
                if (not isinstance(call, dict) or "name" not in call
                        or "arguments" not in call):
                    logger.debug("Invalid tool call format at index %d", idx)
                    continue

                tool_call = ToolCall(
                    id=f"call_{idx}_{random_uuid()}",
                    type="function",
                    function=FunctionCall(
                        name=call["name"],
                        # "arguments" may already be a JSON string; only
                        # dicts need serializing.
                        arguments=(json.dumps(call["arguments"]) if isinstance(
                            call["arguments"], dict) else call["arguments"]),
                    ),
                )
                tool_calls.append(tool_call)

            return ExtractedToolCallInformation(
                tools_called=len(tool_calls) > 0,
                tool_calls=tool_calls,
                content=content,
            )

        except Exception as e:
            logger.exception("Error extracting tool calls: %s", str(e))
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """
        Extract tool calls for streaming mode.

        Incrementally emits, per tool call: first the function name (with a
        generated id), then argument text deltas as they become available.
        Returns None when nothing new can be emitted yet.
        """
        # Simplify detection: if it begins with "[" treat it as a function call
        is_function_call = (current_text.strip().startswith("["))

        # If not a function call, return normal content
        if not is_function_call:
            return DeltaMessage(content=delta_text)

        try:
            # Initialize streaming state if not exists
            if not hasattr(self, "streaming_state"):
                self.streaming_state = {
                    "current_tool_index": -1,
                    "tool_ids": [],
                    "sent_tools": [],  # Track complete state of each tool
                }

            # Try parsing as JSON to check for complete tool calls
            try:
                parsed_tools = json.loads(current_text)
                if isinstance(parsed_tools, list):
                    # Update our tool array for next time
                    self.prev_tool_call_arr = parsed_tools
            except json.JSONDecodeError:
                # Not complete JSON yet, use regex for partial parsing
                pass

            # Check for test-specific state setup (current_tools_sent)
            # This handles the case where tests manually set current_tools_sent
            if (hasattr(self, "current_tools_sent")  # type: ignore
                    and len(self.current_tools_sent) > 0):
                # If current_tools_sent is set to [False], it means the test wants us to send the name
                if (len(self.current_tools_sent) == 1
                        and self.current_tools_sent[0] is False):
                    # Extract the function name using regex
                    name_pattern = r'"name"\s*:\s*"([^"]+)"'
                    name_match = re.search(name_pattern, current_text)
                    if name_match:
                        function_name = name_match.group(1)

                        # The test expects us to send just the name first
                        # NOTE(review): this path uses make_tool_call_id()
                        # while the main path below uses
                        # f"call_{idx}_{random_uuid()}" — two id schemes.
                        tool_id = make_tool_call_id()
                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=0,
                                type="function",
                                id=tool_id,
                                function=DeltaFunctionCall(
                                    name=function_name).model_dump(
                                        exclude_none=True),  # type: ignore
                            )
                        ])
                        # Update state to reflect that we've sent the name
                        self.current_tools_sent = [True]
                        self.current_tool_id = 0
                        self.streaming_state["current_tool_index"] = 0
                        if len(self.streaming_state["sent_tools"]) == 0:
                            self.streaming_state["sent_tools"].append({
                                "sent_name":
                                True,
                                "sent_arguments_prefix":
                                False,
                                "sent_arguments":
                                "",
                            })
                        else:
                            self.streaming_state["sent_tools"][0][
                                "sent_name"] = True
                        self.current_tool_name_sent = True
                        return delta

            # Use regex to identify tool calls in the output
            name_pattern = r'"name"\s*:\s*"([^"]+)"'
            name_matches = list(re.finditer(name_pattern, current_text))
            tool_count = len(name_matches)

            # If no tools found yet, return
            if tool_count == 0:
                return None

            # Ensure our state arrays are large enough
            while len(self.streaming_state["sent_tools"]) < tool_count:
                self.streaming_state["sent_tools"].append({
                    "sent_name":
                    False,
                    "sent_arguments_prefix":
                    False,
                    "sent_arguments":
                    "",
                })

            while len(self.streaming_state["tool_ids"]) < tool_count:
                self.streaming_state["tool_ids"].append(None)

            # Determine if we need to move to a new tool
            current_idx = self.streaming_state["current_tool_index"]

            # If we haven't processed any tool yet or current tool is complete, move to next
            if current_idx == -1 or current_idx < tool_count - 1:
                next_idx = current_idx + 1

                # If tool at next_idx has not been sent yet
                if (next_idx < tool_count
                        and not self.streaming_state["sent_tools"][next_idx]
                    ["sent_name"]):
                    # Update indexes
                    self.streaming_state["current_tool_index"] = next_idx
                    self.current_tool_id = (
                        next_idx  # For backward compatibility
                    )
                    current_idx = next_idx

                    # Extract the tool name
                    tool_name = name_matches[current_idx].group(1)

                    # Generate ID and send tool name
                    tool_id = f"call_{current_idx}_{random_uuid()}"
                    self.streaming_state["tool_ids"][current_idx] = tool_id

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=current_idx,
                            type="function",
                            id=tool_id,
                            function=DeltaFunctionCall(
                                name=tool_name).model_dump(
                                    exclude_none=True),  # type: ignore
                        )
                    ])
                    self.streaming_state["sent_tools"][current_idx][
                        "sent_name"] = True
                    self.current_tool_name_sent = (
                        True  # For backward compatibility
                    )

                    # Keep track of streamed args for backward compatibility
                    while len(self.streamed_args) <= current_idx:
                        self.streamed_args.append("")

                    return delta

            # Process arguments for the current tool
            if current_idx >= 0 and current_idx < tool_count:
                # Support both regular and empty argument objects
                # First, check for the empty arguments case: "arguments": {}
                empty_args_pattern = (
                    r'"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*\{\s*\}')
                empty_args_match = re.search(empty_args_pattern, current_text)

                # Check if this tool has empty arguments
                if empty_args_match and empty_args_match.start() > 0:
                    # Find which tool this empty arguments belongs to
                    # NOTE(review): empty_args_tool_idx is assigned but never
                    # read, and the loop below only acts when i == current_idx.
                    empty_args_tool_idx = 0
                    for i in range(tool_count):
                        if i == current_idx:
                            # If this is our current tool and it has empty arguments
                            if not self.streaming_state["sent_tools"][
                                    current_idx]["sent_arguments_prefix"]:
                                # Send empty object
                                self.streaming_state["sent_tools"][
                                    current_idx][
                                        "sent_arguments_prefix"] = True
                                self.streaming_state["sent_tools"][
                                    current_idx]["sent_arguments"] = "{}"

                                # Update streamed_args for backward compatibility
                                while len(self.streamed_args) <= current_idx:
                                    self.streamed_args.append("")
                                self.streamed_args[current_idx] += "{}"

                                delta = DeltaMessage(tool_calls=[
                                    DeltaToolCall(
                                        index=current_idx,
                                        function=DeltaFunctionCall(
                                            arguments="{}").
                                        model_dump(
                                            exclude_none=True),  # type: ignore
                                    )
                                ])

                                # Move to next tool if available
                                if current_idx < tool_count - 1:
                                    self.streaming_state[
                                        "current_tool_index"] += 1
                                    self.current_tool_id = self.streaming_state[
                                        "current_tool_index"]

                                return delta

                # Extract arguments for current tool using regex for non-empty arguments
                args_pattern = r'"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*(\{(?:[^{}]|(?:\{[^{}]*\}))*\})'
                args_matches = list(re.finditer(args_pattern, current_text))

                if current_idx < len(args_matches):
                    args_text = args_matches[current_idx].group(1)

                    # Handle transition between tools
                    is_last_tool = current_idx == tool_count - 1

                    # Find where the arguments for our current tool end
                    if not is_last_tool:
                        # If we have more tools after this one, try to find the complete argument block
                        next_tool_pos = current_text.find(
                            "},{", args_matches[current_idx].start())
                        if next_tool_pos != -1:
                            args_end_pos = (next_tool_pos + 1
                                            )  # +1 to include the '}'
                            args_text = (current_text[args_matches[current_idx]
                                                      .start():args_end_pos].
                                         split('"arguments":')[1].strip())

                    # If arguments haven't been sent yet
                    sent_args = self.streaming_state["sent_tools"][
                        current_idx]["sent_arguments"]

                    # If we haven't sent the opening bracket yet
                    if not self.streaming_state["sent_tools"][current_idx][
                            "sent_arguments_prefix"] and args_text.startswith(
                                "{"):
                        self.streaming_state["sent_tools"][current_idx][
                            "sent_arguments_prefix"] = True
                        self.streaming_state["sent_tools"][current_idx][
                            "sent_arguments"] = "{"

                        # Update streamed_args for backward compatibility
                        while len(self.streamed_args) <= current_idx:
                            self.streamed_args.append("")
                        self.streamed_args[current_idx] += "{"

                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=current_idx,
                                function=DeltaFunctionCall(
                                    arguments="{").model_dump(
                                        exclude_none=True),  # type: ignore
                            )
                        ])
                        return delta

                    # If we need to send more arguments
                    if args_text.startswith(sent_args):
                        # Calculate what part of arguments we need to send
                        args_diff = args_text[len(sent_args):]

                        if args_diff:
                            # Update our state
                            self.streaming_state["sent_tools"][current_idx][
                                "sent_arguments"] = args_text

                            # Update streamed_args for backward compatibility
                            while len(self.streamed_args) <= current_idx:
                                self.streamed_args.append("")
                            self.streamed_args[current_idx] += args_diff

                            delta = DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=current_idx,
                                    function=DeltaFunctionCall(
                                        arguments=args_diff).model_dump(
                                            exclude_none=True),  # type: ignore
                                )
                            ])
                            return delta

                    # If the tool's arguments are complete, check if we need to move to the next tool
                    if args_text.endswith("}") and args_text == sent_args:
                        # This tool is complete, move to the next one in the next iteration
                        if current_idx < tool_count - 1:
                            self.streaming_state["current_tool_index"] += 1
                            self.current_tool_id = self.streaming_state[
                                "current_tool_index"]  # For compatibility

            # If we got here, we couldn't determine what to stream next
            return None

        except Exception as e:
            # NOTE(review): prefer lazy %-formatting for log calls.
            logger.exception(f"Error in streaming tool calls: {e}")
            # If we encounter an error, just return the delta text as regular content
            return DeltaMessage(content=delta_text)

current_tool_id instance-attribute

current_tool_id = -1

current_tool_name_sent instance-attribute

current_tool_name_sent = False

current_tools_sent instance-attribute

current_tools_sent: list[bool] = []

json_code_block_patterns instance-attribute

json_code_block_patterns = [
    "```(?:json)?\\s*([\\s\\S]*?)```",
    "\\[TOOL_CALLS\\]([\\s\\S]*?)(?=\\n|$)",
    "<tool_call>([\\s\\S]*?)</tool_call>",
]

prev_tool_call_arr instance-attribute

prev_tool_call_arr = []

prev_tool_calls instance-attribute

prev_tool_calls: list[dict] = []

streamed_args instance-attribute

streamed_args: list[str] = []

streaming_state instance-attribute

streaming_state: dict[str, Any] = {
    "current_tool_index": -1,
    "tool_ids": [],
    "sent_tools": [],
}

thinking_tag_pattern instance-attribute

thinking_tag_pattern = '</think>([\\s\\S]*)'

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Set up streaming state and the preprocessing regex patterns."""
    super().__init__(tokenizer)

    # Initialize state for streaming mode
    self.prev_tool_calls: list[dict] = []
    self.current_tool_id = -1
    self.current_tool_name_sent = False
    self.streamed_args: list[str] = [
    ]  # Track arguments sent for each tool

    # For backward compatibility with tests
    self.current_tools_sent: list[bool] = []

    # For backward compatibility with serving code
    self.prev_tool_call_arr = []

    # Regex patterns for preprocessing: each captures a candidate JSON
    # payload from a fenced/tagged wrapper.
    self.json_code_block_patterns = [
        r"```(?:json)?\s*([\s\S]*?)```",
        r"\[TOOL_CALLS\]([\s\S]*?)(?=\n|$)",
        r"<tool_call>([\s\S]*?)</tool_call>",
    ]
    self.thinking_tag_pattern = r"</think>([\s\S]*)"

    # Define streaming state type to be initialized later
    self.streaming_state: dict[str, Any] = {
        "current_tool_index": -1,
        "tool_ids": [],
        "sent_tools": [],
    }

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract tool calls from a complete model output.

Source code in vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
def extract_tool_calls(
        self, model_output: str,
        request: ChatCompletionRequest) -> ExtractedToolCallInformation:
    """
    Extract tool calls from a complete model output.

    Malformed entries are skipped; on any unexpected error the raw model
    output is returned as plain content with no tool calls.
    """
    try:
        # Preprocess the model output
        content, potential_tool_calls = self.preprocess_model_output(
            model_output)

        if not potential_tool_calls:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=content)

        # Parse the potential tool calls as JSON
        tool_calls_data = json.loads(potential_tool_calls)

        # Ensure it's an array
        if not isinstance(tool_calls_data, list):
            logger.debug("Tool calls data is not an array")
            return ExtractedToolCallInformation(
                tools_called=False,
                tool_calls=[],
                content=content or model_output,
            )

        tool_calls: list[ToolCall] = []

        for idx, call in enumerate(tool_calls_data):
            # Each call must be an object carrying both "name" and
            # "arguments"; anything else is skipped, not fatal.
            if (not isinstance(call, dict) or "name" not in call
                    or "arguments" not in call):
                logger.debug("Invalid tool call format at index %d", idx)
                continue

            tool_call = ToolCall(
                id=f"call_{idx}_{random_uuid()}",
                type="function",
                function=FunctionCall(
                    name=call["name"],
                    # "arguments" may already be a JSON string; only dicts
                    # need serializing.
                    arguments=(json.dumps(call["arguments"]) if isinstance(
                        call["arguments"], dict) else call["arguments"]),
                ),
            )
            tool_calls.append(tool_call)

        return ExtractedToolCallInformation(
            tools_called=len(tool_calls) > 0,
            tool_calls=tool_calls,
            content=content,
        )

    except Exception as e:
        logger.exception("Error extracting tool calls: %s", str(e))
        return ExtractedToolCallInformation(tools_called=False,
                                            tool_calls=[],
                                            content=model_output)

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]

Extract tool calls for streaming mode.

Source code in vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """
    Extract tool calls for streaming mode.

    Re-inspects ``current_text`` (the full generation so far) on every call
    and emits at most one delta per invocation: plain content when the
    output does not look like a tool-call array, otherwise either a tool
    name delta or an arguments-fragment delta for the tool currently being
    streamed. Cross-call parsing state is kept on ``self.streaming_state``;
    ``self.current_tool_id``, ``self.current_tool_name_sent`` and
    ``self.streamed_args`` are mirrored for backward compatibility.
    Returns ``None`` when there is nothing new to emit yet.
    """
    # Simplify detection: if the stripped output begins with "[" treat the
    # whole generation as a JSON tool-call array.
    is_function_call = (current_text.strip().startswith("["))

    # If not a function call, pass the delta through as normal content.
    if not is_function_call:
        return DeltaMessage(content=delta_text)

    try:
        # Initialize streaming state on first use. For each tool index,
        # sent_tools[i] tracks whether the name was streamed, whether the
        # opening "{" of the arguments was streamed, and the exact
        # argument text streamed so far.
        if not hasattr(self, "streaming_state"):
            self.streaming_state = {
                "current_tool_index": -1,
                "tool_ids": [],
                "sent_tools": [],  # Track complete state of each tool
            }

        # Try parsing as JSON to check for complete tool calls.
        try:
            parsed_tools = json.loads(current_text)
            if isinstance(parsed_tools, list):
                # Cache the fully-parsed tool array for later calls.
                self.prev_tool_call_arr = parsed_tools
        except json.JSONDecodeError:
            # Not complete JSON yet; fall back to regex-based partial
            # parsing below.
            pass

        # Test-specific state hook: some callers/tests pre-seed
        # `current_tools_sent` to control when the name is emitted.
        if (hasattr(self, "current_tools_sent")  # type: ignore
                and len(self.current_tools_sent) > 0):
            # current_tools_sent == [False] means the caller expects the
            # function name to be streamed first.
            if (len(self.current_tools_sent) == 1
                    and self.current_tools_sent[0] is False):
                # Extract the function name using regex.
                name_pattern = r'"name"\s*:\s*"([^"]+)"'
                name_match = re.search(name_pattern, current_text)
                if name_match:
                    function_name = name_match.group(1)

                    # Emit just the name (with a fresh tool-call id) first.
                    tool_id = make_tool_call_id()
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=0,
                            type="function",
                            id=tool_id,
                            function=DeltaFunctionCall(
                                name=function_name).model_dump(
                                    exclude_none=True),  # type: ignore
                        )
                    ])
                    # Update state to reflect that we've sent the name.
                    self.current_tools_sent = [True]
                    self.current_tool_id = 0
                    self.streaming_state["current_tool_index"] = 0
                    if len(self.streaming_state["sent_tools"]) == 0:
                        self.streaming_state["sent_tools"].append({
                            "sent_name":
                            True,
                            "sent_arguments_prefix":
                            False,
                            "sent_arguments":
                            "",
                        })
                    else:
                        self.streaming_state["sent_tools"][0][
                            "sent_name"] = True
                    self.current_tool_name_sent = True
                    return delta

        # Count tool calls seen so far by matching their "name" fields.
        name_pattern = r'"name"\s*:\s*"([^"]+)"'
        name_matches = list(re.finditer(name_pattern, current_text))
        tool_count = len(name_matches)

        # If no tools found yet, nothing to emit.
        if tool_count == 0:
            return None

        # Grow per-tool state arrays to cover every tool seen so far.
        while len(self.streaming_state["sent_tools"]) < tool_count:
            self.streaming_state["sent_tools"].append({
                "sent_name":
                False,
                "sent_arguments_prefix":
                False,
                "sent_arguments":
                "",
            })

        while len(self.streaming_state["tool_ids"]) < tool_count:
            self.streaming_state["tool_ids"].append(None)

        # Determine if we need to move to a new tool.
        current_idx = self.streaming_state["current_tool_index"]

        # If we haven't processed any tool yet, or a later tool has
        # appeared, advance and emit the next unsent tool name.
        if current_idx == -1 or current_idx < tool_count - 1:
            next_idx = current_idx + 1

            # Only advance if the tool at next_idx has not been named yet.
            if (next_idx < tool_count
                    and not self.streaming_state["sent_tools"][next_idx]
                ["sent_name"]):
                # Update indexes.
                self.streaming_state["current_tool_index"] = next_idx
                self.current_tool_id = (
                    next_idx  # For backward compatibility
                )
                current_idx = next_idx

                # Extract the tool name.
                tool_name = name_matches[current_idx].group(1)

                # Generate an id and send the tool name delta.
                tool_id = f"call_{current_idx}_{random_uuid()}"
                self.streaming_state["tool_ids"][current_idx] = tool_id

                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=current_idx,
                        type="function",
                        id=tool_id,
                        function=DeltaFunctionCall(
                            name=tool_name).model_dump(
                                exclude_none=True),  # type: ignore
                    )
                ])
                self.streaming_state["sent_tools"][current_idx][
                    "sent_name"] = True
                self.current_tool_name_sent = (
                    True  # For backward compatibility
                )

                # Keep track of streamed args for backward compatibility.
                while len(self.streamed_args) <= current_idx:
                    self.streamed_args.append("")

                return delta

        # Process arguments for the current tool.
        if current_idx >= 0 and current_idx < tool_count:
            # Support both regular and empty argument objects.
            # First, check for the empty arguments case: "arguments": {}
            empty_args_pattern = (
                r'"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*\{\s*\}')
            empty_args_match = re.search(empty_args_pattern, current_text)

            # Check if this tool has empty arguments.
            # NOTE(review): the loop below iterates all tool indices but
            # only acts when i == current_idx, and the match position is
            # not tied to a specific tool — presumably intentional for the
            # single-empty-args case; verify against multi-tool outputs.
            if empty_args_match and empty_args_match.start() > 0:
                # Find which tool this empty arguments belongs to.
                empty_args_tool_idx = 0
                for i in range(tool_count):
                    if i == current_idx:
                        # Current tool has empty arguments: emit "{}" once.
                        if not self.streaming_state["sent_tools"][
                                current_idx]["sent_arguments_prefix"]:
                            # Send the complete empty object.
                            self.streaming_state["sent_tools"][
                                current_idx][
                                    "sent_arguments_prefix"] = True
                            self.streaming_state["sent_tools"][
                                current_idx]["sent_arguments"] = "{}"

                            # Update streamed_args for backward
                            # compatibility.
                            while len(self.streamed_args) <= current_idx:
                                self.streamed_args.append("")
                            self.streamed_args[current_idx] += "{}"

                            delta = DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=current_idx,
                                    function=DeltaFunctionCall(
                                        arguments="{}").
                                    model_dump(
                                        exclude_none=True),  # type: ignore
                                )
                            ])

                            # Move to next tool if available.
                            if current_idx < tool_count - 1:
                                self.streaming_state[
                                    "current_tool_index"] += 1
                                self.current_tool_id = self.streaming_state[
                                    "current_tool_index"]

                            return delta

        # Extract arguments for the current tool using a regex that
        # tolerates one level of nested braces in the argument object.
            args_pattern = r'"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:\s*(\{(?:[^{}]|(?:\{[^{}]*\}))*\})'
            args_matches = list(re.finditer(args_pattern, current_text))

            if current_idx < len(args_matches):
                args_text = args_matches[current_idx].group(1)

                # Handle transition between tools.
                is_last_tool = current_idx == tool_count - 1

                # Find where the arguments for our current tool end.
                if not is_last_tool:
                    # With more tools after this one, cut the argument
                    # block at the "},{" boundary between tool objects.
                    next_tool_pos = current_text.find(
                        "},{", args_matches[current_idx].start())
                    if next_tool_pos != -1:
                        args_end_pos = (next_tool_pos + 1
                                        )  # +1 to include the '}'
                        args_text = (current_text[args_matches[current_idx]
                                                  .start():args_end_pos].
                                     split('"arguments":')[1].strip())

                # What has already been streamed for this tool's arguments.
                sent_args = self.streaming_state["sent_tools"][
                    current_idx]["sent_arguments"]

                # If we haven't sent the opening bracket yet, send it alone.
                if not self.streaming_state["sent_tools"][current_idx][
                        "sent_arguments_prefix"] and args_text.startswith(
                            "{"):
                    self.streaming_state["sent_tools"][current_idx][
                        "sent_arguments_prefix"] = True
                    self.streaming_state["sent_tools"][current_idx][
                        "sent_arguments"] = "{"

                    # Update streamed_args for backward compatibility.
                    while len(self.streamed_args) <= current_idx:
                        self.streamed_args.append("")
                    self.streamed_args[current_idx] += "{"

                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=current_idx,
                            function=DeltaFunctionCall(
                                arguments="{").model_dump(
                                    exclude_none=True),  # type: ignore
                        )
                    ])
                    return delta

                # If the new text extends what was already sent, stream
                # only the unsent suffix.
                if args_text.startswith(sent_args):
                    # Calculate what part of arguments we need to send.
                    args_diff = args_text[len(sent_args):]

                    if args_diff:
                        # Update our state.
                        self.streaming_state["sent_tools"][current_idx][
                            "sent_arguments"] = args_text

                        # Update streamed_args for backward compatibility.
                        while len(self.streamed_args) <= current_idx:
                            self.streamed_args.append("")
                        self.streamed_args[current_idx] += args_diff

                        delta = DeltaMessage(tool_calls=[
                            DeltaToolCall(
                                index=current_idx,
                                function=DeltaFunctionCall(
                                    arguments=args_diff).model_dump(
                                        exclude_none=True),  # type: ignore
                            )
                        ])
                        return delta

                # Arguments fully streamed: advance to the next tool so the
                # next invocation can emit its name.
                if args_text.endswith("}") and args_text == sent_args:
                    # This tool is complete; move on next iteration.
                    if current_idx < tool_count - 1:
                        self.streaming_state["current_tool_index"] += 1
                        self.current_tool_id = self.streaming_state[
                            "current_tool_index"]  # For compatibility

        # If we got here, we couldn't determine what to stream next.
        return None

    except Exception as e:
        logger.exception(f"Error in streaming tool calls: {e}")
        # On any parsing error, degrade gracefully to plain content.
        return DeltaMessage(content=delta_text)

preprocess_model_output

preprocess_model_output(
    model_output: str,
) -> tuple[Optional[str], Optional[str]]

Preprocess the model output to extract content and potential tool calls. Returns: Tuple of (content, potential_tool_calls_json)

Source code in vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
def preprocess_model_output(
        self, model_output: str) -> tuple[Optional[str], Optional[str]]:
    """
    Split the model output into plain content and a candidate tool-call
    JSON string.

    Search order: (1) JSON inside a thinking tag, either directly or in a
    fenced code block; (2) a JSON code block anywhere in the output;
    (3) the whole output when it looks like a JSON array of tool calls.

    Returns:
        Tuple of (content, potential_tool_calls_json); either element may
        be ``None``.
    """
    # 1) Look inside the thinking tag first.
    think_match = re.search(self.thinking_tag_pattern, model_output)
    if think_match:
        # Content is everything up to and including the closing tag.
        visible_content = model_output[:think_match.start() +
                                       len("</think>")].strip()
        inner_text = think_match.group(1).strip()

        # Direct JSON inside the thinking section?
        try:
            json.loads(inner_text)
        except json.JSONDecodeError:
            # Otherwise scan the thinking section for fenced JSON blocks.
            for pattern in self.json_code_block_patterns:
                for candidate in re.findall(pattern, inner_text):
                    try:
                        json.loads(candidate)
                    except json.JSONDecodeError:
                        continue
                    return visible_content, candidate
        else:
            return visible_content, inner_text

    # 2) Scan the entire output for fenced JSON blocks; the surviving
    # content is the output with the block stripped out.
    for pattern in self.json_code_block_patterns:
        for candidate in re.findall(pattern, model_output):
            try:
                json.loads(candidate)
            except json.JSONDecodeError:
                continue
            remaining = re.sub(pattern, "", model_output).strip()
            return remaining, candidate

    # 3) A bare JSON array (even a partial one that already mentions
    # "name" and "arguments") is treated as in-progress tool calls.
    if model_output.strip().startswith("["):
        try:
            json.loads(model_output)
        except json.JSONDecodeError:
            if ("{" in model_output and "name" in model_output
                    and "arguments" in model_output):
                return None, model_output
        else:
            return None, model_output

    # No tool calls detected: everything is plain content.
    return model_output, None