Initial commit
@@ -0,0 +1,117 @@
{% macro render_extra_keys(json_dict, handled_keys) %}
{%- if json_dict is mapping %}
{%- for json_key in json_dict if json_key not in handled_keys %}
{%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
{{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
{%- else %}
{{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
{%- endif %}
{%- endfor %}
{%- endif %}
{% endmacro %}

{%- if messages[0]["role"] == "system" %}
{%- set system_message = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{%- endif %}

{%- if not tools is defined %}
{%- set tools = [] %}
{%- endif %}

{%- if system_message is defined %}
{{- "<|im_start|>system\n" + system_message }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
{%- endif %}
{%- endif %}
{%- if tools is iterable and tools | length > 0 %}
{{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
{{- "<tools>" }}
{%- for tool in tools %}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
{%- if tool.description is defined %}
{{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
{%- endif %}
{{- '\n<parameters>' }}
{%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- '\n<parameter>' }}
{{- '\n<name>' ~ param_name ~ '</name>' }}
{%- if param_fields.type is defined %}
{{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
{%- endif %}
{%- if param_fields.description is defined %}
{{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
{%- endif %}
{%- set handled_keys = ['name', 'type', 'description'] %}
{{- render_extra_keys(param_fields, handled_keys) }}
{{- '\n</parameter>' }}
{%- endfor %}
{%- endif %}
{% set handled_keys = ['type', 'properties'] %}
{{- render_extra_keys(tool.parameters, handled_keys) }}
{{- '\n</parameters>' }}
{%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
{{- render_extra_keys(tool, handled_keys) }}
{{- '\n</function>' }}
{%- endfor %}
{{- "\n</tools>" }}
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
{%- endif %}
{%- if system_message is defined %}
{{- '<|im_end|>\n' }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- for message in loop_messages %}
{%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
{{- '<|im_start|>' + message.role }}
{%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
{{- '\n' + message.content | trim + '\n' }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
{%- if tool_call.arguments is defined %}
{%- for args_name, args_value in tool_call.arguments|items %}
{{- '<parameter=' + args_name + '>\n' }}
{%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
{{- args_value }}
{{- '\n</parameter>\n' }}
{%- endfor %}
{%- endif %}
{{- '</function>\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{%- elif message.role == "tool" %}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>user\n' }}
{%- endif %}
{{- '<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>\n' }}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>\n' }}
{%- elif loop.last %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
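As a quick sanity check, the Qwen3-Coder template above can be exercised through the standard transformers chat-template API. This is a minimal sketch, assuming a recent transformers version; the read_file tool definition is a hypothetical example, not part of this commit.

# Minimal sketch: render the template with one hypothetical tool.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3-Coder-480B-A35B-Instruct")

tools = [{
    "type": "function",
    "function": {
        "name": "read_file",  # hypothetical tool
        "description": "Read a file from disk.",
        "parameters": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path."}
            },
            "required": ["path"],
        },
    },
}]

messages = [{"role": "user", "content": "Show me setup.py"}]

# Produces the <tools>...<function>... XML block, the user turn, and the
# trailing "<|im_start|>assistant\n" generation prompt.
prompt = tokenizer.apply_chat_template(
    messages, tools=tools, tokenize=False, add_generation_prompt=True)
print(prompt)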
@@ -0,0 +1,39 @@
{
  "architectures": [
    "Qwen3MoeForCausalLM"
  ],
  "attention_dropout": 0.0,
  "decoder_sparse_step": 1,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 6144,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 262144,
  "max_window_layers": 62,
  "mlp_only_layers": [],
  "model_type": "qwen3_moe",
  "moe_intermediate_size": 2560,
  "norm_topk_prob": true,
  "num_attention_heads": 96,
  "num_experts": 160,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 62,
  "num_key_value_heads": 8,
  "output_router_logits": false,
  "qkv_bias": false,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000000,
  "router_aux_loss_coef": 0.0,
  "shared_expert_intermediate_size": 0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": true,
  "use_qk_norm": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}
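A few quantities implied by these fields, as plain arithmetic with the values copied from the JSON above:

# Derived quantities from the config above (values copied from the JSON).
num_experts, experts_per_tok = 160, 8
heads, kv_heads, head_dim = 96, 8, 128
hidden = 6144

print(f"experts active per token: {experts_per_tok}/{num_experts} "
      f"({experts_per_tok / num_experts:.1%})")                    # 8/160 (5.0%)
print(f"GQA ratio: {heads // kv_heads} query heads per KV head")   # 12
print(f"attention width: {heads * head_dim} vs hidden {hidden}")   # 12288 vs 6144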
@@ -0,0 +1,689 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import ast
import json
import uuid
from collections.abc import Sequence
from typing import Any, Optional, Union

import regex as re

from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                              ChatCompletionToolsParam,
                                              DeltaFunctionCall, DeltaMessage,
                                              DeltaToolCall,
                                              ExtractedToolCallInformation,
                                              FunctionCall, ToolCall)
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
    ToolParser, ToolParserManager)
from vllm.logger import init_logger
from vllm.transformers_utils.tokenizer import AnyTokenizer

logger = init_logger(__name__)


@ToolParserManager.register_module("qwen3_coder")
class Qwen3CoderToolParser(ToolParser):

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.streamed_args_for_tool: list[str] = []

        # Sentinel tokens for streaming mode
        self.tool_call_start_token: str = "<tool_call>"
        self.tool_call_end_token: str = "</tool_call>"
        self.tool_call_prefix: str = "<function="
        self.function_end_token: str = "</function>"
        self.parameter_prefix: str = "<parameter="
        self.parameter_end_token: str = "</parameter>"
        self.is_tool_call_started: bool = False
        self.failed_count: int = 0

        # Enhanced streaming state - reset for each new message
        self._reset_streaming_state()

        # Regex patterns
        self.tool_call_complete_regex = re.compile(
            r"<tool_call>(.*?)</tool_call>", re.DOTALL)
        self.tool_call_regex = re.compile(
            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL)
        self.tool_call_function_regex = re.compile(
            r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
        self.tool_call_parameter_regex = re.compile(
            r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
            re.DOTALL)
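A standalone illustration of how these patterns decompose a model response; the same expressions also work with the stdlib re module, and the sample text is made up:

# Standalone sketch of the patterns above on a sample completion.
import re

text = ("<tool_call>\n<function=get_weather>\n"
        "<parameter=city>\nParis\n</parameter>\n"
        "</function>\n</tool_call>")

func_re = re.compile(r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
param_re = re.compile(
    r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
    re.DOTALL)

body = next(m[0] or m[1] for m in func_re.findall(text))
name, _, rest = body.partition(">")
print(name)  # get_weather
for p in param_re.findall(rest):
    pname, _, pval = p.partition(">")
    print(pname, repr(pval.strip()))  # city 'Paris'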
        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")

        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

        if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
            raise RuntimeError(
                "Qwen3 XML Tool parser could not locate tool call start/end "
                "tokens in the tokenizer!")

        logger.info(
            f"vLLM successfully imported tool parser {self.__class__.__name__}!"
        )

    def _generate_tool_call_id(self) -> str:
        """Generate a unique tool call ID."""
        return f"call_{uuid.uuid4().hex[:24]}"

    def _reset_streaming_state(self):
        """Reset all streaming state."""
        self.current_tool_index = 0
        self.is_tool_call_started = False
        self.header_sent = False
        self.current_tool_id = None
        self.current_function_name = None
        self.current_param_name = None
        self.current_param_value = ""
        self.param_count = 0
        self.in_param = False
        self.in_function = False
        self.accumulated_text = ""
        self.json_started = False
        self.json_closed = False
        # Store accumulated parameters for type conversion
        self.accumulated_params = {}
        self.streaming_request = None

    def _get_arguments_config(
            self, func_name: str,
            tools: Optional[list[ChatCompletionToolsParam]]) -> dict:
        """Extract argument configuration for a function."""
        if tools is None:
            return {}
        for config in tools:
            if not hasattr(config, "type") or not (hasattr(
                    config, "function") and hasattr(config.function, "name")):
                continue
            if config.type == "function" and config.function.name == func_name:
                if not hasattr(config.function, "parameters"):
                    return {}
                params = config.function.parameters
                if isinstance(params, dict) and "properties" in params:
                    return params["properties"]
                elif isinstance(params, dict):
                    return params
                else:
                    return {}
        logger.warning(f"Tool '{func_name}' is not defined in the tools list.")
        return {}

    def _convert_param_value(self, param_value: str, param_name: str,
                             param_config: dict, func_name: str) -> Any:
        """Convert parameter value based on its type in the schema."""
        # Handle null value for any type
        if param_value.lower() == "null":
            return None

        if param_name not in param_config:
            if param_config != {}:
                logger.warning(
                    f"Parsed parameter '{param_name}' is not defined in the tool "
                    f"parameters for tool '{func_name}', directly returning the string value."
                )
            return param_value

        if isinstance(param_config[param_name],
                      dict) and "type" in param_config[param_name]:
            param_type = str(param_config[param_name]["type"]).strip().lower()
        else:
            param_type = "string"
        if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
            return param_value
        elif param_type.startswith("int") or param_type.startswith(
                "uint") or param_type.startswith(
                    "long") or param_type.startswith(
                        "short") or param_type.startswith("unsigned"):
            try:
                param_value = int(param_value)
            except Exception:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not an integer in tool "
                    f"'{func_name}', degenerating to string.")
            return param_value
        elif param_type.startswith("num") or param_type.startswith("float"):
            try:
                float_param_value = float(param_value)
                param_value = float_param_value if float_param_value - int(
                    float_param_value) != 0 else int(float_param_value)
            except Exception:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a float in tool "
                    f"'{func_name}', degenerating to string.")
            return param_value
        elif param_type in ["boolean", "bool", "binary"]:
            param_value = param_value.lower()
            if param_value not in ["true", "false"]:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a boolean (`true` or `false`) in tool '{func_name}', degenerating to false."
                )
            return param_value == "true"
        else:
            if param_type in ["object", "array", "arr"
                              ] or param_type.startswith(
                                  "dict") or param_type.startswith("list"):
                try:
                    param_value = json.loads(param_value)
                    return param_value
                except Exception:
                    logger.warning(
                        f"Parsed value '{param_value}' of parameter '{param_name}' cannot be parsed with json.loads in tool "
                        f"'{func_name}', will try other methods to parse it.")
            try:
                param_value = ast.literal_eval(param_value)  # safer than eval
            except Exception:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' cannot be converted via Python `ast.literal_eval()` in tool '{func_name}', degenerating to string."
                )
            return param_value
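The conversion logic above is easiest to see on concrete inputs. A minimal sketch of the same rules, with a hypothetical schema not tied to vLLM:

# Minimal sketch of the schema-driven coercion rules above.
# `schema` is a hypothetical tool-parameter config (JSON-schema "properties").
import json

schema = {"limit": {"type": "integer"},
          "ratio": {"type": "number"},
          "force": {"type": "boolean"},
          "opts":  {"type": "object"}}

def convert(name: str, raw: str):
    if raw.lower() == "null":
        return None
    t = schema.get(name, {}).get("type", "string")
    if t == "integer":
        return int(raw)
    if t == "number":
        f = float(raw)
        return int(f) if f == int(f) else f  # whole floats degrade to int
    if t == "boolean":
        return raw.lower() == "true"
    if t == "object":
        return json.loads(raw)
    return raw

print(convert("limit", "42"), convert("ratio", "2.0"),
      convert("force", "True"), convert("opts", '{"a": 1}'))
# -> 42 2 True {'a': 1}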
    def _parse_xml_function_call(
            self, function_call_str: str,
            tools: Optional[list[ChatCompletionToolsParam]]
    ) -> Optional[ToolCall]:

        # Extract function name
        end_index = function_call_str.index(">")
        function_name = function_call_str[:end_index]
        param_config = self._get_arguments_config(function_name, tools)
        parameters = function_call_str[end_index + 1:]
        param_dict = {}
        for match_text in self.tool_call_parameter_regex.findall(parameters):
            idx = match_text.index(">")
            param_name = match_text[:idx]
            param_value = str(match_text[idx + 1:])
            # Remove leading and trailing \n
            if param_value.startswith("\n"):
                param_value = param_value[1:]
            if param_value.endswith("\n"):
                param_value = param_value[:-1]

            param_dict[param_name] = self._convert_param_value(
                param_value, param_name, param_config, function_name)
        return ToolCall(
            type="function",
            function=FunctionCall(name=function_name,
                                  arguments=json.dumps(param_dict,
                                                       ensure_ascii=False)),
        )

    def _get_function_calls(self, model_output: str) -> list[str]:
        # Find all tool calls
        matched_ranges = self.tool_call_regex.findall(model_output)
        raw_tool_calls = [
            match[0] if match[0] else match[1] for match in matched_ranges
        ]

        # Back-off strategy if no tool_call tags found
        if len(raw_tool_calls) == 0:
            raw_tool_calls = [model_output]

        raw_function_calls = []
        for tool_call in raw_tool_calls:
            raw_function_calls.extend(
                self.tool_call_function_regex.findall(tool_call))

        function_calls = [
            match[0] if match[0] else match[1] for match in raw_function_calls
        ]
        return function_calls

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        # Quick check to avoid unnecessary processing
        if self.tool_call_prefix not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        try:
            function_calls = self._get_function_calls(model_output)
            if len(function_calls) == 0:
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

            tool_calls = [
                self._parse_xml_function_call(function_call_str, request.tools)
                for function_call_str in function_calls
            ]

            # Populate prev_tool_call_arr for serving layer to set finish_reason
            self.prev_tool_call_arr.clear()  # Clear previous calls
            for tool_call in tool_calls:
                if tool_call:
                    self.prev_tool_call_arr.append({
                        "name": tool_call.function.name,
                        "arguments": tool_call.function.arguments,
                    })

            # Extract content before tool calls
            content_index = model_output.find(self.tool_call_start_token)
            content_index = content_index if content_index >= 0 else model_output.find(
                self.tool_call_prefix)
            content = model_output[:content_index]  # .rstrip()

            return ExtractedToolCallInformation(
                tools_called=(len(tool_calls) > 0),
                tool_calls=tool_calls,
                content=content if content else None,
            )

        except Exception:
            logger.exception("Error in extracting tool call from response.")
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)
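A rough end-to-end usage sketch for the non-streaming path. This assumes vLLM is installed and the tokenizer is available; the request construction below follows vLLM's OpenAI-compatible protocol types and is an assumption, not part of this commit.

# Rough usage sketch of extract_tool_calls (assumptions noted above).
from transformers import AutoTokenizer
from vllm.entrypoints.openai.protocol import ChatCompletionRequest

tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3-Coder-480B-A35B-Instruct")
parser = Qwen3CoderToolParser(tokenizer)
request = ChatCompletionRequest(
    model="Qwen/Qwen3-Coder-480B-A35B-Instruct",
    messages=[{"role": "user", "content": "Show me setup.py"}])

output = ("I'll read that file.\n"
          "<tool_call>\n<function=read_file>\n"
          "<parameter=path>\nsetup.py\n</parameter>\n"
          "</function>\n</tool_call>")

info = parser.extract_tool_calls(output, request)
print(info.tools_called)                      # True
print(info.tool_calls[0].function.name)       # read_file
print(info.tool_calls[0].function.arguments)  # {"path": "setup.py"}
print(info.content)                           # "I'll read that file.\n"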
    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        # Store request for type conversion
        if not previous_text:
            self._reset_streaming_state()
            self.streaming_request = request

        # If no delta text, return None unless it's an EOS token after tool calls
        if not delta_text:
            # Check if this is an EOS token after all tool calls are complete.
            # We check for tool calls in the text even if is_tool_call_started
            # is False, because it might have been reset after processing all
            # tools.
            if delta_token_ids and self.tool_call_end_token_id not in delta_token_ids:
                # Count complete tool calls
                complete_calls = len(
                    self.tool_call_complete_regex.findall(current_text))

                # If we have completed tool calls and populated prev_tool_call_arr
                if complete_calls > 0 and len(self.prev_tool_call_arr) > 0:
                    # Check if all tool calls are closed
                    open_calls = current_text.count(
                        self.tool_call_start_token) - current_text.count(
                            self.tool_call_end_token)
                    if open_calls == 0:
                        # Return empty delta message to allow finish_reason processing
                        return DeltaMessage(content="")
                elif not self.is_tool_call_started and current_text:
                    # This is a regular content response that's now complete
                    return DeltaMessage(content="")
            return None

        # Update accumulated text
        self.accumulated_text = current_text

        # Check if we need to advance to next tool
        if self.json_closed and not self.in_function:
            # Check if this tool call has ended
            tool_ends = current_text.count(self.tool_call_end_token)
            if tool_ends > self.current_tool_index:
                # This tool has ended, advance to next
                self.current_tool_index += 1
                self.header_sent = False
                self.param_count = 0
                self.json_started = False
                self.json_closed = False
                self.accumulated_params = {}

                # Check if there are more tool calls
                tool_starts = current_text.count(self.tool_call_start_token)
                if self.current_tool_index >= tool_starts:
                    # No more tool calls
                    self.is_tool_call_started = False
                # Continue processing next tool
                return None

        # Handle normal content before tool calls
        if not self.is_tool_call_started:
            # Check if tool call is starting
            if self.tool_call_start_token_id in delta_token_ids or self.tool_call_start_token in delta_text:
                self.is_tool_call_started = True
                # Return any content before the tool call
                if self.tool_call_start_token in delta_text:
                    content_before = delta_text[:delta_text.index(
                        self.tool_call_start_token)]
                    if content_before:
                        return DeltaMessage(content=content_before)
                return None
            else:
                # Check if we're between tool calls - skip whitespace
                if current_text.rstrip().endswith(self.tool_call_end_token):
                    # We just ended a tool call, skip whitespace
                    if delta_text.strip() == "":
                        return None
                # Normal content, no tool call
                return DeltaMessage(content=delta_text)

        # Check if we're between tool calls (waiting for next one)
        # Count tool calls we've seen vs processed
        tool_starts_count = current_text.count(self.tool_call_start_token)
        if self.current_tool_index >= tool_starts_count:
            # We're past all tool calls, shouldn't be here
            return None

        # We're in a tool call, find the current tool call portion.
        # Need to find the correct tool call based on current_tool_index.
        tool_starts = []
        idx = 0
        while True:
            idx = current_text.find(self.tool_call_start_token, idx)
            if idx == -1:
                break
            tool_starts.append(idx)
            idx += len(self.tool_call_start_token)

        if self.current_tool_index >= len(tool_starts):
            # No more tool calls to process yet
            return None

        tool_start_idx = tool_starts[self.current_tool_index]
        # Find where this tool call ends (or current position if not ended yet)
        tool_end_idx = current_text.find(self.tool_call_end_token,
                                         tool_start_idx)
        if tool_end_idx == -1:
            tool_text = current_text[tool_start_idx:]
        else:
            tool_text = current_text[tool_start_idx:tool_end_idx +
                                     len(self.tool_call_end_token)]

        # Looking for function header
        if not self.header_sent:
            if self.tool_call_prefix in tool_text:
                func_start = tool_text.find(self.tool_call_prefix) + len(
                    self.tool_call_prefix)
                func_end = tool_text.find(">", func_start)

                if func_end != -1:
                    # Found complete function name
                    self.current_function_name = tool_text[func_start:func_end]
                    self.current_tool_id = self._generate_tool_call_id()
                    self.header_sent = True
                    self.in_function = True

                    # IMPORTANT: Add to prev_tool_call_arr immediately when we
                    # detect a tool call. This ensures finish_reason="tool_calls"
                    # even if parsing isn't complete.
                    already_added = any(
                        tool.get("name") == self.current_function_name
                        for tool in self.prev_tool_call_arr)
                    if not already_added:
                        self.prev_tool_call_arr.append({
                            "name": self.current_function_name,
                            "arguments": "{}",  # Placeholder, updated later
                        })

                    # Send header with function info
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            id=self.current_tool_id,
                            function=DeltaFunctionCall(
                                name=self.current_function_name, arguments=""),
                            type="function",
                        )
                    ])
            return None

        # We've sent header, now handle function body
        if self.in_function:
            # Send opening brace if not sent yet
            if not self.json_started and self.parameter_prefix not in delta_text:
                self.json_started = True
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="{"),
                    )
                ])

            # Make sure json_started is set if we're processing parameters
            if not self.json_started:
                self.json_started = True

            # Check for function end in accumulated text
            if not self.json_closed and self.function_end_token in tool_text:
                # Close JSON
                self.json_closed = True

                # Extract the complete tool call to update prev_tool_call_arr
                # with final arguments. Find the function content.
                func_start = tool_text.find(self.tool_call_prefix) + len(
                    self.tool_call_prefix)
                func_content_end = tool_text.find(self.function_end_token,
                                                  func_start)
                if func_content_end != -1:
                    func_content = tool_text[func_start:func_content_end]
                    # Parse to get the complete arguments
                    try:
                        parsed_tool = self._parse_xml_function_call(
                            func_content, self.streaming_request.tools
                            if self.streaming_request else None)
                        if parsed_tool:
                            # Update existing entry in prev_tool_call_arr with
                            # complete arguments
                            for i, tool in enumerate(self.prev_tool_call_arr):
                                if tool.get(
                                        "name") == parsed_tool.function.name:
                                    self.prev_tool_call_arr[i][
                                        "arguments"] = parsed_tool.function.arguments
                                    break
                    except Exception:
                        pass  # Ignore parsing errors during streaming

                result = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="}"),
                    )
                ])

                # Reset state for next tool
                self.in_function = False
                self.json_closed = True
                self.accumulated_params = {}

                return result

            # Look for parameters.
            # Find all parameter starts.
            param_starts = []
            idx = 0
            while True:
                idx = tool_text.find(self.parameter_prefix, idx)
                if idx == -1:
                    break
                param_starts.append(idx)
                idx += len(self.parameter_prefix)

            # Check if we should start a new parameter
            if not self.in_param and self.param_count < len(param_starts):

                if len(param_starts) > self.param_count:
                    # Process the next parameter
                    param_idx = param_starts[self.param_count]
                    param_start = param_idx + len(self.parameter_prefix)
                    remaining = tool_text[param_start:]

                    if ">" in remaining:
                        # We have the complete parameter name
                        name_end = remaining.find(">")
                        self.current_param_name = remaining[:name_end]

                        # Find the parameter value
                        value_start = param_start + name_end + 1
                        value_text = tool_text[value_start:]
                        if value_text.startswith("\n"):
                            value_text = value_text[1:]

                        # Find where this parameter ends
                        param_end_idx = value_text.find(
                            self.parameter_end_token)
                        if param_end_idx == -1:
                            # No closing tag, look for next parameter or function end
                            next_param_idx = value_text.find(
                                self.parameter_prefix)
                            func_end_idx = value_text.find(
                                self.function_end_token)

                            if next_param_idx != -1 and (func_end_idx == -1
                                                         or next_param_idx
                                                         < func_end_idx):
                                param_end_idx = next_param_idx
                            elif func_end_idx != -1:
                                param_end_idx = func_end_idx
                            else:
                                # Neither found, check if tool call is complete
                                if self.tool_call_end_token in tool_text:
                                    # Tool call is complete, so the parameter
                                    # must be complete too. Use all remaining
                                    # text before function end as the value.
                                    param_end_idx = len(value_text)
                                else:
                                    # Still streaming, wait for more content
                                    return None

                        if param_end_idx != -1:
                            # Complete parameter found
                            param_value = value_text[:param_end_idx]
                            if param_value.endswith("\n"):
                                param_value = param_value[:-1]

                            # Store raw value for later processing
                            self.accumulated_params[
                                self.current_param_name] = param_value

                            # Get parameter configuration for type conversion
                            param_config = self._get_arguments_config(
                                self.current_function_name,
                                self.streaming_request.tools
                                if self.streaming_request else None)

                            # Convert the parameter value to the appropriate type
                            converted_value = self._convert_param_value(
                                param_value, self.current_param_name,
                                param_config, self.current_function_name)

                            # Build JSON fragment based on the converted type.
                            # Use json.dumps to properly serialize the value.
                            serialized_value = json.dumps(converted_value,
                                                          ensure_ascii=False)

                            if self.param_count == 0:
                                json_fragment = f'"{self.current_param_name}": {serialized_value}'
                            else:
                                json_fragment = f', "{self.current_param_name}": {serialized_value}'

                            self.param_count += 1

                            return DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=json_fragment),
                                )
                            ])

            # Continue parameter value - not used in the current implementation
            # since we process complete parameters above
            if self.in_param:
                if self.parameter_end_token in delta_text:
                    # End of parameter
                    end_idx = delta_text.find(self.parameter_end_token)
                    value_chunk = delta_text[:end_idx]

                    # Skip past > if at start
                    if not self.current_param_value and ">" in value_chunk:
                        gt_idx = value_chunk.find(">")
                        value_chunk = value_chunk[gt_idx + 1:]

                    if not self.current_param_value and value_chunk.startswith(
                            "\n"):
                        value_chunk = value_chunk[1:]

                    # Store complete value
                    full_value = self.current_param_value + value_chunk
                    self.accumulated_params[
                        self.current_param_name] = full_value

                    # Get parameter configuration for type conversion
                    param_config = self._get_arguments_config(
                        self.current_function_name,
                        self.streaming_request.tools
                        if self.streaming_request else None)

                    # Convert the parameter value to the appropriate type
                    converted_value = self._convert_param_value(
                        full_value, self.current_param_name, param_config,
                        self.current_function_name)

                    # Serialize the converted value
                    serialized_value = json.dumps(converted_value,
                                                  ensure_ascii=False)

                    # Since we've been streaming the quoted version, we need to
                    # close it properly. This is complex - for now just complete
                    # the value.
                    self.in_param = False
                    self.current_param_value = ""

                    # Just close the current parameter string
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            function=DeltaFunctionCall(
                                arguments='"'),  # Close the string quote
                        )
                    ])
                else:
                    # Continue accumulating value
                    value_chunk = delta_text

                    # Handle first chunk after param name
                    if not self.current_param_value and ">" in value_chunk:
                        gt_idx = value_chunk.find(">")
                        value_chunk = value_chunk[gt_idx + 1:]

                    if not self.current_param_value and value_chunk.startswith(
                            "\n"):
                        value_chunk = value_chunk[1:]

                    if value_chunk:
                        # Stream the escaped delta
                        prev_escaped = json.dumps(
                            self.current_param_value, ensure_ascii=False
                        )[1:-1] if self.current_param_value else ""
                        self.current_param_value += value_chunk
                        full_escaped = json.dumps(self.current_param_value,
                                                  ensure_ascii=False)[1:-1]
                        delta_escaped = full_escaped[len(prev_escaped):]

                        if delta_escaped:
                            return DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=delta_escaped),
                                )
                            ])

        return None
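For orientation, the arguments channel emitted by the streaming path above assembles one JSON object incrementally. A schematic of the fragments for a two-parameter call, with illustrative values:

# Schematic of the argument deltas the streaming path emits for one
# two-parameter call (values are illustrative).
fragments = [
    "{",                   # sent once the function header is out
    '"path": "setup.py"',  # first complete <parameter=...> block
    ', "limit": 10',       # each later parameter is comma-prefixed
    "}",                   # sent when </function> is observed
]
assert "".join(fragments) == '{"path": "setup.py", "limit": 10}'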
303318  trace_model_meta/Qwen/Qwen3-Coder-480B-A35B-Instruct/tokenizer.json  Normal file
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
86  trace_model_meta/ZhipuAI/GLM-5-FP8/chat_template.jinja  Normal file
@@ -0,0 +1,86 @@
[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>

For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{%- set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{%- for m in messages -%}
{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
{{ '<think>' + reasoning_content.strip() + '</think>' }}
{%- else -%}
{{ '</think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{- '<tool_call>' + tc.name -}}
{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '<tool_response>' }}
{{- m.content }}
{{- '</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}
<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
{%- endif -%}
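The arg_key/arg_value layout this template emits is easiest to see rendered. A small sketch that formats one hypothetical tool call the same way (non-string values JSON-encoded, as in the template):

# Sketch of the <arg_key>/<arg_value> layout the GLM template emits for a
# hypothetical tool call.
import json

name, args = "get_weather", {"city": "Paris", "days": 3}
parts = [f"<arg_key>{k}</arg_key><arg_value>"
         f"{v if isinstance(v, str) else json.dumps(v, ensure_ascii=False)}"
         f"</arg_value>" for k, v in args.items()]
print(f"<tool_call>{name}{''.join(parts)}</tool_call>")
# -> <tool_call>get_weather<arg_key>city</arg_key><arg_value>Paris</arg_value>
#    <arg_key>days</arg_key><arg_value>3</arg_value></tool_call>  (one line)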
782  trace_model_meta/ZhipuAI/GLM-5-FP8/config.json  Normal file
@@ -0,0 +1,782 @@
{
  "architectures": [
    "GlmMoeDsaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "dtype": "bfloat16",
  "eos_token_id": [
    154820,
    154827,
    154829
  ],
  "ep_size": 1,
  "first_k_dense_replace": 3,
  "hidden_act": "silu",
  "head_dim": 64,
  "hidden_size": 6144,
  "index_head_dim": 128,
  "index_n_heads": 32,
  "index_topk": 2048,
  "indexer_rope_interleave": true,
  "initializer_range": 0.02,
  "intermediate_size": 12288,
  "kv_lora_rank": 512,
  "max_position_embeddings": 202752,
  "moe_intermediate_size": 2048,
  "moe_layer_freq": 1,
  "model_type": "glm_moe_dsa",
  "n_group": 1,
  "n_routed_experts": 256,
  "n_shared_experts": 1,
  "norm_topk_prob": true,
  "num_attention_heads": 64,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 78,
  "num_key_value_heads": 64,
  "num_nextn_predict_layers": 1,
  "pad_token_id": 154820,
  "pretraining_tp": 1,
  "q_lora_rank": 2048,
  "qk_head_dim": 256,
  "qk_nope_head_dim": 192,
  "qk_rope_head_dim": 64,
  "rms_norm_eps": 1e-05,
  "rope_interleave": true,
  "rope_parameters": {
    "rope_theta": 1000000,
    "rope_type": "default"
  },
  "routed_scaling_factor": 2.5,
  "scoring_func": "sigmoid",
  "tie_word_embeddings": false,
  "topk_group": 1,
  "topk_method": "noaux_tc",
  "transformers_version": "5.0.2.dev0",
  "use_cache": true,
  "v_head_dim": 256,
  "vocab_size": 154880,
  "quantization_config": {
    "activation_scheme": "dynamic",
    "fmt": "e4m3",
    "quant_method": "fp8",
    "weight_block_size": [
      128,
      128
    ],
    "modules_to_not_convert": [
      "lm_head",
      "model.embed_tokens",
      "model.layers.0.input_layernorm",
      "model.layers.0.post_attention_layernorm",
      "model.layers.0.self_attn.indexer.k_norm",
      "model.layers.0.self_attn.indexer.k_norm.bias",
      "model.layers.0.self_attn.indexers_proj",
      "model.layers.0.self_attn.kv_a_layernorm",
      "model.layers.0.self_attn.q_a_layernorm",
      "model.layers.1.input_layernorm",
      "model.layers.1.post_attention_layernorm",
      "model.layers.1.self_attn.indexer.k_norm",
      "model.layers.1.self_attn.indexer.k_norm.bias",
      "model.layers.1.self_attn.indexers_proj",
      "model.layers.1.self_attn.kv_a_layernorm",
      "model.layers.1.self_attn.q_a_layernorm",
      "model.layers.2.input_layernorm",
      "model.layers.2.post_attention_layernorm",
      "model.layers.2.self_attn.indexer.k_norm",
      "model.layers.2.self_attn.indexer.k_norm.bias",
      "model.layers.2.self_attn.indexers_proj",
      "model.layers.2.self_attn.kv_a_layernorm",
      "model.layers.2.self_attn.q_a_layernorm",
      "model.layers.3.input_layernorm",
      "model.layers.3.mlp.gate",
      "model.layers.3.mlp.gate.e_score_correction_bias",
      "model.layers.3.post_attention_layernorm",
      "model.layers.3.self_attn.indexer.k_norm",
      "model.layers.3.self_attn.indexer.k_norm.bias",
      "model.layers.3.self_attn.indexers_proj",
      "model.layers.3.self_attn.kv_a_layernorm",
      "model.layers.3.self_attn.q_a_layernorm",
      "model.layers.4.input_layernorm",
      "model.layers.4.mlp.gate",
      "model.layers.4.mlp.gate.e_score_correction_bias",
      "model.layers.4.post_attention_layernorm",
      "model.layers.4.self_attn.indexer.k_norm",
      "model.layers.4.self_attn.indexer.k_norm.bias",
      "model.layers.4.self_attn.indexers_proj",
      "model.layers.4.self_attn.kv_a_layernorm",
      "model.layers.4.self_attn.q_a_layernorm",
      "model.layers.5.input_layernorm",
      "model.layers.5.mlp.gate",
      "model.layers.5.mlp.gate.e_score_correction_bias",
      "model.layers.5.post_attention_layernorm",
      "model.layers.5.self_attn.indexer.k_norm",
      "model.layers.5.self_attn.indexer.k_norm.bias",
      "model.layers.5.self_attn.indexers_proj",
      "model.layers.5.self_attn.kv_a_layernorm",
      "model.layers.5.self_attn.q_a_layernorm",
      "model.layers.6.input_layernorm",
      "model.layers.6.mlp.gate",
      "model.layers.6.mlp.gate.e_score_correction_bias",
      "model.layers.6.post_attention_layernorm",
      "model.layers.6.self_attn.indexer.k_norm",
      "model.layers.6.self_attn.indexer.k_norm.bias",
      "model.layers.6.self_attn.indexers_proj",
      "model.layers.6.self_attn.kv_a_layernorm",
      "model.layers.6.self_attn.q_a_layernorm",
      "model.layers.7.input_layernorm",
      "model.layers.7.mlp.gate",
      "model.layers.7.mlp.gate.e_score_correction_bias",
      "model.layers.7.post_attention_layernorm",
      "model.layers.7.self_attn.indexer.k_norm",
      "model.layers.7.self_attn.indexer.k_norm.bias",
      "model.layers.7.self_attn.indexers_proj",
      "model.layers.7.self_attn.kv_a_layernorm",
      "model.layers.7.self_attn.q_a_layernorm",
      "model.layers.8.input_layernorm",
      "model.layers.8.mlp.gate",
      "model.layers.8.mlp.gate.e_score_correction_bias",
      "model.layers.8.post_attention_layernorm",
      "model.layers.8.self_attn.indexer.k_norm",
      "model.layers.8.self_attn.indexer.k_norm.bias",
      "model.layers.8.self_attn.indexers_proj",
      "model.layers.8.self_attn.kv_a_layernorm",
      "model.layers.8.self_attn.q_a_layernorm",
      "model.layers.9.input_layernorm",
      "model.layers.9.mlp.gate",
      "model.layers.9.mlp.gate.e_score_correction_bias",
      "model.layers.9.post_attention_layernorm",
      "model.layers.9.self_attn.indexer.k_norm",
      "model.layers.9.self_attn.indexer.k_norm.bias",
      "model.layers.9.self_attn.indexers_proj",
      "model.layers.9.self_attn.kv_a_layernorm",
      "model.layers.9.self_attn.q_a_layernorm",
      "model.layers.10.input_layernorm",
      "model.layers.10.mlp.gate",
      "model.layers.10.mlp.gate.e_score_correction_bias",
      "model.layers.10.post_attention_layernorm",
      "model.layers.10.self_attn.indexer.k_norm",
      "model.layers.10.self_attn.indexer.k_norm.bias",
      "model.layers.10.self_attn.indexers_proj",
      "model.layers.10.self_attn.kv_a_layernorm",
      "model.layers.10.self_attn.q_a_layernorm",
      "model.layers.11.input_layernorm",
      "model.layers.11.mlp.gate",
      "model.layers.11.mlp.gate.e_score_correction_bias",
      "model.layers.11.post_attention_layernorm",
      "model.layers.11.self_attn.indexer.k_norm",
      "model.layers.11.self_attn.indexer.k_norm.bias",
      "model.layers.11.self_attn.indexers_proj",
      "model.layers.11.self_attn.kv_a_layernorm",
      "model.layers.11.self_attn.q_a_layernorm",
      "model.layers.12.input_layernorm",
      "model.layers.12.mlp.gate",
      "model.layers.12.mlp.gate.e_score_correction_bias",
      "model.layers.12.post_attention_layernorm",
      "model.layers.12.self_attn.indexer.k_norm",
      "model.layers.12.self_attn.indexer.k_norm.bias",
      "model.layers.12.self_attn.indexers_proj",
      "model.layers.12.self_attn.kv_a_layernorm",
      "model.layers.12.self_attn.q_a_layernorm",
      "model.layers.13.input_layernorm",
      "model.layers.13.mlp.gate",
      "model.layers.13.mlp.gate.e_score_correction_bias",
      "model.layers.13.post_attention_layernorm",
      "model.layers.13.self_attn.indexer.k_norm",
      "model.layers.13.self_attn.indexer.k_norm.bias",
      "model.layers.13.self_attn.indexers_proj",
      "model.layers.13.self_attn.kv_a_layernorm",
      "model.layers.13.self_attn.q_a_layernorm",
      "model.layers.14.input_layernorm",
      "model.layers.14.mlp.gate",
      "model.layers.14.mlp.gate.e_score_correction_bias",
      "model.layers.14.post_attention_layernorm",
      "model.layers.14.self_attn.indexer.k_norm",
      "model.layers.14.self_attn.indexer.k_norm.bias",
      "model.layers.14.self_attn.indexers_proj",
      "model.layers.14.self_attn.kv_a_layernorm",
      "model.layers.14.self_attn.q_a_layernorm",
      "model.layers.15.input_layernorm",
      "model.layers.15.mlp.gate",
      "model.layers.15.mlp.gate.e_score_correction_bias",
      "model.layers.15.post_attention_layernorm",
      "model.layers.15.self_attn.indexer.k_norm",
      "model.layers.15.self_attn.indexer.k_norm.bias",
      "model.layers.15.self_attn.indexers_proj",
      "model.layers.15.self_attn.kv_a_layernorm",
      "model.layers.15.self_attn.q_a_layernorm",
      "model.layers.16.input_layernorm",
      "model.layers.16.mlp.gate",
      "model.layers.16.mlp.gate.e_score_correction_bias",
      "model.layers.16.post_attention_layernorm",
      "model.layers.16.self_attn.indexer.k_norm",
      "model.layers.16.self_attn.indexer.k_norm.bias",
      "model.layers.16.self_attn.indexers_proj",
      "model.layers.16.self_attn.kv_a_layernorm",
      "model.layers.16.self_attn.q_a_layernorm",
      "model.layers.17.input_layernorm",
      "model.layers.17.mlp.gate",
      "model.layers.17.mlp.gate.e_score_correction_bias",
      "model.layers.17.post_attention_layernorm",
      "model.layers.17.self_attn.indexer.k_norm",
      "model.layers.17.self_attn.indexer.k_norm.bias",
      "model.layers.17.self_attn.indexers_proj",
      "model.layers.17.self_attn.kv_a_layernorm",
      "model.layers.17.self_attn.q_a_layernorm",
      "model.layers.18.input_layernorm",
      "model.layers.18.mlp.gate",
      "model.layers.18.mlp.gate.e_score_correction_bias",
      "model.layers.18.post_attention_layernorm",
      "model.layers.18.self_attn.indexer.k_norm",
      "model.layers.18.self_attn.indexer.k_norm.bias",
      "model.layers.18.self_attn.indexers_proj",
      "model.layers.18.self_attn.kv_a_layernorm",
      "model.layers.18.self_attn.q_a_layernorm",
      "model.layers.19.input_layernorm",
      "model.layers.19.mlp.gate",
      "model.layers.19.mlp.gate.e_score_correction_bias",
      "model.layers.19.post_attention_layernorm",
      "model.layers.19.self_attn.indexer.k_norm",
      "model.layers.19.self_attn.indexer.k_norm.bias",
      "model.layers.19.self_attn.indexers_proj",
      "model.layers.19.self_attn.kv_a_layernorm",
      "model.layers.19.self_attn.q_a_layernorm",
      "model.layers.20.input_layernorm",
      "model.layers.20.mlp.gate",
      "model.layers.20.mlp.gate.e_score_correction_bias",
      "model.layers.20.post_attention_layernorm",
      "model.layers.20.self_attn.indexer.k_norm",
      "model.layers.20.self_attn.indexer.k_norm.bias",
      "model.layers.20.self_attn.indexers_proj",
      "model.layers.20.self_attn.kv_a_layernorm",
      "model.layers.20.self_attn.q_a_layernorm",
      "model.layers.21.input_layernorm",
      "model.layers.21.mlp.gate",
      "model.layers.21.mlp.gate.e_score_correction_bias",
      "model.layers.21.post_attention_layernorm",
      "model.layers.21.self_attn.indexer.k_norm",
      "model.layers.21.self_attn.indexer.k_norm.bias",
      "model.layers.21.self_attn.indexers_proj",
      "model.layers.21.self_attn.kv_a_layernorm",
      "model.layers.21.self_attn.q_a_layernorm",
      "model.layers.22.input_layernorm",
      "model.layers.22.mlp.gate",
      "model.layers.22.mlp.gate.e_score_correction_bias",
      "model.layers.22.post_attention_layernorm",
      "model.layers.22.self_attn.indexer.k_norm",
      "model.layers.22.self_attn.indexer.k_norm.bias",
      "model.layers.22.self_attn.indexers_proj",
      "model.layers.22.self_attn.kv_a_layernorm",
      "model.layers.22.self_attn.q_a_layernorm",
      "model.layers.23.input_layernorm",
      "model.layers.23.mlp.gate",
      "model.layers.23.mlp.gate.e_score_correction_bias",
      "model.layers.23.post_attention_layernorm",
      "model.layers.23.self_attn.indexer.k_norm",
      "model.layers.23.self_attn.indexer.k_norm.bias",
      "model.layers.23.self_attn.indexers_proj",
      "model.layers.23.self_attn.kv_a_layernorm",
      "model.layers.23.self_attn.q_a_layernorm",
      "model.layers.24.input_layernorm",
      "model.layers.24.mlp.gate",
      "model.layers.24.mlp.gate.e_score_correction_bias",
      "model.layers.24.post_attention_layernorm",
      "model.layers.24.self_attn.indexer.k_norm",
      "model.layers.24.self_attn.indexer.k_norm.bias",
      "model.layers.24.self_attn.indexers_proj",
      "model.layers.24.self_attn.kv_a_layernorm",
      "model.layers.24.self_attn.q_a_layernorm",
      "model.layers.25.input_layernorm",
      "model.layers.25.mlp.gate",
      "model.layers.25.mlp.gate.e_score_correction_bias",
      "model.layers.25.post_attention_layernorm",
      "model.layers.25.self_attn.indexer.k_norm",
      "model.layers.25.self_attn.indexer.k_norm.bias",
      "model.layers.25.self_attn.indexers_proj",
      "model.layers.25.self_attn.kv_a_layernorm",
      "model.layers.25.self_attn.q_a_layernorm",
      "model.layers.26.input_layernorm",
      "model.layers.26.mlp.gate",
      "model.layers.26.mlp.gate.e_score_correction_bias",
      "model.layers.26.post_attention_layernorm",
      "model.layers.26.self_attn.indexer.k_norm",
      "model.layers.26.self_attn.indexer.k_norm.bias",
      "model.layers.26.self_attn.indexers_proj",
      "model.layers.26.self_attn.kv_a_layernorm",
      "model.layers.26.self_attn.q_a_layernorm",
      "model.layers.27.input_layernorm",
      "model.layers.27.mlp.gate",
      "model.layers.27.mlp.gate.e_score_correction_bias",
      "model.layers.27.post_attention_layernorm",
      "model.layers.27.self_attn.indexer.k_norm",
      "model.layers.27.self_attn.indexer.k_norm.bias",
      "model.layers.27.self_attn.indexers_proj",
      "model.layers.27.self_attn.kv_a_layernorm",
      "model.layers.27.self_attn.q_a_layernorm",
      "model.layers.28.input_layernorm",
      "model.layers.28.mlp.gate",
      "model.layers.28.mlp.gate.e_score_correction_bias",
      "model.layers.28.post_attention_layernorm",
      "model.layers.28.self_attn.indexer.k_norm",
      "model.layers.28.self_attn.indexer.k_norm.bias",
      "model.layers.28.self_attn.indexers_proj",
      "model.layers.28.self_attn.kv_a_layernorm",
      "model.layers.28.self_attn.q_a_layernorm",
      "model.layers.29.input_layernorm",
      "model.layers.29.mlp.gate",
      "model.layers.29.mlp.gate.e_score_correction_bias",
      "model.layers.29.post_attention_layernorm",
      "model.layers.29.self_attn.indexer.k_norm",
      "model.layers.29.self_attn.indexer.k_norm.bias",
      "model.layers.29.self_attn.indexers_proj",
      "model.layers.29.self_attn.kv_a_layernorm",
      "model.layers.29.self_attn.q_a_layernorm",
      "model.layers.30.input_layernorm",
      "model.layers.30.mlp.gate",
      "model.layers.30.mlp.gate.e_score_correction_bias",
      "model.layers.30.post_attention_layernorm",
      "model.layers.30.self_attn.indexer.k_norm",
      "model.layers.30.self_attn.indexer.k_norm.bias",
      "model.layers.30.self_attn.indexers_proj",
      "model.layers.30.self_attn.kv_a_layernorm",
      "model.layers.30.self_attn.q_a_layernorm",
      "model.layers.31.input_layernorm",
      "model.layers.31.mlp.gate",
      "model.layers.31.mlp.gate.e_score_correction_bias",
      "model.layers.31.post_attention_layernorm",
      "model.layers.31.self_attn.indexer.k_norm",
      "model.layers.31.self_attn.indexer.k_norm.bias",
      "model.layers.31.self_attn.indexers_proj",
      "model.layers.31.self_attn.kv_a_layernorm",
      "model.layers.31.self_attn.q_a_layernorm",
      "model.layers.32.input_layernorm",
      "model.layers.32.mlp.gate",
      "model.layers.32.mlp.gate.e_score_correction_bias",
      "model.layers.32.post_attention_layernorm",
      "model.layers.32.self_attn.indexer.k_norm",
      "model.layers.32.self_attn.indexer.k_norm.bias",
      "model.layers.32.self_attn.indexers_proj",
      "model.layers.32.self_attn.kv_a_layernorm",
      "model.layers.32.self_attn.q_a_layernorm",
      "model.layers.33.input_layernorm",
      "model.layers.33.mlp.gate",
      "model.layers.33.mlp.gate.e_score_correction_bias",
      "model.layers.33.post_attention_layernorm",
      "model.layers.33.self_attn.indexer.k_norm",
      "model.layers.33.self_attn.indexer.k_norm.bias",
      "model.layers.33.self_attn.indexers_proj",
      "model.layers.33.self_attn.kv_a_layernorm",
      "model.layers.33.self_attn.q_a_layernorm",
      "model.layers.34.input_layernorm",
      "model.layers.34.mlp.gate",
      "model.layers.34.mlp.gate.e_score_correction_bias",
      "model.layers.34.post_attention_layernorm",
      "model.layers.34.self_attn.indexer.k_norm",
      "model.layers.34.self_attn.indexer.k_norm.bias",
      "model.layers.34.self_attn.indexers_proj",
      "model.layers.34.self_attn.kv_a_layernorm",
      "model.layers.34.self_attn.q_a_layernorm",
      "model.layers.35.input_layernorm",
      "model.layers.35.mlp.gate",
      "model.layers.35.mlp.gate.e_score_correction_bias",
      "model.layers.35.post_attention_layernorm",
      "model.layers.35.self_attn.indexer.k_norm",
      "model.layers.35.self_attn.indexer.k_norm.bias",
      "model.layers.35.self_attn.indexers_proj",
      "model.layers.35.self_attn.kv_a_layernorm",
      "model.layers.35.self_attn.q_a_layernorm",
      "model.layers.36.input_layernorm",
      "model.layers.36.mlp.gate",
      "model.layers.36.mlp.gate.e_score_correction_bias",
      "model.layers.36.post_attention_layernorm",
      "model.layers.36.self_attn.indexer.k_norm",
      "model.layers.36.self_attn.indexer.k_norm.bias",
      "model.layers.36.self_attn.indexers_proj",
      "model.layers.36.self_attn.kv_a_layernorm",
      "model.layers.36.self_attn.q_a_layernorm",
      "model.layers.37.input_layernorm",
      "model.layers.37.mlp.gate",
      "model.layers.37.mlp.gate.e_score_correction_bias",
      "model.layers.37.post_attention_layernorm",
      "model.layers.37.self_attn.indexer.k_norm",
      "model.layers.37.self_attn.indexer.k_norm.bias",
      "model.layers.37.self_attn.indexers_proj",
      "model.layers.37.self_attn.kv_a_layernorm",
      "model.layers.37.self_attn.q_a_layernorm",
      "model.layers.38.input_layernorm",
      "model.layers.38.mlp.gate",
      "model.layers.38.mlp.gate.e_score_correction_bias",
      "model.layers.38.post_attention_layernorm",
      "model.layers.38.self_attn.indexer.k_norm",
      "model.layers.38.self_attn.indexer.k_norm.bias",
      "model.layers.38.self_attn.indexers_proj",
      "model.layers.38.self_attn.kv_a_layernorm",
      "model.layers.38.self_attn.q_a_layernorm",
      "model.layers.39.input_layernorm",
      "model.layers.39.mlp.gate",
      "model.layers.39.mlp.gate.e_score_correction_bias",
      "model.layers.39.post_attention_layernorm",
      "model.layers.39.self_attn.indexer.k_norm",
      "model.layers.39.self_attn.indexer.k_norm.bias",
      "model.layers.39.self_attn.indexers_proj",
      "model.layers.39.self_attn.kv_a_layernorm",
      "model.layers.39.self_attn.q_a_layernorm",
      "model.layers.40.input_layernorm",
      "model.layers.40.mlp.gate",
      "model.layers.40.mlp.gate.e_score_correction_bias",
      "model.layers.40.post_attention_layernorm",
      "model.layers.40.self_attn.indexer.k_norm",
      "model.layers.40.self_attn.indexer.k_norm.bias",
      "model.layers.40.self_attn.indexers_proj",
      "model.layers.40.self_attn.kv_a_layernorm",
      "model.layers.40.self_attn.q_a_layernorm",
      "model.layers.41.input_layernorm",
      "model.layers.41.mlp.gate",
      "model.layers.41.mlp.gate.e_score_correction_bias",
      "model.layers.41.post_attention_layernorm",
      "model.layers.41.self_attn.indexer.k_norm",
      "model.layers.41.self_attn.indexer.k_norm.bias",
      "model.layers.41.self_attn.indexers_proj",
      "model.layers.41.self_attn.kv_a_layernorm",
      "model.layers.41.self_attn.q_a_layernorm",
      "model.layers.42.input_layernorm",
      "model.layers.42.mlp.gate",
      "model.layers.42.mlp.gate.e_score_correction_bias",
      "model.layers.42.post_attention_layernorm",
      "model.layers.42.self_attn.indexer.k_norm",
      "model.layers.42.self_attn.indexer.k_norm.bias",
      "model.layers.42.self_attn.indexers_proj",
      "model.layers.42.self_attn.kv_a_layernorm",
      "model.layers.42.self_attn.q_a_layernorm",
      "model.layers.43.input_layernorm",
      "model.layers.43.mlp.gate",
      "model.layers.43.mlp.gate.e_score_correction_bias",
      "model.layers.43.post_attention_layernorm",
      "model.layers.43.self_attn.indexer.k_norm",
      "model.layers.43.self_attn.indexer.k_norm.bias",
      "model.layers.43.self_attn.indexers_proj",
      "model.layers.43.self_attn.kv_a_layernorm",
      "model.layers.43.self_attn.q_a_layernorm",
      "model.layers.44.input_layernorm",
      "model.layers.44.mlp.gate",
      "model.layers.44.mlp.gate.e_score_correction_bias",
      "model.layers.44.post_attention_layernorm",
      "model.layers.44.self_attn.indexer.k_norm",
      "model.layers.44.self_attn.indexer.k_norm.bias",
      "model.layers.44.self_attn.indexers_proj",
      "model.layers.44.self_attn.kv_a_layernorm",
      "model.layers.44.self_attn.q_a_layernorm",
      "model.layers.45.input_layernorm",
      "model.layers.45.mlp.gate",
      "model.layers.45.mlp.gate.e_score_correction_bias",
      "model.layers.45.post_attention_layernorm",
      "model.layers.45.self_attn.indexer.k_norm",
      "model.layers.45.self_attn.indexer.k_norm.bias",
      "model.layers.45.self_attn.indexers_proj",
      "model.layers.45.self_attn.kv_a_layernorm",
      "model.layers.45.self_attn.q_a_layernorm",
      "model.layers.46.input_layernorm",
      "model.layers.46.mlp.gate",
      "model.layers.46.mlp.gate.e_score_correction_bias",
      "model.layers.46.post_attention_layernorm",
      "model.layers.46.self_attn.indexer.k_norm",
      "model.layers.46.self_attn.indexer.k_norm.bias",
      "model.layers.46.self_attn.indexers_proj",
      "model.layers.46.self_attn.kv_a_layernorm",
      "model.layers.46.self_attn.q_a_layernorm",
      "model.layers.47.input_layernorm",
      "model.layers.47.mlp.gate",
      "model.layers.47.mlp.gate.e_score_correction_bias",
      "model.layers.47.post_attention_layernorm",
      "model.layers.47.self_attn.indexer.k_norm",
      "model.layers.47.self_attn.indexer.k_norm.bias",
      "model.layers.47.self_attn.indexers_proj",
      "model.layers.47.self_attn.kv_a_layernorm",
      "model.layers.47.self_attn.q_a_layernorm",
      "model.layers.48.input_layernorm",
      "model.layers.48.mlp.gate",
      "model.layers.48.mlp.gate.e_score_correction_bias",
||||
"model.layers.48.post_attention_layernorm",
|
||||
"model.layers.48.self_attn.indexer.k_norm",
|
||||
"model.layers.48.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.48.self_attn.indexers_proj",
|
||||
"model.layers.48.self_attn.kv_a_layernorm",
|
||||
"model.layers.48.self_attn.q_a_layernorm",
|
||||
"model.layers.49.input_layernorm",
|
||||
"model.layers.49.mlp.gate",
|
||||
"model.layers.49.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.49.post_attention_layernorm",
|
||||
"model.layers.49.self_attn.indexer.k_norm",
|
||||
"model.layers.49.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.49.self_attn.indexers_proj",
|
||||
"model.layers.49.self_attn.kv_a_layernorm",
|
||||
"model.layers.49.self_attn.q_a_layernorm",
|
||||
"model.layers.50.input_layernorm",
|
||||
"model.layers.50.mlp.gate",
|
||||
"model.layers.50.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.50.post_attention_layernorm",
|
||||
"model.layers.50.self_attn.indexer.k_norm",
|
||||
"model.layers.50.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.50.self_attn.indexers_proj",
|
||||
"model.layers.50.self_attn.kv_a_layernorm",
|
||||
"model.layers.50.self_attn.q_a_layernorm",
|
||||
"model.layers.51.input_layernorm",
|
||||
"model.layers.51.mlp.gate",
|
||||
"model.layers.51.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.51.post_attention_layernorm",
|
||||
"model.layers.51.self_attn.indexer.k_norm",
|
||||
"model.layers.51.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.51.self_attn.indexers_proj",
|
||||
"model.layers.51.self_attn.kv_a_layernorm",
|
||||
"model.layers.51.self_attn.q_a_layernorm",
|
||||
"model.layers.52.input_layernorm",
|
||||
"model.layers.52.mlp.gate",
|
||||
"model.layers.52.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.52.post_attention_layernorm",
|
||||
"model.layers.52.self_attn.indexer.k_norm",
|
||||
"model.layers.52.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.52.self_attn.indexers_proj",
|
||||
"model.layers.52.self_attn.kv_a_layernorm",
|
||||
"model.layers.52.self_attn.q_a_layernorm",
|
||||
"model.layers.53.input_layernorm",
|
||||
"model.layers.53.mlp.gate",
|
||||
"model.layers.53.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.53.post_attention_layernorm",
|
||||
"model.layers.53.self_attn.indexer.k_norm",
|
||||
"model.layers.53.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.53.self_attn.indexers_proj",
|
||||
"model.layers.53.self_attn.kv_a_layernorm",
|
||||
"model.layers.53.self_attn.q_a_layernorm",
|
||||
"model.layers.54.input_layernorm",
|
||||
"model.layers.54.mlp.gate",
|
||||
"model.layers.54.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.54.post_attention_layernorm",
|
||||
"model.layers.54.self_attn.indexer.k_norm",
|
||||
"model.layers.54.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.54.self_attn.indexers_proj",
|
||||
"model.layers.54.self_attn.kv_a_layernorm",
|
||||
"model.layers.54.self_attn.q_a_layernorm",
|
||||
"model.layers.55.input_layernorm",
|
||||
"model.layers.55.mlp.gate",
|
||||
"model.layers.55.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.55.post_attention_layernorm",
|
||||
"model.layers.55.self_attn.indexer.k_norm",
|
||||
"model.layers.55.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.55.self_attn.indexers_proj",
|
||||
"model.layers.55.self_attn.kv_a_layernorm",
|
||||
"model.layers.55.self_attn.q_a_layernorm",
|
||||
"model.layers.56.input_layernorm",
|
||||
"model.layers.56.mlp.gate",
|
||||
"model.layers.56.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.56.post_attention_layernorm",
|
||||
"model.layers.56.self_attn.indexer.k_norm",
|
||||
"model.layers.56.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.56.self_attn.indexers_proj",
|
||||
"model.layers.56.self_attn.kv_a_layernorm",
|
||||
"model.layers.56.self_attn.q_a_layernorm",
|
||||
"model.layers.57.input_layernorm",
|
||||
"model.layers.57.mlp.gate",
|
||||
"model.layers.57.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.57.post_attention_layernorm",
|
||||
"model.layers.57.self_attn.indexer.k_norm",
|
||||
"model.layers.57.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.57.self_attn.indexers_proj",
|
||||
"model.layers.57.self_attn.kv_a_layernorm",
|
||||
"model.layers.57.self_attn.q_a_layernorm",
|
||||
"model.layers.58.input_layernorm",
|
||||
"model.layers.58.mlp.gate",
|
||||
"model.layers.58.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.58.post_attention_layernorm",
|
||||
"model.layers.58.self_attn.indexer.k_norm",
|
||||
"model.layers.58.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.58.self_attn.indexers_proj",
|
||||
"model.layers.58.self_attn.kv_a_layernorm",
|
||||
"model.layers.58.self_attn.q_a_layernorm",
|
||||
"model.layers.59.input_layernorm",
|
||||
"model.layers.59.mlp.gate",
|
||||
"model.layers.59.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.59.post_attention_layernorm",
|
||||
"model.layers.59.self_attn.indexer.k_norm",
|
||||
"model.layers.59.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.59.self_attn.indexers_proj",
|
||||
"model.layers.59.self_attn.kv_a_layernorm",
|
||||
"model.layers.59.self_attn.q_a_layernorm",
|
||||
"model.layers.60.input_layernorm",
|
||||
"model.layers.60.mlp.gate",
|
||||
"model.layers.60.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.60.post_attention_layernorm",
|
||||
"model.layers.60.self_attn.indexer.k_norm",
|
||||
"model.layers.60.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.60.self_attn.indexers_proj",
|
||||
"model.layers.60.self_attn.kv_a_layernorm",
|
||||
"model.layers.60.self_attn.q_a_layernorm",
|
||||
"model.layers.61.input_layernorm",
|
||||
"model.layers.61.mlp.gate",
|
||||
"model.layers.61.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.61.post_attention_layernorm",
|
||||
"model.layers.61.self_attn.indexer.k_norm",
|
||||
"model.layers.61.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.61.self_attn.indexers_proj",
|
||||
"model.layers.61.self_attn.kv_a_layernorm",
|
||||
"model.layers.61.self_attn.q_a_layernorm",
|
||||
"model.layers.62.input_layernorm",
|
||||
"model.layers.62.mlp.gate",
|
||||
"model.layers.62.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.62.post_attention_layernorm",
|
||||
"model.layers.62.self_attn.indexer.k_norm",
|
||||
"model.layers.62.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.62.self_attn.indexers_proj",
|
||||
"model.layers.62.self_attn.kv_a_layernorm",
|
||||
"model.layers.62.self_attn.q_a_layernorm",
|
||||
"model.layers.63.input_layernorm",
|
||||
"model.layers.63.mlp.gate",
|
||||
"model.layers.63.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.63.post_attention_layernorm",
|
||||
"model.layers.63.self_attn.indexer.k_norm",
|
||||
"model.layers.63.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.63.self_attn.indexers_proj",
|
||||
"model.layers.63.self_attn.kv_a_layernorm",
|
||||
"model.layers.63.self_attn.q_a_layernorm",
|
||||
"model.layers.64.input_layernorm",
|
||||
"model.layers.64.mlp.gate",
|
||||
"model.layers.64.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.64.post_attention_layernorm",
|
||||
"model.layers.64.self_attn.indexer.k_norm",
|
||||
"model.layers.64.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.64.self_attn.indexers_proj",
|
||||
"model.layers.64.self_attn.kv_a_layernorm",
|
||||
"model.layers.64.self_attn.q_a_layernorm",
|
||||
"model.layers.65.input_layernorm",
|
||||
"model.layers.65.mlp.gate",
|
||||
"model.layers.65.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.65.post_attention_layernorm",
|
||||
"model.layers.65.self_attn.indexer.k_norm",
|
||||
"model.layers.65.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.65.self_attn.indexers_proj",
|
||||
"model.layers.65.self_attn.kv_a_layernorm",
|
||||
"model.layers.65.self_attn.q_a_layernorm",
|
||||
"model.layers.66.input_layernorm",
|
||||
"model.layers.66.mlp.gate",
|
||||
"model.layers.66.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.66.post_attention_layernorm",
|
||||
"model.layers.66.self_attn.indexer.k_norm",
|
||||
"model.layers.66.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.66.self_attn.indexers_proj",
|
||||
"model.layers.66.self_attn.kv_a_layernorm",
|
||||
"model.layers.66.self_attn.q_a_layernorm",
|
||||
"model.layers.67.input_layernorm",
|
||||
"model.layers.67.mlp.gate",
|
||||
"model.layers.67.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.67.post_attention_layernorm",
|
||||
"model.layers.67.self_attn.indexer.k_norm",
|
||||
"model.layers.67.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.67.self_attn.indexers_proj",
|
||||
"model.layers.67.self_attn.kv_a_layernorm",
|
||||
"model.layers.67.self_attn.q_a_layernorm",
|
||||
"model.layers.68.input_layernorm",
|
||||
"model.layers.68.mlp.gate",
|
||||
"model.layers.68.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.68.post_attention_layernorm",
|
||||
"model.layers.68.self_attn.indexer.k_norm",
|
||||
"model.layers.68.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.68.self_attn.indexers_proj",
|
||||
"model.layers.68.self_attn.kv_a_layernorm",
|
||||
"model.layers.68.self_attn.q_a_layernorm",
|
||||
"model.layers.69.input_layernorm",
|
||||
"model.layers.69.mlp.gate",
|
||||
"model.layers.69.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.69.post_attention_layernorm",
|
||||
"model.layers.69.self_attn.indexer.k_norm",
|
||||
"model.layers.69.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.69.self_attn.indexers_proj",
|
||||
"model.layers.69.self_attn.kv_a_layernorm",
|
||||
"model.layers.69.self_attn.q_a_layernorm",
|
||||
"model.layers.70.input_layernorm",
|
||||
"model.layers.70.mlp.gate",
|
||||
"model.layers.70.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.70.post_attention_layernorm",
|
||||
"model.layers.70.self_attn.indexer.k_norm",
|
||||
"model.layers.70.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.70.self_attn.indexers_proj",
|
||||
"model.layers.70.self_attn.kv_a_layernorm",
|
||||
"model.layers.70.self_attn.q_a_layernorm",
|
||||
"model.layers.71.input_layernorm",
|
||||
"model.layers.71.mlp.gate",
|
||||
"model.layers.71.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.71.post_attention_layernorm",
|
||||
"model.layers.71.self_attn.indexer.k_norm",
|
||||
"model.layers.71.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.71.self_attn.indexers_proj",
|
||||
"model.layers.71.self_attn.kv_a_layernorm",
|
||||
"model.layers.71.self_attn.q_a_layernorm",
|
||||
"model.layers.72.input_layernorm",
|
||||
"model.layers.72.mlp.gate",
|
||||
"model.layers.72.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.72.post_attention_layernorm",
|
||||
"model.layers.72.self_attn.indexer.k_norm",
|
||||
"model.layers.72.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.72.self_attn.indexers_proj",
|
||||
"model.layers.72.self_attn.kv_a_layernorm",
|
||||
"model.layers.72.self_attn.q_a_layernorm",
|
||||
"model.layers.73.input_layernorm",
|
||||
"model.layers.73.mlp.gate",
|
||||
"model.layers.73.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.73.post_attention_layernorm",
|
||||
"model.layers.73.self_attn.indexer.k_norm",
|
||||
"model.layers.73.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.73.self_attn.indexers_proj",
|
||||
"model.layers.73.self_attn.kv_a_layernorm",
|
||||
"model.layers.73.self_attn.q_a_layernorm",
|
||||
"model.layers.74.input_layernorm",
|
||||
"model.layers.74.mlp.gate",
|
||||
"model.layers.74.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.74.post_attention_layernorm",
|
||||
"model.layers.74.self_attn.indexer.k_norm",
|
||||
"model.layers.74.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.74.self_attn.indexers_proj",
|
||||
"model.layers.74.self_attn.kv_a_layernorm",
|
||||
"model.layers.74.self_attn.q_a_layernorm",
|
||||
"model.layers.75.input_layernorm",
|
||||
"model.layers.75.mlp.gate",
|
||||
"model.layers.75.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.75.post_attention_layernorm",
|
||||
"model.layers.75.self_attn.indexer.k_norm",
|
||||
"model.layers.75.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.75.self_attn.indexers_proj",
|
||||
"model.layers.75.self_attn.kv_a_layernorm",
|
||||
"model.layers.75.self_attn.q_a_layernorm",
|
||||
"model.layers.76.input_layernorm",
|
||||
"model.layers.76.mlp.gate",
|
||||
"model.layers.76.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.76.post_attention_layernorm",
|
||||
"model.layers.76.self_attn.indexer.k_norm",
|
||||
"model.layers.76.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.76.self_attn.indexers_proj",
|
||||
"model.layers.76.self_attn.kv_a_layernorm",
|
||||
"model.layers.76.self_attn.q_a_layernorm",
|
||||
"model.layers.77.input_layernorm",
|
||||
"model.layers.77.mlp.gate",
|
||||
"model.layers.77.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.77.post_attention_layernorm",
|
||||
"model.layers.77.self_attn.indexer.k_norm",
|
||||
"model.layers.77.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.77.self_attn.indexers_proj",
|
||||
"model.layers.77.self_attn.kv_a_layernorm",
|
||||
"model.layers.77.self_attn.q_a_layernorm",
|
||||
"model.layers.78.eh_proj",
|
||||
"model.layers.78.enorm",
|
||||
"model.layers.78.hnorm",
|
||||
"model.layers.78.input_layernorm",
|
||||
"model.layers.78.mlp.gate",
|
||||
"model.layers.78.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.78.post_attention_layernorm",
|
||||
"model.layers.78.self_attn.indexer.k_norm",
|
||||
"model.layers.78.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.78.self_attn.indexers_proj",
|
||||
"model.layers.78.self_attn.kv_a_layernorm",
|
||||
"model.layers.78.self_attn.q_a_layernorm",
|
||||
"model.layers.78.shared_head.norm",
|
||||
"model.norm"
|
||||
]
|
||||
}
|
||||
}
|
||||
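The list above follows a fixed nine-module pattern per layer (the two layernorms, the MoE gate and its e_score_correction_bias, and the indexer projections/norms), with layer 78 adding four MTP-style extras (eh_proj, enorm, hnorm, shared_head.norm). A minimal sketch of how such a list could be regenerated; the helper name and layer count are illustrative and not part of the config:

import json

# Illustrative only: the nine per-layer suffixes observed in the config above.
PER_LAYER_SUFFIXES = [
    "input_layernorm",
    "mlp.gate",
    "mlp.gate.e_score_correction_bias",
    "post_attention_layernorm",
    "self_attn.indexer.k_norm",
    "self_attn.indexer.k_norm.bias",
    "self_attn.indexers_proj",
    "self_attn.kv_a_layernorm",
    "self_attn.q_a_layernorm",
]

def module_names(num_layers: int) -> list[str]:
    # Layer 78's extra modules (eh_proj, enorm, hnorm, shared_head.norm)
    # would still need to be appended separately.
    names = [
        f"model.layers.{layer}.{suffix}"
        for layer in range(num_layers)
        for suffix in PER_LAYER_SUFFIXES
    ]
    names.append("model.norm")
    return names

print(json.dumps(module_names(79), indent=2))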
1441793
trace_model_meta/ZhipuAI/GLM-5-FP8/tokenizer.json
Normal file
File diff suppressed because it is too large
33
trace_model_meta/ZhipuAI/GLM-5-FP8/tokenizer_config.json
Normal file
@@ -0,0 +1,33 @@
{
  "backend": "tokenizers",
  "clean_up_tokenization_spaces": false,
  "do_lower_case": false,
  "eos_token": "<|endoftext|>",
  "extra_special_tokens": [
    "<|endoftext|>",
    "[MASK]",
    "[gMASK]",
    "[sMASK]",
    "<sop>",
    "<eop>",
    "<|system|>",
    "<|user|>",
    "<|assistant|>",
    "<|observation|>",
    "<|begin_of_image|>",
    "<|end_of_image|>",
    "<|begin_of_video|>",
    "<|end_of_video|>",
    "<|begin_of_audio|>",
    "<|end_of_audio|>",
    "<|begin_of_transcription|>",
    "<|end_of_transcription|>"
  ],
  "is_local": true,
  "model_max_length": 202752,
  "model_specific_special_tokens": {},
  "pad_token": "<|endoftext|>",
  "padding_side": "left",
  "remove_space": false,
  "tokenizer_class": "TokenizersBackend"
}
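For reference, a minimal sketch of consuming these two files directly with the tokenizers package (assuming it is installed; the path assumes the repository layout shown in this commit, and the keys used are exactly those listed above):

import json
from tokenizers import Tokenizer

model_dir = "trace_model_meta/ZhipuAI/GLM-5-FP8"
tokenizer = Tokenizer.from_file(f"{model_dir}/tokenizer.json")

with open(f"{model_dir}/tokenizer_config.json", encoding="utf-8") as fh:
    config = json.load(fh)

# eos_token and pad_token both resolve to <|endoftext|> per the config above.
eos_id = tokenizer.token_to_id(config["eos_token"])
print(eos_id, config["model_max_length"])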
23
trace_model_meta/__init__.py
Normal file
@@ -0,0 +1,23 @@
from .registry import (
    ModelMeta,
    detect_model_family_from_features,
    detect_model_family_from_records,
    detect_model_family_from_trace_file,
    get_model_meta,
    infer_model_family_from_request_model,
    resolve_chat_template_path,
    resolve_model_family,
    resolve_tokenizer_path,
)

__all__ = [
    "ModelMeta",
    "detect_model_family_from_features",
    "detect_model_family_from_records",
    "detect_model_family_from_trace_file",
    "get_model_meta",
    "infer_model_family_from_request_model",
    "resolve_chat_template_path",
    "resolve_model_family",
    "resolve_tokenizer_path",
]
201
trace_model_meta/registry.py
Normal file
@@ -0,0 +1,201 @@
from __future__ import annotations

import csv
import json
from dataclasses import dataclass
from pathlib import Path


MODEL_META_ROOT = Path(__file__).resolve().parent


@dataclass(frozen=True)
class ModelMeta:
    family: str
    provider: str
    model_name: str
    request_model_hints: tuple[str, ...]

    @property
    def model_dir(self) -> Path:
        return MODEL_META_ROOT / self.provider / self.model_name

    @property
    def tokenizer_path(self) -> Path:
        return self.model_dir / "tokenizer.json"

    @property
    def chat_template_path(self) -> Path:
        return self.model_dir / "chat_template.jinja"


MODEL_REGISTRY = {
    "glm5": ModelMeta(
        family="glm5",
        provider="ZhipuAI",
        model_name="GLM-5-FP8",
        request_model_hints=("glm", "zhipu"),
    ),
    "qwen3-coder": ModelMeta(
        family="qwen3-coder",
        provider="Qwen",
        model_name="Qwen3-Coder-480B-A35B-Instruct",
        request_model_hints=("qwen3-coder", "qwen3 coder", "qwen3_coder"),
    ),
}

MODEL_ALIASES = {
    "glm": "glm5",
    "glm5": "glm5",
    "zhipu-glm5": "glm5",
    "zhipuai-glm5": "glm5",
    "qwen": "qwen3-coder",
    "qwen3": "qwen3-coder",
    "qwen3-coder": "qwen3-coder",
    "qwen3_coder": "qwen3-coder",
    "coder": "qwen3-coder",
}
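# Example alias resolutions: "qwen3", "coder", and "qwen3_coder" all normalize
# to "qwen3-coder"; "glm" and "zhipuai-glm5" normalize to "glm5". Explicitly
# requested families must appear in this table; resolve_model_family() below
# raises ValueError otherwise, and only "auto"/empty falls through to inference.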


def infer_model_family_from_request_model(request_model: str | None) -> str | None:
    text = str(request_model or "").strip().lower()
    if not text:
        return None
    for family, meta in MODEL_REGISTRY.items():
        if any(hint in text for hint in meta.request_model_hints):
            return family
    return None
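# Matching is substring-based, so request models such as "glm-5-fp8" or
# "Qwen3-Coder-480B" (lowercased) resolve via the hints in MODEL_REGISTRY.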


def _infer_model_family_from_path(input_path: str | Path | None) -> str | None:
    text = str(input_path or "").strip().lower()
    if not text:
        return None
    if "qwen3-coder" in text or "qwen3_coder" in text:
        return "qwen3-coder"
    if "glm5" in text or "trace-glm" in text:
        return "glm5"
    return None


def detect_model_family_from_trace_file(path: str | Path) -> str | None:
    resolved = Path(path)
    with resolved.open("r", encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            raw = json.loads(stripped)
            if isinstance(raw.get("meta"), dict):
                meta = raw["meta"]
                family = str(meta.get("model_family", "")).strip()
                if family:
                    return resolve_model_family(family)
                inferred = infer_model_family_from_request_model(meta.get("request_model"))
                if inferred:
                    return inferred
            inferred = infer_model_family_from_request_model(raw.get("request_model"))
            if inferred:
                return inferred
            break
    return _infer_model_family_from_path(path)
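# Note: only the first non-empty JSONL record is examined (the loop breaks
# after one line); if it yields nothing, detection falls back to substring
# matching on the file path.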


def detect_model_family_from_features(path: str | Path) -> str | None:
    resolved = Path(path)
    with resolved.open("r", encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        for row in reader:
            inferred = infer_model_family_from_request_model(row.get("model"))
            if inferred:
                return inferred
            break
    return _infer_model_family_from_path(path)
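# Same single-shot pattern as above: only the first CSV row's "model" column
# is consulted before falling back to path matching.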


def detect_model_family_from_records(records) -> str | None:
    for record in records:
        inferred = infer_model_family_from_request_model(record.meta.request_model)
        if inferred:
            return inferred
        break
    return None


def resolve_model_family(
    model_family: str | None = None,
    *,
    request_model: str | None = None,
    input_path: str | Path | None = None,
    features_path: str | Path | None = None,
    records=None,
) -> str:
    candidate = str(model_family or "auto").strip().lower()
    if candidate and candidate != "auto":
        if candidate in MODEL_ALIASES:
            return MODEL_ALIASES[candidate]
        raise ValueError(f"Unsupported model family: {model_family}")

    inferred = infer_model_family_from_request_model(request_model)
    if inferred:
        return inferred
    if records is not None:
        inferred = detect_model_family_from_records(records)
        if inferred:
            return inferred
    if features_path is not None:
        inferred = detect_model_family_from_features(features_path)
        if inferred:
            return inferred
    if input_path is not None:
        inferred = detect_model_family_from_trace_file(input_path)
        if inferred:
            return inferred
    return "glm5"


def get_model_meta(model_family: str | None = None, *, model_meta_dir: str | Path | None = None, **kwargs) -> ModelMeta:
    family = resolve_model_family(model_family, **kwargs)
    base_meta = MODEL_REGISTRY[family]
    if model_meta_dir is None:
        return base_meta

    custom_root = Path(model_meta_dir)
    custom_model_dir = custom_root / base_meta.provider / base_meta.model_name
    if not custom_model_dir.exists():
        raise FileNotFoundError(f"Model meta directory not found for {family}: {custom_model_dir}")
    return ModelMeta(
        family=base_meta.family,
        provider=base_meta.provider,
        model_name=base_meta.model_name,
        request_model_hints=base_meta.request_model_hints,
    )
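# A custom model_meta_dir is only validated for existence here; the returned
# ModelMeta still derives model_dir from the packaged MODEL_META_ROOT, so
# callers needing custom paths should pass model_meta_dir to the resolve_*
# helpers below instead.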


def resolve_chat_template_path(
    model_family: str | None = None,
    *,
    model_meta_dir: str | Path | None = None,
    **kwargs,
) -> Path:
    family = resolve_model_family(model_family, **kwargs)
    meta = MODEL_REGISTRY[family]
    model_dir = Path(model_meta_dir) / meta.provider / meta.model_name if model_meta_dir else meta.model_dir
    return model_dir / "chat_template.jinja"


def resolve_tokenizer_path(
    tokenizer_path: str | Path | None = None,
    *,
    model_family: str | None = None,
    model_meta_dir: str | Path | None = None,
    **kwargs,
) -> str:
    if tokenizer_path:
        path = Path(tokenizer_path)
        return str(path.parent if path.is_file() else path)

    family = resolve_model_family(model_family, **kwargs)
    meta = MODEL_REGISTRY[family]
    model_dir = Path(model_meta_dir) / meta.provider / meta.model_name if model_meta_dir else meta.model_dir
    return str(model_dir)
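A short usage sketch of the registry as exported through trace_model_meta/__init__.py; the request-model string is a made-up example, and the printed paths depend on where the package is installed:

from trace_model_meta import (
    get_model_meta,
    resolve_chat_template_path,
    resolve_model_family,
    resolve_tokenizer_path,
)

family = resolve_model_family("auto", request_model="glm-5-fp8")  # -> "glm5"
meta = get_model_meta(family)
print(meta.provider, meta.model_name)                   # ZhipuAI GLM-5-FP8
print(resolve_tokenizer_path(model_family=family))      # .../ZhipuAI/GLM-5-FP8
print(resolve_chat_template_path(model_family=family))  # .../chat_template.jinja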