Initial commit
@@ -0,0 +1,117 @@
{% macro render_extra_keys(json_dict, handled_keys) %}
{%- if json_dict is mapping %}
{%- for json_key in json_dict if json_key not in handled_keys %}
{%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
{{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
{%- else %}
{{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
{%- endif %}
{%- endfor %}
{%- endif %}
{% endmacro %}

{%- if messages[0]["role"] == "system" %}
{%- set system_message = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{%- endif %}

{%- if not tools is defined %}
{%- set tools = [] %}
{%- endif %}

{%- if system_message is defined %}
{{- "<|im_start|>system\n" + system_message }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
{%- endif %}
{%- endif %}
{%- if tools is iterable and tools | length > 0 %}
{{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
{{- "<tools>" }}
{%- for tool in tools %}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
{%- if tool.description is defined %}
{{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
{%- endif %}
{{- '\n<parameters>' }}
{%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- '\n<parameter>' }}
{{- '\n<name>' ~ param_name ~ '</name>' }}
{%- if param_fields.type is defined %}
{{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
{%- endif %}
{%- if param_fields.description is defined %}
{{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
{%- endif %}
{%- set handled_keys = ['name', 'type', 'description'] %}
{{- render_extra_keys(param_fields, handled_keys) }}
{{- '\n</parameter>' }}
{%- endfor %}
{%- endif %}
{% set handled_keys = ['type', 'properties'] %}
{{- render_extra_keys(tool.parameters, handled_keys) }}
{{- '\n</parameters>' }}
{%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
{{- render_extra_keys(tool, handled_keys) }}
{{- '\n</function>' }}
{%- endfor %}
{{- "\n</tools>" }}
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
{%- endif %}
{%- if system_message is defined %}
{{- '<|im_end|>\n' }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- for message in loop_messages %}
{%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
{{- '<|im_start|>' + message.role }}
{%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
{{- '\n' + message.content | trim + '\n' }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
{%- if tool_call.arguments is defined %}
{%- for args_name, args_value in tool_call.arguments|items %}
{{- '<parameter=' + args_name + '>\n' }}
{%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
{{- args_value }}
{{- '\n</parameter>\n' }}
{%- endfor %}
{%- endif %}
{{- '</function>\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{%- elif message.role == "tool" %}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>user\n' }}
{%- endif %}
{{- '<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>\n' }}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>\n' }}
{%- elif loop.last %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
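As a quick sanity check, the Qwen3-Coder template above can be exercised through the standard transformers chat-template API. This is a minimal sketch, assuming a recent transformers version; the read_file tool definition is a hypothetical example, not part of this commit.

# Minimal sketch: render the template with one hypothetical tool.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3-Coder-480B-A35B-Instruct")

tools = [{
    "type": "function",
    "function": {
        "name": "read_file",  # hypothetical tool
        "description": "Read a file from disk.",
        "parameters": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path."}
            },
            "required": ["path"],
        },
    },
}]

messages = [{"role": "user", "content": "Show me setup.py"}]

# Produces the <tools>...<function>... XML block, the user turn, and the
# trailing "<|im_start|>assistant\n" generation prompt.
prompt = tokenizer.apply_chat_template(
    messages, tools=tools, tokenize=False, add_generation_prompt=True)
print(prompt)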
@@ -0,0 +1,39 @@
{
  "architectures": [
    "Qwen3MoeForCausalLM"
  ],
  "attention_dropout": 0.0,
  "decoder_sparse_step": 1,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 6144,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 262144,
  "max_window_layers": 62,
  "mlp_only_layers": [],
  "model_type": "qwen3_moe",
  "moe_intermediate_size": 2560,
  "norm_topk_prob": true,
  "num_attention_heads": 96,
  "num_experts": 160,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 62,
  "num_key_value_heads": 8,
  "output_router_logits": false,
  "qkv_bias": false,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000000,
  "router_aux_loss_coef": 0.0,
  "shared_expert_intermediate_size": 0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": true,
  "use_qk_norm": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}
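A few quantities implied by these fields, as plain arithmetic with the values copied from the JSON above:

# Derived quantities from the config above (values copied from the JSON).
num_experts, experts_per_tok = 160, 8
heads, kv_heads, head_dim = 96, 8, 128
hidden = 6144

print(f"experts active per token: {experts_per_tok}/{num_experts} "
      f"({experts_per_tok / num_experts:.1%})")                    # 8/160 (5.0%)
print(f"GQA ratio: {heads // kv_heads} query heads per KV head")   # 12
print(f"attention width: {heads * head_dim} vs hidden {hidden}")   # 12288 vs 6144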
@@ -0,0 +1,689 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import ast
import json
import uuid
from collections.abc import Sequence
from typing import Any, Optional, Union

import regex as re

from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                              ChatCompletionToolsParam,
                                              DeltaFunctionCall, DeltaMessage,
                                              DeltaToolCall,
                                              ExtractedToolCallInformation,
                                              FunctionCall, ToolCall)
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
    ToolParser, ToolParserManager)
from vllm.logger import init_logger
from vllm.transformers_utils.tokenizer import AnyTokenizer

logger = init_logger(__name__)


@ToolParserManager.register_module("qwen3_coder")
class Qwen3CoderToolParser(ToolParser):

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        self.current_tool_name_sent: bool = False
        self.prev_tool_call_arr: list[dict] = []
        self.current_tool_id: int = -1
        self.streamed_args_for_tool: list[str] = []

        # Sentinel tokens for streaming mode
        self.tool_call_start_token: str = "<tool_call>"
        self.tool_call_end_token: str = "</tool_call>"
        self.tool_call_prefix: str = "<function="
        self.function_end_token: str = "</function>"
        self.parameter_prefix: str = "<parameter="
        self.parameter_end_token: str = "</parameter>"
        self.is_tool_call_started: bool = False
        self.failed_count: int = 0

        # Enhanced streaming state - reset for each new message
        self._reset_streaming_state()

        # Regex patterns
        self.tool_call_complete_regex = re.compile(
            r"<tool_call>(.*?)</tool_call>", re.DOTALL)
        self.tool_call_regex = re.compile(
            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL)
        self.tool_call_function_regex = re.compile(
            r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
        self.tool_call_parameter_regex = re.compile(
            r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
            re.DOTALL)
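A standalone illustration of how these patterns decompose a model response; the same expressions also work with the stdlib re module, and the sample text is made up:

# Standalone sketch of the patterns above on a sample completion.
import re

text = ("<tool_call>\n<function=get_weather>\n"
        "<parameter=city>\nParis\n</parameter>\n"
        "</function>\n</tool_call>")

func_re = re.compile(r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
param_re = re.compile(
    r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
    re.DOTALL)

body = next(m[0] or m[1] for m in func_re.findall(text))
name, _, rest = body.partition(">")
print(name)  # get_weather
for p in param_re.findall(rest):
    pname, _, pval = p.partition(">")
    print(pname, repr(pval.strip()))  # city 'Paris'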
        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction.")

        self.tool_call_start_token_id = self.vocab.get(
            self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

        if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
            raise RuntimeError(
                "Qwen3 XML Tool parser could not locate tool call start/end "
                "tokens in the tokenizer!")

        logger.info(
            f"vLLM successfully imported tool parser {self.__class__.__name__}!"
        )

    def _generate_tool_call_id(self) -> str:
        """Generate a unique tool call ID."""
        return f"call_{uuid.uuid4().hex[:24]}"

    def _reset_streaming_state(self):
        """Reset all streaming state."""
        self.current_tool_index = 0
        self.is_tool_call_started = False
        self.header_sent = False
        self.current_tool_id = None
        self.current_function_name = None
        self.current_param_name = None
        self.current_param_value = ""
        self.param_count = 0
        self.in_param = False
        self.in_function = False
        self.accumulated_text = ""
        self.json_started = False
        self.json_closed = False
        # Store accumulated parameters for type conversion
        self.accumulated_params = {}
        self.streaming_request = None

    def _get_arguments_config(
            self, func_name: str,
            tools: Optional[list[ChatCompletionToolsParam]]) -> dict:
        """Extract argument configuration for a function."""
        if tools is None:
            return {}
        for config in tools:
            if not hasattr(config, "type") or not (hasattr(
                    config, "function") and hasattr(config.function, "name")):
                continue
            if config.type == "function" and config.function.name == func_name:
                if not hasattr(config.function, "parameters"):
                    return {}
                params = config.function.parameters
                if isinstance(params, dict) and "properties" in params:
                    return params["properties"]
                elif isinstance(params, dict):
                    return params
                else:
                    return {}
        logger.warning(f"Tool '{func_name}' is not defined in the tools list.")
        return {}

    def _convert_param_value(self, param_value: str, param_name: str,
                             param_config: dict, func_name: str) -> Any:
        """Convert parameter value based on its type in the schema."""
        # Handle null value for any type
        if param_value.lower() == "null":
            return None

        if param_name not in param_config:
            if param_config != {}:
                logger.warning(
                    f"Parsed parameter '{param_name}' is not defined in the tool "
                    f"parameters for tool '{func_name}', directly returning the string value."
                )
            return param_value

        if isinstance(param_config[param_name],
                      dict) and "type" in param_config[param_name]:
            param_type = str(param_config[param_name]["type"]).strip().lower()
        else:
            param_type = "string"
        if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
            return param_value
        elif param_type.startswith("int") or param_type.startswith(
                "uint") or param_type.startswith(
                    "long") or param_type.startswith(
                        "short") or param_type.startswith("unsigned"):
            try:
                param_value = int(param_value)
            except Exception:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not an integer in tool "
                    f"'{func_name}', degenerating to string.")
            return param_value
        elif param_type.startswith("num") or param_type.startswith("float"):
            try:
                float_param_value = float(param_value)
                param_value = float_param_value if float_param_value - int(
                    float_param_value) != 0 else int(float_param_value)
            except Exception:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a float in tool "
                    f"'{func_name}', degenerating to string.")
            return param_value
        elif param_type in ["boolean", "bool", "binary"]:
            param_value = param_value.lower()
            if param_value not in ["true", "false"]:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a boolean (`true` or `false`) in tool '{func_name}', degenerating to false."
                )
            return param_value == "true"
        else:
            if param_type in ["object", "array", "arr"
                              ] or param_type.startswith(
                                  "dict") or param_type.startswith("list"):
                try:
                    param_value = json.loads(param_value)
                    return param_value
                except Exception:
                    logger.warning(
                        f"Parsed value '{param_value}' of parameter '{param_name}' cannot be parsed with json.loads in tool "
                        f"'{func_name}', will try other methods to parse it.")
            try:
                param_value = ast.literal_eval(param_value)  # safer than eval
            except Exception:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' cannot be converted via Python `ast.literal_eval()` in tool '{func_name}', degenerating to string."
                )
            return param_value
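The conversion logic above is easiest to see on concrete inputs. A minimal sketch of the same rules, with a hypothetical schema not tied to vLLM:

# Minimal sketch of the schema-driven coercion rules above.
# `schema` is a hypothetical tool-parameter config (JSON-schema "properties").
import json

schema = {"limit": {"type": "integer"},
          "ratio": {"type": "number"},
          "force": {"type": "boolean"},
          "opts":  {"type": "object"}}

def convert(name: str, raw: str):
    if raw.lower() == "null":
        return None
    t = schema.get(name, {}).get("type", "string")
    if t == "integer":
        return int(raw)
    if t == "number":
        f = float(raw)
        return int(f) if f == int(f) else f  # whole floats degrade to int
    if t == "boolean":
        return raw.lower() == "true"
    if t == "object":
        return json.loads(raw)
    return raw

print(convert("limit", "42"), convert("ratio", "2.0"),
      convert("force", "True"), convert("opts", '{"a": 1}'))
# -> 42 2 True {'a': 1}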
    def _parse_xml_function_call(
            self, function_call_str: str,
            tools: Optional[list[ChatCompletionToolsParam]]
    ) -> Optional[ToolCall]:

        # Extract function name
        end_index = function_call_str.index(">")
        function_name = function_call_str[:end_index]
        param_config = self._get_arguments_config(function_name, tools)
        parameters = function_call_str[end_index + 1:]
        param_dict = {}
        for match_text in self.tool_call_parameter_regex.findall(parameters):
            idx = match_text.index(">")
            param_name = match_text[:idx]
            param_value = str(match_text[idx + 1:])
            # Remove leading and trailing \n
            if param_value.startswith("\n"):
                param_value = param_value[1:]
            if param_value.endswith("\n"):
                param_value = param_value[:-1]

            param_dict[param_name] = self._convert_param_value(
                param_value, param_name, param_config, function_name)
        return ToolCall(
            type="function",
            function=FunctionCall(name=function_name,
                                  arguments=json.dumps(param_dict,
                                                       ensure_ascii=False)),
        )

    def _get_function_calls(self, model_output: str) -> list[str]:
        # Find all tool calls
        matched_ranges = self.tool_call_regex.findall(model_output)
        raw_tool_calls = [
            match[0] if match[0] else match[1] for match in matched_ranges
        ]

        # Back-off strategy if no tool_call tags found
        if len(raw_tool_calls) == 0:
            raw_tool_calls = [model_output]

        raw_function_calls = []
        for tool_call in raw_tool_calls:
            raw_function_calls.extend(
                self.tool_call_function_regex.findall(tool_call))

        function_calls = [
            match[0] if match[0] else match[1] for match in raw_function_calls
        ]
        return function_calls

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        # Quick check to avoid unnecessary processing
        if self.tool_call_prefix not in model_output:
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)

        try:
            function_calls = self._get_function_calls(model_output)
            if len(function_calls) == 0:
                return ExtractedToolCallInformation(tools_called=False,
                                                    tool_calls=[],
                                                    content=model_output)

            tool_calls = [
                self._parse_xml_function_call(function_call_str, request.tools)
                for function_call_str in function_calls
            ]

            # Populate prev_tool_call_arr for serving layer to set finish_reason
            self.prev_tool_call_arr.clear()  # Clear previous calls
            for tool_call in tool_calls:
                if tool_call:
                    self.prev_tool_call_arr.append({
                        "name": tool_call.function.name,
                        "arguments": tool_call.function.arguments,
                    })

            # Extract content before tool calls
            content_index = model_output.find(self.tool_call_start_token)
            content_index = content_index if content_index >= 0 else model_output.find(
                self.tool_call_prefix)
            content = model_output[:content_index]  # .rstrip()

            return ExtractedToolCallInformation(
                tools_called=(len(tool_calls) > 0),
                tool_calls=tool_calls,
                content=content if content else None,
            )

        except Exception:
            logger.exception("Error in extracting tool call from response.")
            return ExtractedToolCallInformation(tools_called=False,
                                                tool_calls=[],
                                                content=model_output)
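A rough end-to-end usage sketch for the non-streaming path. This assumes vLLM is installed and the tokenizer is available; the request construction below follows vLLM's OpenAI-compatible protocol types and is an assumption, not part of this commit.

# Rough usage sketch of extract_tool_calls (assumptions noted above).
from transformers import AutoTokenizer
from vllm.entrypoints.openai.protocol import ChatCompletionRequest

tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3-Coder-480B-A35B-Instruct")
parser = Qwen3CoderToolParser(tokenizer)
request = ChatCompletionRequest(
    model="Qwen/Qwen3-Coder-480B-A35B-Instruct",
    messages=[{"role": "user", "content": "Show me setup.py"}])

output = ("I'll read that file.\n"
          "<tool_call>\n<function=read_file>\n"
          "<parameter=path>\nsetup.py\n</parameter>\n"
          "</function>\n</tool_call>")

info = parser.extract_tool_calls(output, request)
print(info.tools_called)                      # True
print(info.tool_calls[0].function.name)       # read_file
print(info.tool_calls[0].function.arguments)  # {"path": "setup.py"}
print(info.content)                           # "I'll read that file.\n"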
    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        # Store request for type conversion
        if not previous_text:
            self._reset_streaming_state()
            self.streaming_request = request

        # If no delta text, return None unless it's an EOS token after tool calls
        if not delta_text:
            # Check if this is an EOS token after all tool calls are complete.
            # We check for tool calls in the text even if is_tool_call_started
            # is False, because it might have been reset after processing all
            # tools.
            if delta_token_ids and self.tool_call_end_token_id not in delta_token_ids:
                # Count complete tool calls
                complete_calls = len(
                    self.tool_call_complete_regex.findall(current_text))

                # If we have completed tool calls and populated prev_tool_call_arr
                if complete_calls > 0 and len(self.prev_tool_call_arr) > 0:
                    # Check if all tool calls are closed
                    open_calls = current_text.count(
                        self.tool_call_start_token) - current_text.count(
                            self.tool_call_end_token)
                    if open_calls == 0:
                        # Return empty delta message to allow finish_reason processing
                        return DeltaMessage(content="")
                elif not self.is_tool_call_started and current_text:
                    # This is a regular content response that's now complete
                    return DeltaMessage(content="")
            return None

        # Update accumulated text
        self.accumulated_text = current_text

        # Check if we need to advance to next tool
        if self.json_closed and not self.in_function:
            # Check if this tool call has ended
            tool_ends = current_text.count(self.tool_call_end_token)
            if tool_ends > self.current_tool_index:
                # This tool has ended, advance to next
                self.current_tool_index += 1
                self.header_sent = False
                self.param_count = 0
                self.json_started = False
                self.json_closed = False
                self.accumulated_params = {}

                # Check if there are more tool calls
                tool_starts = current_text.count(self.tool_call_start_token)
                if self.current_tool_index >= tool_starts:
                    # No more tool calls
                    self.is_tool_call_started = False
                # Continue processing next tool
                return None

        # Handle normal content before tool calls
        if not self.is_tool_call_started:
            # Check if tool call is starting
            if self.tool_call_start_token_id in delta_token_ids or self.tool_call_start_token in delta_text:
                self.is_tool_call_started = True
                # Return any content before the tool call
                if self.tool_call_start_token in delta_text:
                    content_before = delta_text[:delta_text.index(
                        self.tool_call_start_token)]
                    if content_before:
                        return DeltaMessage(content=content_before)
                return None
            else:
                # Check if we're between tool calls - skip whitespace
                if current_text.rstrip().endswith(self.tool_call_end_token):
                    # We just ended a tool call, skip whitespace
                    if delta_text.strip() == "":
                        return None
                # Normal content, no tool call
                return DeltaMessage(content=delta_text)

        # Check if we're between tool calls (waiting for next one)
        # Count tool calls we've seen vs processed
        tool_starts_count = current_text.count(self.tool_call_start_token)
        if self.current_tool_index >= tool_starts_count:
            # We're past all tool calls, shouldn't be here
            return None

        # We're in a tool call, find the current tool call portion.
        # Need to find the correct tool call based on current_tool_index.
        tool_starts = []
        idx = 0
        while True:
            idx = current_text.find(self.tool_call_start_token, idx)
            if idx == -1:
                break
            tool_starts.append(idx)
            idx += len(self.tool_call_start_token)

        if self.current_tool_index >= len(tool_starts):
            # No more tool calls to process yet
            return None

        tool_start_idx = tool_starts[self.current_tool_index]
        # Find where this tool call ends (or current position if not ended yet)
        tool_end_idx = current_text.find(self.tool_call_end_token,
                                         tool_start_idx)
        if tool_end_idx == -1:
            tool_text = current_text[tool_start_idx:]
        else:
            tool_text = current_text[tool_start_idx:tool_end_idx +
                                     len(self.tool_call_end_token)]

        # Looking for function header
        if not self.header_sent:
            if self.tool_call_prefix in tool_text:
                func_start = tool_text.find(self.tool_call_prefix) + len(
                    self.tool_call_prefix)
                func_end = tool_text.find(">", func_start)

                if func_end != -1:
                    # Found complete function name
                    self.current_function_name = tool_text[func_start:func_end]
                    self.current_tool_id = self._generate_tool_call_id()
                    self.header_sent = True
                    self.in_function = True

                    # IMPORTANT: Add to prev_tool_call_arr immediately when we
                    # detect a tool call. This ensures finish_reason="tool_calls"
                    # even if parsing isn't complete.
                    already_added = any(
                        tool.get("name") == self.current_function_name
                        for tool in self.prev_tool_call_arr)
                    if not already_added:
                        self.prev_tool_call_arr.append({
                            "name": self.current_function_name,
                            "arguments": "{}",  # Placeholder, updated later
                        })

                    # Send header with function info
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            id=self.current_tool_id,
                            function=DeltaFunctionCall(
                                name=self.current_function_name, arguments=""),
                            type="function",
                        )
                    ])
            return None

        # We've sent header, now handle function body
        if self.in_function:
            # Send opening brace if not sent yet
            if not self.json_started and self.parameter_prefix not in delta_text:
                self.json_started = True
                return DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="{"),
                    )
                ])

            # Make sure json_started is set if we're processing parameters
            if not self.json_started:
                self.json_started = True

            # Check for function end in accumulated text
            if not self.json_closed and self.function_end_token in tool_text:
                # Close JSON
                self.json_closed = True

                # Extract the complete tool call to update prev_tool_call_arr
                # with final arguments. Find the function content.
                func_start = tool_text.find(self.tool_call_prefix) + len(
                    self.tool_call_prefix)
                func_content_end = tool_text.find(self.function_end_token,
                                                  func_start)
                if func_content_end != -1:
                    func_content = tool_text[func_start:func_content_end]
                    # Parse to get the complete arguments
                    try:
                        parsed_tool = self._parse_xml_function_call(
                            func_content, self.streaming_request.tools
                            if self.streaming_request else None)
                        if parsed_tool:
                            # Update existing entry in prev_tool_call_arr with
                            # complete arguments
                            for i, tool in enumerate(self.prev_tool_call_arr):
                                if tool.get(
                                        "name") == parsed_tool.function.name:
                                    self.prev_tool_call_arr[i][
                                        "arguments"] = parsed_tool.function.arguments
                                    break
                    except Exception:
                        pass  # Ignore parsing errors during streaming

                result = DeltaMessage(tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="}"),
                    )
                ])

                # Reset state for next tool
                self.in_function = False
                self.json_closed = True
                self.accumulated_params = {}

                return result

            # Look for parameters.
            # Find all parameter starts.
            param_starts = []
            idx = 0
            while True:
                idx = tool_text.find(self.parameter_prefix, idx)
                if idx == -1:
                    break
                param_starts.append(idx)
                idx += len(self.parameter_prefix)

            # Check if we should start a new parameter
            if not self.in_param and self.param_count < len(param_starts):

                if len(param_starts) > self.param_count:
                    # Process the next parameter
                    param_idx = param_starts[self.param_count]
                    param_start = param_idx + len(self.parameter_prefix)
                    remaining = tool_text[param_start:]

                    if ">" in remaining:
                        # We have the complete parameter name
                        name_end = remaining.find(">")
                        self.current_param_name = remaining[:name_end]

                        # Find the parameter value
                        value_start = param_start + name_end + 1
                        value_text = tool_text[value_start:]
                        if value_text.startswith("\n"):
                            value_text = value_text[1:]

                        # Find where this parameter ends
                        param_end_idx = value_text.find(
                            self.parameter_end_token)
                        if param_end_idx == -1:
                            # No closing tag, look for next parameter or function end
                            next_param_idx = value_text.find(
                                self.parameter_prefix)
                            func_end_idx = value_text.find(
                                self.function_end_token)

                            if next_param_idx != -1 and (func_end_idx == -1
                                                         or next_param_idx
                                                         < func_end_idx):
                                param_end_idx = next_param_idx
                            elif func_end_idx != -1:
                                param_end_idx = func_end_idx
                            else:
                                # Neither found, check if tool call is complete
                                if self.tool_call_end_token in tool_text:
                                    # Tool call is complete, so the parameter
                                    # must be complete too. Use all remaining
                                    # text before function end as the value.
                                    param_end_idx = len(value_text)
                                else:
                                    # Still streaming, wait for more content
                                    return None

                        if param_end_idx != -1:
                            # Complete parameter found
                            param_value = value_text[:param_end_idx]
                            if param_value.endswith("\n"):
                                param_value = param_value[:-1]

                            # Store raw value for later processing
                            self.accumulated_params[
                                self.current_param_name] = param_value

                            # Get parameter configuration for type conversion
                            param_config = self._get_arguments_config(
                                self.current_function_name,
                                self.streaming_request.tools
                                if self.streaming_request else None)

                            # Convert the parameter value to the appropriate type
                            converted_value = self._convert_param_value(
                                param_value, self.current_param_name,
                                param_config, self.current_function_name)

                            # Build JSON fragment based on the converted type.
                            # Use json.dumps to properly serialize the value.
                            serialized_value = json.dumps(converted_value,
                                                          ensure_ascii=False)

                            if self.param_count == 0:
                                json_fragment = f'"{self.current_param_name}": {serialized_value}'
                            else:
                                json_fragment = f', "{self.current_param_name}": {serialized_value}'

                            self.param_count += 1

                            return DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=json_fragment),
                                )
                            ])

            # Continue parameter value - not used in the current implementation
            # since we process complete parameters above
            if self.in_param:
                if self.parameter_end_token in delta_text:
                    # End of parameter
                    end_idx = delta_text.find(self.parameter_end_token)
                    value_chunk = delta_text[:end_idx]

                    # Skip past > if at start
                    if not self.current_param_value and ">" in value_chunk:
                        gt_idx = value_chunk.find(">")
                        value_chunk = value_chunk[gt_idx + 1:]

                    if not self.current_param_value and value_chunk.startswith(
                            "\n"):
                        value_chunk = value_chunk[1:]

                    # Store complete value
                    full_value = self.current_param_value + value_chunk
                    self.accumulated_params[
                        self.current_param_name] = full_value

                    # Get parameter configuration for type conversion
                    param_config = self._get_arguments_config(
                        self.current_function_name,
                        self.streaming_request.tools
                        if self.streaming_request else None)

                    # Convert the parameter value to the appropriate type
                    converted_value = self._convert_param_value(
                        full_value, self.current_param_name, param_config,
                        self.current_function_name)

                    # Serialize the converted value
                    serialized_value = json.dumps(converted_value,
                                                  ensure_ascii=False)

                    # Since we've been streaming the quoted version, we need to
                    # close it properly. This is complex - for now just complete
                    # the value.
                    self.in_param = False
                    self.current_param_value = ""

                    # Just close the current parameter string
                    return DeltaMessage(tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            function=DeltaFunctionCall(
                                arguments='"'),  # Close the string quote
                        )
                    ])
                else:
                    # Continue accumulating value
                    value_chunk = delta_text

                    # Handle first chunk after param name
                    if not self.current_param_value and ">" in value_chunk:
                        gt_idx = value_chunk.find(">")
                        value_chunk = value_chunk[gt_idx + 1:]

                    if not self.current_param_value and value_chunk.startswith(
                            "\n"):
                        value_chunk = value_chunk[1:]

                    if value_chunk:
                        # Stream the escaped delta
                        prev_escaped = json.dumps(
                            self.current_param_value, ensure_ascii=False
                        )[1:-1] if self.current_param_value else ""
                        self.current_param_value += value_chunk
                        full_escaped = json.dumps(self.current_param_value,
                                                  ensure_ascii=False)[1:-1]
                        delta_escaped = full_escaped[len(prev_escaped):]

                        if delta_escaped:
                            return DeltaMessage(tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=delta_escaped),
                                )
                            ])

        return None
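For orientation, the arguments channel emitted by the streaming path above assembles one JSON object incrementally. A schematic of the fragments for a two-parameter call, with illustrative values:

# Schematic of the argument deltas the streaming path emits for one
# two-parameter call (values are illustrative).
fragments = [
    "{",                   # sent once the function header is out
    '"path": "setup.py"',  # first complete <parameter=...> block
    ', "limit": 10',       # each later parameter is comma-prefixed
    "}",                   # sent when </function> is observed
]
assert "".join(fragments) == '{"path": "setup.py", "limit": 10}'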
303318  trace_model_meta/Qwen/Qwen3-Coder-480B-A35B-Instruct/tokenizer.json  Normal file
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
86  trace_model_meta/ZhipuAI/GLM-5-FP8/chat_template.jinja  Normal file
@@ -0,0 +1,86 @@
[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>

For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{%- set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{%- for m in messages -%}
{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
{{ '<think>' + reasoning_content.strip() + '</think>' }}
{%- else -%}
{{ '</think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{- '<tool_call>' + tc.name -}}
{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '<tool_response>' }}
{{- m.content }}
{{- '</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}
<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
{%- endif -%}
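The arg_key/arg_value layout this template emits is easiest to see rendered. A small sketch that formats one hypothetical tool call the same way (non-string values JSON-encoded, as in the template):

# Sketch of the <arg_key>/<arg_value> layout the GLM template emits for a
# hypothetical tool call.
import json

name, args = "get_weather", {"city": "Paris", "days": 3}
parts = [f"<arg_key>{k}</arg_key><arg_value>"
         f"{v if isinstance(v, str) else json.dumps(v, ensure_ascii=False)}"
         f"</arg_value>" for k, v in args.items()]
print(f"<tool_call>{name}{''.join(parts)}</tool_call>")
# -> <tool_call>get_weather<arg_key>city</arg_key><arg_value>Paris</arg_value>
#    <arg_key>days</arg_key><arg_value>3</arg_value></tool_call>  (one line)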
782  trace_model_meta/ZhipuAI/GLM-5-FP8/config.json  Normal file
@@ -0,0 +1,782 @@
{
  "architectures": [
    "GlmMoeDsaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "dtype": "bfloat16",
  "eos_token_id": [
    154820,
    154827,
    154829
  ],
  "ep_size": 1,
  "first_k_dense_replace": 3,
  "hidden_act": "silu",
  "head_dim": 64,
  "hidden_size": 6144,
  "index_head_dim": 128,
  "index_n_heads": 32,
  "index_topk": 2048,
  "indexer_rope_interleave": true,
  "initializer_range": 0.02,
  "intermediate_size": 12288,
  "kv_lora_rank": 512,
  "max_position_embeddings": 202752,
  "moe_intermediate_size": 2048,
  "moe_layer_freq": 1,
  "model_type": "glm_moe_dsa",
  "n_group": 1,
  "n_routed_experts": 256,
  "n_shared_experts": 1,
  "norm_topk_prob": true,
  "num_attention_heads": 64,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 78,
  "num_key_value_heads": 64,
  "num_nextn_predict_layers": 1,
  "pad_token_id": 154820,
  "pretraining_tp": 1,
  "q_lora_rank": 2048,
  "qk_head_dim": 256,
  "qk_nope_head_dim": 192,
  "qk_rope_head_dim": 64,
  "rms_norm_eps": 1e-05,
  "rope_interleave": true,
  "rope_parameters": {
    "rope_theta": 1000000,
    "rope_type": "default"
  },
  "routed_scaling_factor": 2.5,
  "scoring_func": "sigmoid",
  "tie_word_embeddings": false,
  "topk_group": 1,
  "topk_method": "noaux_tc",
  "transformers_version": "5.0.2.dev0",
  "use_cache": true,
  "v_head_dim": 256,
  "vocab_size": 154880,
  "quantization_config": {
    "activation_scheme": "dynamic",
    "fmt": "e4m3",
    "quant_method": "fp8",
    "weight_block_size": [
      128,
      128
    ],
    "modules_to_not_convert": [
      "lm_head",
      "model.embed_tokens",
      "model.layers.0.input_layernorm",
      "model.layers.0.post_attention_layernorm",
      "model.layers.0.self_attn.indexer.k_norm",
      "model.layers.0.self_attn.indexer.k_norm.bias",
      "model.layers.0.self_attn.indexers_proj",
      "model.layers.0.self_attn.kv_a_layernorm",
      "model.layers.0.self_attn.q_a_layernorm",
      "model.layers.1.input_layernorm",
      "model.layers.1.post_attention_layernorm",
      "model.layers.1.self_attn.indexer.k_norm",
      "model.layers.1.self_attn.indexer.k_norm.bias",
      "model.layers.1.self_attn.indexers_proj",
      "model.layers.1.self_attn.kv_a_layernorm",
      "model.layers.1.self_attn.q_a_layernorm",
      "model.layers.2.input_layernorm",
      "model.layers.2.post_attention_layernorm",
      "model.layers.2.self_attn.indexer.k_norm",
      "model.layers.2.self_attn.indexer.k_norm.bias",
      "model.layers.2.self_attn.indexers_proj",
      "model.layers.2.self_attn.kv_a_layernorm",
      "model.layers.2.self_attn.q_a_layernorm",
      "model.layers.3.input_layernorm",
      "model.layers.3.mlp.gate",
      "model.layers.3.mlp.gate.e_score_correction_bias",
      "model.layers.3.post_attention_layernorm",
      "model.layers.3.self_attn.indexer.k_norm",
      "model.layers.3.self_attn.indexer.k_norm.bias",
      "model.layers.3.self_attn.indexers_proj",
      "model.layers.3.self_attn.kv_a_layernorm",
      "model.layers.3.self_attn.q_a_layernorm",
      "model.layers.4.input_layernorm",
      "model.layers.4.mlp.gate",
      "model.layers.4.mlp.gate.e_score_correction_bias",
      "model.layers.4.post_attention_layernorm",
      "model.layers.4.self_attn.indexer.k_norm",
      "model.layers.4.self_attn.indexer.k_norm.bias",
      "model.layers.4.self_attn.indexers_proj",
      "model.layers.4.self_attn.kv_a_layernorm",
      "model.layers.4.self_attn.q_a_layernorm",
      "model.layers.5.input_layernorm",
      "model.layers.5.mlp.gate",
      "model.layers.5.mlp.gate.e_score_correction_bias",
      "model.layers.5.post_attention_layernorm",
      "model.layers.5.self_attn.indexer.k_norm",
      "model.layers.5.self_attn.indexer.k_norm.bias",
      "model.layers.5.self_attn.indexers_proj",
      "model.layers.5.self_attn.kv_a_layernorm",
      "model.layers.5.self_attn.q_a_layernorm",
      "model.layers.6.input_layernorm",
      "model.layers.6.mlp.gate",
      "model.layers.6.mlp.gate.e_score_correction_bias",
      "model.layers.6.post_attention_layernorm",
      "model.layers.6.self_attn.indexer.k_norm",
      "model.layers.6.self_attn.indexer.k_norm.bias",
      "model.layers.6.self_attn.indexers_proj",
      "model.layers.6.self_attn.kv_a_layernorm",
      "model.layers.6.self_attn.q_a_layernorm",
      "model.layers.7.input_layernorm",
      "model.layers.7.mlp.gate",
      "model.layers.7.mlp.gate.e_score_correction_bias",
      "model.layers.7.post_attention_layernorm",
      "model.layers.7.self_attn.indexer.k_norm",
      "model.layers.7.self_attn.indexer.k_norm.bias",
      "model.layers.7.self_attn.indexers_proj",
      "model.layers.7.self_attn.kv_a_layernorm",
      "model.layers.7.self_attn.q_a_layernorm",
      "model.layers.8.input_layernorm",
      "model.layers.8.mlp.gate",
      "model.layers.8.mlp.gate.e_score_correction_bias",
      "model.layers.8.post_attention_layernorm",
      "model.layers.8.self_attn.indexer.k_norm",
      "model.layers.8.self_attn.indexer.k_norm.bias",
      "model.layers.8.self_attn.indexers_proj",
      "model.layers.8.self_attn.kv_a_layernorm",
      "model.layers.8.self_attn.q_a_layernorm",
      "model.layers.9.input_layernorm",
      "model.layers.9.mlp.gate",
      "model.layers.9.mlp.gate.e_score_correction_bias",
      "model.layers.9.post_attention_layernorm",
      "model.layers.9.self_attn.indexer.k_norm",
      "model.layers.9.self_attn.indexer.k_norm.bias",
      "model.layers.9.self_attn.indexers_proj",
      "model.layers.9.self_attn.kv_a_layernorm",
      "model.layers.9.self_attn.q_a_layernorm",
      "model.layers.10.input_layernorm",
      "model.layers.10.mlp.gate",
      "model.layers.10.mlp.gate.e_score_correction_bias",
      "model.layers.10.post_attention_layernorm",
      "model.layers.10.self_attn.indexer.k_norm",
      "model.layers.10.self_attn.indexer.k_norm.bias",
      "model.layers.10.self_attn.indexers_proj",
      "model.layers.10.self_attn.kv_a_layernorm",
      "model.layers.10.self_attn.q_a_layernorm",
      "model.layers.11.input_layernorm",
      "model.layers.11.mlp.gate",
      "model.layers.11.mlp.gate.e_score_correction_bias",
      "model.layers.11.post_attention_layernorm",
      "model.layers.11.self_attn.indexer.k_norm",
      "model.layers.11.self_attn.indexer.k_norm.bias",
      "model.layers.11.self_attn.indexers_proj",
      "model.layers.11.self_attn.kv_a_layernorm",
      "model.layers.11.self_attn.q_a_layernorm",
      "model.layers.12.input_layernorm",
      "model.layers.12.mlp.gate",
      "model.layers.12.mlp.gate.e_score_correction_bias",
      "model.layers.12.post_attention_layernorm",
      "model.layers.12.self_attn.indexer.k_norm",
      "model.layers.12.self_attn.indexer.k_norm.bias",
      "model.layers.12.self_attn.indexers_proj",
      "model.layers.12.self_attn.kv_a_layernorm",
      "model.layers.12.self_attn.q_a_layernorm",
      "model.layers.13.input_layernorm",
      "model.layers.13.mlp.gate",
      "model.layers.13.mlp.gate.e_score_correction_bias",
      "model.layers.13.post_attention_layernorm",
      "model.layers.13.self_attn.indexer.k_norm",
      "model.layers.13.self_attn.indexer.k_norm.bias",
      "model.layers.13.self_attn.indexers_proj",
      "model.layers.13.self_attn.kv_a_layernorm",
      "model.layers.13.self_attn.q_a_layernorm",
      "model.layers.14.input_layernorm",
      "model.layers.14.mlp.gate",
      "model.layers.14.mlp.gate.e_score_correction_bias",
      "model.layers.14.post_attention_layernorm",
      "model.layers.14.self_attn.indexer.k_norm",
      "model.layers.14.self_attn.indexer.k_norm.bias",
      "model.layers.14.self_attn.indexers_proj",
      "model.layers.14.self_attn.kv_a_layernorm",
      "model.layers.14.self_attn.q_a_layernorm",
      "model.layers.15.input_layernorm",
      "model.layers.15.mlp.gate",
      "model.layers.15.mlp.gate.e_score_correction_bias",
      "model.layers.15.post_attention_layernorm",
      "model.layers.15.self_attn.indexer.k_norm",
      "model.layers.15.self_attn.indexer.k_norm.bias",
      "model.layers.15.self_attn.indexers_proj",
      "model.layers.15.self_attn.kv_a_layernorm",
      "model.layers.15.self_attn.q_a_layernorm",
      "model.layers.16.input_layernorm",
      "model.layers.16.mlp.gate",
      "model.layers.16.mlp.gate.e_score_correction_bias",
      "model.layers.16.post_attention_layernorm",
      "model.layers.16.self_attn.indexer.k_norm",
      "model.layers.16.self_attn.indexer.k_norm.bias",
      "model.layers.16.self_attn.indexers_proj",
      "model.layers.16.self_attn.kv_a_layernorm",
      "model.layers.16.self_attn.q_a_layernorm",
      "model.layers.17.input_layernorm",
      "model.layers.17.mlp.gate",
      "model.layers.17.mlp.gate.e_score_correction_bias",
      "model.layers.17.post_attention_layernorm",
      "model.layers.17.self_attn.indexer.k_norm",
      "model.layers.17.self_attn.indexer.k_norm.bias",
      "model.layers.17.self_attn.indexers_proj",
      "model.layers.17.self_attn.kv_a_layernorm",
      "model.layers.17.self_attn.q_a_layernorm",
      "model.layers.18.input_layernorm",
      "model.layers.18.mlp.gate",
      "model.layers.18.mlp.gate.e_score_correction_bias",
      "model.layers.18.post_attention_layernorm",
      "model.layers.18.self_attn.indexer.k_norm",
      "model.layers.18.self_attn.indexer.k_norm.bias",
      "model.layers.18.self_attn.indexers_proj",
      "model.layers.18.self_attn.kv_a_layernorm",
      "model.layers.18.self_attn.q_a_layernorm",
      "model.layers.19.input_layernorm",
      "model.layers.19.mlp.gate",
      "model.layers.19.mlp.gate.e_score_correction_bias",
      "model.layers.19.post_attention_layernorm",
      "model.layers.19.self_attn.indexer.k_norm",
      "model.layers.19.self_attn.indexer.k_norm.bias",
      "model.layers.19.self_attn.indexers_proj",
      "model.layers.19.self_attn.kv_a_layernorm",
      "model.layers.19.self_attn.q_a_layernorm",
      "model.layers.20.input_layernorm",
      "model.layers.20.mlp.gate",
      "model.layers.20.mlp.gate.e_score_correction_bias",
      "model.layers.20.post_attention_layernorm",
      "model.layers.20.self_attn.indexer.k_norm",
      "model.layers.20.self_attn.indexer.k_norm.bias",
      "model.layers.20.self_attn.indexers_proj",
      "model.layers.20.self_attn.kv_a_layernorm",
      "model.layers.20.self_attn.q_a_layernorm",
      "model.layers.21.input_layernorm",
      "model.layers.21.mlp.gate",
      "model.layers.21.mlp.gate.e_score_correction_bias",
      "model.layers.21.post_attention_layernorm",
      "model.layers.21.self_attn.indexer.k_norm",
      "model.layers.21.self_attn.indexer.k_norm.bias",
      "model.layers.21.self_attn.indexers_proj",
      "model.layers.21.self_attn.kv_a_layernorm",
      "model.layers.21.self_attn.q_a_layernorm",
      "model.layers.22.input_layernorm",
      "model.layers.22.mlp.gate",
      "model.layers.22.mlp.gate.e_score_correction_bias",
      "model.layers.22.post_attention_layernorm",
      "model.layers.22.self_attn.indexer.k_norm",
      "model.layers.22.self_attn.indexer.k_norm.bias",
      "model.layers.22.self_attn.indexers_proj",
      "model.layers.22.self_attn.kv_a_layernorm",
      "model.layers.22.self_attn.q_a_layernorm",
      "model.layers.23.input_layernorm",
      "model.layers.23.mlp.gate",
      "model.layers.23.mlp.gate.e_score_correction_bias",
      "model.layers.23.post_attention_layernorm",
      "model.layers.23.self_attn.indexer.k_norm",
      "model.layers.23.self_attn.indexer.k_norm.bias",
      "model.layers.23.self_attn.indexers_proj",
      "model.layers.23.self_attn.kv_a_layernorm",
      "model.layers.23.self_attn.q_a_layernorm",
      "model.layers.24.input_layernorm",
      "model.layers.24.mlp.gate",
      "model.layers.24.mlp.gate.e_score_correction_bias",
      "model.layers.24.post_attention_layernorm",
      "model.layers.24.self_attn.indexer.k_norm",
      "model.layers.24.self_attn.indexer.k_norm.bias",
      "model.layers.24.self_attn.indexers_proj",
      "model.layers.24.self_attn.kv_a_layernorm",
      "model.layers.24.self_attn.q_a_layernorm",
      "model.layers.25.input_layernorm",
      "model.layers.25.mlp.gate",
      "model.layers.25.mlp.gate.e_score_correction_bias",
      "model.layers.25.post_attention_layernorm",
      "model.layers.25.self_attn.indexer.k_norm",
      "model.layers.25.self_attn.indexer.k_norm.bias",
      "model.layers.25.self_attn.indexers_proj",
      "model.layers.25.self_attn.kv_a_layernorm",
      "model.layers.25.self_attn.q_a_layernorm",
      "model.layers.26.input_layernorm",
      "model.layers.26.mlp.gate",
      "model.layers.26.mlp.gate.e_score_correction_bias",
      "model.layers.26.post_attention_layernorm",
      "model.layers.26.self_attn.indexer.k_norm",
      "model.layers.26.self_attn.indexer.k_norm.bias",
      "model.layers.26.self_attn.indexers_proj",
      "model.layers.26.self_attn.kv_a_layernorm",
      "model.layers.26.self_attn.q_a_layernorm",
      "model.layers.27.input_layernorm",
      "model.layers.27.mlp.gate",
      "model.layers.27.mlp.gate.e_score_correction_bias",
      "model.layers.27.post_attention_layernorm",
      "model.layers.27.self_attn.indexer.k_norm",
      "model.layers.27.self_attn.indexer.k_norm.bias",
      "model.layers.27.self_attn.indexers_proj",
      "model.layers.27.self_attn.kv_a_layernorm",
      "model.layers.27.self_attn.q_a_layernorm",
      "model.layers.28.input_layernorm",
      "model.layers.28.mlp.gate",
      "model.layers.28.mlp.gate.e_score_correction_bias",
      "model.layers.28.post_attention_layernorm",
      "model.layers.28.self_attn.indexer.k_norm",
      "model.layers.28.self_attn.indexer.k_norm.bias",
      "model.layers.28.self_attn.indexers_proj",
      "model.layers.28.self_attn.kv_a_layernorm",
      "model.layers.28.self_attn.q_a_layernorm",
      "model.layers.29.input_layernorm",
      "model.layers.29.mlp.gate",
      "model.layers.29.mlp.gate.e_score_correction_bias",
      "model.layers.29.post_attention_layernorm",
      "model.layers.29.self_attn.indexer.k_norm",
      "model.layers.29.self_attn.indexer.k_norm.bias",
      "model.layers.29.self_attn.indexers_proj",
      "model.layers.29.self_attn.kv_a_layernorm",
      "model.layers.29.self_attn.q_a_layernorm",
      "model.layers.30.input_layernorm",
      "model.layers.30.mlp.gate",
      "model.layers.30.mlp.gate.e_score_correction_bias",
      "model.layers.30.post_attention_layernorm",
      "model.layers.30.self_attn.indexer.k_norm",
      "model.layers.30.self_attn.indexer.k_norm.bias",
      "model.layers.30.self_attn.indexers_proj",
      "model.layers.30.self_attn.kv_a_layernorm",
      "model.layers.30.self_attn.q_a_layernorm",
      "model.layers.31.input_layernorm",
      "model.layers.31.mlp.gate",
      "model.layers.31.mlp.gate.e_score_correction_bias",
      "model.layers.31.post_attention_layernorm",
      "model.layers.31.self_attn.indexer.k_norm",
      "model.layers.31.self_attn.indexer.k_norm.bias",
      "model.layers.31.self_attn.indexers_proj",
      "model.layers.31.self_attn.kv_a_layernorm",
      "model.layers.31.self_attn.q_a_layernorm",
      "model.layers.32.input_layernorm",
      "model.layers.32.mlp.gate",
      "model.layers.32.mlp.gate.e_score_correction_bias",
      "model.layers.32.post_attention_layernorm",
      "model.layers.32.self_attn.indexer.k_norm",
      "model.layers.32.self_attn.indexer.k_norm.bias",
      "model.layers.32.self_attn.indexers_proj",
      "model.layers.32.self_attn.kv_a_layernorm",
      "model.layers.32.self_attn.q_a_layernorm",
      "model.layers.33.input_layernorm",
      "model.layers.33.mlp.gate",
      "model.layers.33.mlp.gate.e_score_correction_bias",
      "model.layers.33.post_attention_layernorm",
      "model.layers.33.self_attn.indexer.k_norm",
      "model.layers.33.self_attn.indexer.k_norm.bias",
      "model.layers.33.self_attn.indexers_proj",
      "model.layers.33.self_attn.kv_a_layernorm",
      "model.layers.33.self_attn.q_a_layernorm",
      "model.layers.34.input_layernorm",
      "model.layers.34.mlp.gate",
      "model.layers.34.mlp.gate.e_score_correction_bias",
      "model.layers.34.post_attention_layernorm",
      "model.layers.34.self_attn.indexer.k_norm",
      "model.layers.34.self_attn.indexer.k_norm.bias",
      "model.layers.34.self_attn.indexers_proj",
      "model.layers.34.self_attn.kv_a_layernorm",
      "model.layers.34.self_attn.q_a_layernorm",
      "model.layers.35.input_layernorm",
      "model.layers.35.mlp.gate",
      "model.layers.35.mlp.gate.e_score_correction_bias",
      "model.layers.35.post_attention_layernorm",
      "model.layers.35.self_attn.indexer.k_norm",
      "model.layers.35.self_attn.indexer.k_norm.bias",
      "model.layers.35.self_attn.indexers_proj",
      "model.layers.35.self_attn.kv_a_layernorm",
      "model.layers.35.self_attn.q_a_layernorm",
      "model.layers.36.input_layernorm",
      "model.layers.36.mlp.gate",
      "model.layers.36.mlp.gate.e_score_correction_bias",
      "model.layers.36.post_attention_layernorm",
      "model.layers.36.self_attn.indexer.k_norm",
      "model.layers.36.self_attn.indexer.k_norm.bias",
      "model.layers.36.self_attn.indexers_proj",
      "model.layers.36.self_attn.kv_a_layernorm",
      "model.layers.36.self_attn.q_a_layernorm",
      "model.layers.37.input_layernorm",
      "model.layers.37.mlp.gate",
      "model.layers.37.mlp.gate.e_score_correction_bias",
      "model.layers.37.post_attention_layernorm",
      "model.layers.37.self_attn.indexer.k_norm",
      "model.layers.37.self_attn.indexer.k_norm.bias",
      "model.layers.37.self_attn.indexers_proj",
      "model.layers.37.self_attn.kv_a_layernorm",
      "model.layers.37.self_attn.q_a_layernorm",
      "model.layers.38.input_layernorm",
      "model.layers.38.mlp.gate",
      "model.layers.38.mlp.gate.e_score_correction_bias",
      "model.layers.38.post_attention_layernorm",
      "model.layers.38.self_attn.indexer.k_norm",
      "model.layers.38.self_attn.indexer.k_norm.bias",
      "model.layers.38.self_attn.indexers_proj",
      "model.layers.38.self_attn.kv_a_layernorm",
      "model.layers.38.self_attn.q_a_layernorm",
      "model.layers.39.input_layernorm",
      "model.layers.39.mlp.gate",
      "model.layers.39.mlp.gate.e_score_correction_bias",
      "model.layers.39.post_attention_layernorm",
      "model.layers.39.self_attn.indexer.k_norm",
      "model.layers.39.self_attn.indexer.k_norm.bias",
      "model.layers.39.self_attn.indexers_proj",
      "model.layers.39.self_attn.kv_a_layernorm",
      "model.layers.39.self_attn.q_a_layernorm",
      "model.layers.40.input_layernorm",
      "model.layers.40.mlp.gate",
      "model.layers.40.mlp.gate.e_score_correction_bias",
      "model.layers.40.post_attention_layernorm",
      "model.layers.40.self_attn.indexer.k_norm",
      "model.layers.40.self_attn.indexer.k_norm.bias",
      "model.layers.40.self_attn.indexers_proj",
      "model.layers.40.self_attn.kv_a_layernorm",
      "model.layers.40.self_attn.q_a_layernorm",
      "model.layers.41.input_layernorm",
      "model.layers.41.mlp.gate",
      "model.layers.41.mlp.gate.e_score_correction_bias",
      "model.layers.41.post_attention_layernorm",
      "model.layers.41.self_attn.indexer.k_norm",
      "model.layers.41.self_attn.indexer.k_norm.bias",
      "model.layers.41.self_attn.indexers_proj",
      "model.layers.41.self_attn.kv_a_layernorm",
      "model.layers.41.self_attn.q_a_layernorm",
      "model.layers.42.input_layernorm",
      "model.layers.42.mlp.gate",
      "model.layers.42.mlp.gate.e_score_correction_bias",
      "model.layers.42.post_attention_layernorm",
      "model.layers.42.self_attn.indexer.k_norm",
      "model.layers.42.self_attn.indexer.k_norm.bias",
      "model.layers.42.self_attn.indexers_proj",
      "model.layers.42.self_attn.kv_a_layernorm",
      "model.layers.42.self_attn.q_a_layernorm",
      "model.layers.43.input_layernorm",
      "model.layers.43.mlp.gate",
      "model.layers.43.mlp.gate.e_score_correction_bias",
      "model.layers.43.post_attention_layernorm",
      "model.layers.43.self_attn.indexer.k_norm",
      "model.layers.43.self_attn.indexer.k_norm.bias",
      "model.layers.43.self_attn.indexers_proj",
      "model.layers.43.self_attn.kv_a_layernorm",
      "model.layers.43.self_attn.q_a_layernorm",
      "model.layers.44.input_layernorm",
      "model.layers.44.mlp.gate",
      "model.layers.44.mlp.gate.e_score_correction_bias",
      "model.layers.44.post_attention_layernorm",
      "model.layers.44.self_attn.indexer.k_norm",
      "model.layers.44.self_attn.indexer.k_norm.bias",
      "model.layers.44.self_attn.indexers_proj",
      "model.layers.44.self_attn.kv_a_layernorm",
      "model.layers.44.self_attn.q_a_layernorm",
      "model.layers.45.input_layernorm",
      "model.layers.45.mlp.gate",
      "model.layers.45.mlp.gate.e_score_correction_bias",
      "model.layers.45.post_attention_layernorm",
      "model.layers.45.self_attn.indexer.k_norm",
      "model.layers.45.self_attn.indexer.k_norm.bias",
      "model.layers.45.self_attn.indexers_proj",
      "model.layers.45.self_attn.kv_a_layernorm",
      "model.layers.45.self_attn.q_a_layernorm",
      "model.layers.46.input_layernorm",
      "model.layers.46.mlp.gate",
      "model.layers.46.mlp.gate.e_score_correction_bias",
      "model.layers.46.post_attention_layernorm",
      "model.layers.46.self_attn.indexer.k_norm",
      "model.layers.46.self_attn.indexer.k_norm.bias",
      "model.layers.46.self_attn.indexers_proj",
      "model.layers.46.self_attn.kv_a_layernorm",
      "model.layers.46.self_attn.q_a_layernorm",
      "model.layers.47.input_layernorm",
      "model.layers.47.mlp.gate",
      "model.layers.47.mlp.gate.e_score_correction_bias",
      "model.layers.47.post_attention_layernorm",
      "model.layers.47.self_attn.indexer.k_norm",
      "model.layers.47.self_attn.indexer.k_norm.bias",
      "model.layers.47.self_attn.indexers_proj",
      "model.layers.47.self_attn.kv_a_layernorm",
      "model.layers.47.self_attn.q_a_layernorm",
      "model.layers.48.input_layernorm",
      "model.layers.48.mlp.gate",
      "model.layers.48.mlp.gate.e_score_correction_bias",
||||
"model.layers.48.post_attention_layernorm",
|
||||
"model.layers.48.self_attn.indexer.k_norm",
|
||||
"model.layers.48.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.48.self_attn.indexers_proj",
|
||||
"model.layers.48.self_attn.kv_a_layernorm",
|
||||
"model.layers.48.self_attn.q_a_layernorm",
|
||||
"model.layers.49.input_layernorm",
|
||||
"model.layers.49.mlp.gate",
|
||||
"model.layers.49.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.49.post_attention_layernorm",
|
||||
"model.layers.49.self_attn.indexer.k_norm",
|
||||
"model.layers.49.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.49.self_attn.indexers_proj",
|
||||
"model.layers.49.self_attn.kv_a_layernorm",
|
||||
"model.layers.49.self_attn.q_a_layernorm",
|
||||
"model.layers.50.input_layernorm",
|
||||
"model.layers.50.mlp.gate",
|
||||
"model.layers.50.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.50.post_attention_layernorm",
|
||||
"model.layers.50.self_attn.indexer.k_norm",
|
||||
"model.layers.50.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.50.self_attn.indexers_proj",
|
||||
"model.layers.50.self_attn.kv_a_layernorm",
|
||||
"model.layers.50.self_attn.q_a_layernorm",
|
||||
"model.layers.51.input_layernorm",
|
||||
"model.layers.51.mlp.gate",
|
||||
"model.layers.51.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.51.post_attention_layernorm",
|
||||
"model.layers.51.self_attn.indexer.k_norm",
|
||||
"model.layers.51.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.51.self_attn.indexers_proj",
|
||||
"model.layers.51.self_attn.kv_a_layernorm",
|
||||
"model.layers.51.self_attn.q_a_layernorm",
|
||||
"model.layers.52.input_layernorm",
|
||||
"model.layers.52.mlp.gate",
|
||||
"model.layers.52.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.52.post_attention_layernorm",
|
||||
"model.layers.52.self_attn.indexer.k_norm",
|
||||
"model.layers.52.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.52.self_attn.indexers_proj",
|
||||
"model.layers.52.self_attn.kv_a_layernorm",
|
||||
"model.layers.52.self_attn.q_a_layernorm",
|
||||
"model.layers.53.input_layernorm",
|
||||
"model.layers.53.mlp.gate",
|
||||
"model.layers.53.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.53.post_attention_layernorm",
|
||||
"model.layers.53.self_attn.indexer.k_norm",
|
||||
"model.layers.53.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.53.self_attn.indexers_proj",
|
||||
"model.layers.53.self_attn.kv_a_layernorm",
|
||||
"model.layers.53.self_attn.q_a_layernorm",
|
||||
"model.layers.54.input_layernorm",
|
||||
"model.layers.54.mlp.gate",
|
||||
"model.layers.54.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.54.post_attention_layernorm",
|
||||
"model.layers.54.self_attn.indexer.k_norm",
|
||||
"model.layers.54.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.54.self_attn.indexers_proj",
|
||||
"model.layers.54.self_attn.kv_a_layernorm",
|
||||
"model.layers.54.self_attn.q_a_layernorm",
|
||||
"model.layers.55.input_layernorm",
|
||||
"model.layers.55.mlp.gate",
|
||||
"model.layers.55.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.55.post_attention_layernorm",
|
||||
"model.layers.55.self_attn.indexer.k_norm",
|
||||
"model.layers.55.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.55.self_attn.indexers_proj",
|
||||
"model.layers.55.self_attn.kv_a_layernorm",
|
||||
"model.layers.55.self_attn.q_a_layernorm",
|
||||
"model.layers.56.input_layernorm",
|
||||
"model.layers.56.mlp.gate",
|
||||
"model.layers.56.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.56.post_attention_layernorm",
|
||||
"model.layers.56.self_attn.indexer.k_norm",
|
||||
"model.layers.56.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.56.self_attn.indexers_proj",
|
||||
"model.layers.56.self_attn.kv_a_layernorm",
|
||||
"model.layers.56.self_attn.q_a_layernorm",
|
||||
"model.layers.57.input_layernorm",
|
||||
"model.layers.57.mlp.gate",
|
||||
"model.layers.57.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.57.post_attention_layernorm",
|
||||
"model.layers.57.self_attn.indexer.k_norm",
|
||||
"model.layers.57.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.57.self_attn.indexers_proj",
|
||||
"model.layers.57.self_attn.kv_a_layernorm",
|
||||
"model.layers.57.self_attn.q_a_layernorm",
|
||||
"model.layers.58.input_layernorm",
|
||||
"model.layers.58.mlp.gate",
|
||||
"model.layers.58.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.58.post_attention_layernorm",
|
||||
"model.layers.58.self_attn.indexer.k_norm",
|
||||
"model.layers.58.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.58.self_attn.indexers_proj",
|
||||
"model.layers.58.self_attn.kv_a_layernorm",
|
||||
"model.layers.58.self_attn.q_a_layernorm",
|
||||
"model.layers.59.input_layernorm",
|
||||
"model.layers.59.mlp.gate",
|
||||
"model.layers.59.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.59.post_attention_layernorm",
|
||||
"model.layers.59.self_attn.indexer.k_norm",
|
||||
"model.layers.59.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.59.self_attn.indexers_proj",
|
||||
"model.layers.59.self_attn.kv_a_layernorm",
|
||||
"model.layers.59.self_attn.q_a_layernorm",
|
||||
"model.layers.60.input_layernorm",
|
||||
"model.layers.60.mlp.gate",
|
||||
"model.layers.60.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.60.post_attention_layernorm",
|
||||
"model.layers.60.self_attn.indexer.k_norm",
|
||||
"model.layers.60.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.60.self_attn.indexers_proj",
|
||||
"model.layers.60.self_attn.kv_a_layernorm",
|
||||
"model.layers.60.self_attn.q_a_layernorm",
|
||||
"model.layers.61.input_layernorm",
|
||||
"model.layers.61.mlp.gate",
|
||||
"model.layers.61.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.61.post_attention_layernorm",
|
||||
"model.layers.61.self_attn.indexer.k_norm",
|
||||
"model.layers.61.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.61.self_attn.indexers_proj",
|
||||
"model.layers.61.self_attn.kv_a_layernorm",
|
||||
"model.layers.61.self_attn.q_a_layernorm",
|
||||
"model.layers.62.input_layernorm",
|
||||
"model.layers.62.mlp.gate",
|
||||
"model.layers.62.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.62.post_attention_layernorm",
|
||||
"model.layers.62.self_attn.indexer.k_norm",
|
||||
"model.layers.62.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.62.self_attn.indexers_proj",
|
||||
"model.layers.62.self_attn.kv_a_layernorm",
|
||||
"model.layers.62.self_attn.q_a_layernorm",
|
||||
"model.layers.63.input_layernorm",
|
||||
"model.layers.63.mlp.gate",
|
||||
"model.layers.63.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.63.post_attention_layernorm",
|
||||
"model.layers.63.self_attn.indexer.k_norm",
|
||||
"model.layers.63.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.63.self_attn.indexers_proj",
|
||||
"model.layers.63.self_attn.kv_a_layernorm",
|
||||
"model.layers.63.self_attn.q_a_layernorm",
|
||||
"model.layers.64.input_layernorm",
|
||||
"model.layers.64.mlp.gate",
|
||||
"model.layers.64.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.64.post_attention_layernorm",
|
||||
"model.layers.64.self_attn.indexer.k_norm",
|
||||
"model.layers.64.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.64.self_attn.indexers_proj",
|
||||
"model.layers.64.self_attn.kv_a_layernorm",
|
||||
"model.layers.64.self_attn.q_a_layernorm",
|
||||
"model.layers.65.input_layernorm",
|
||||
"model.layers.65.mlp.gate",
|
||||
"model.layers.65.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.65.post_attention_layernorm",
|
||||
"model.layers.65.self_attn.indexer.k_norm",
|
||||
"model.layers.65.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.65.self_attn.indexers_proj",
|
||||
"model.layers.65.self_attn.kv_a_layernorm",
|
||||
"model.layers.65.self_attn.q_a_layernorm",
|
||||
"model.layers.66.input_layernorm",
|
||||
"model.layers.66.mlp.gate",
|
||||
"model.layers.66.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.66.post_attention_layernorm",
|
||||
"model.layers.66.self_attn.indexer.k_norm",
|
||||
"model.layers.66.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.66.self_attn.indexers_proj",
|
||||
"model.layers.66.self_attn.kv_a_layernorm",
|
||||
"model.layers.66.self_attn.q_a_layernorm",
|
||||
"model.layers.67.input_layernorm",
|
||||
"model.layers.67.mlp.gate",
|
||||
"model.layers.67.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.67.post_attention_layernorm",
|
||||
"model.layers.67.self_attn.indexer.k_norm",
|
||||
"model.layers.67.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.67.self_attn.indexers_proj",
|
||||
"model.layers.67.self_attn.kv_a_layernorm",
|
||||
"model.layers.67.self_attn.q_a_layernorm",
|
||||
"model.layers.68.input_layernorm",
|
||||
"model.layers.68.mlp.gate",
|
||||
"model.layers.68.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.68.post_attention_layernorm",
|
||||
"model.layers.68.self_attn.indexer.k_norm",
|
||||
"model.layers.68.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.68.self_attn.indexers_proj",
|
||||
"model.layers.68.self_attn.kv_a_layernorm",
|
||||
"model.layers.68.self_attn.q_a_layernorm",
|
||||
"model.layers.69.input_layernorm",
|
||||
"model.layers.69.mlp.gate",
|
||||
"model.layers.69.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.69.post_attention_layernorm",
|
||||
"model.layers.69.self_attn.indexer.k_norm",
|
||||
"model.layers.69.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.69.self_attn.indexers_proj",
|
||||
"model.layers.69.self_attn.kv_a_layernorm",
|
||||
"model.layers.69.self_attn.q_a_layernorm",
|
||||
"model.layers.70.input_layernorm",
|
||||
"model.layers.70.mlp.gate",
|
||||
"model.layers.70.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.70.post_attention_layernorm",
|
||||
"model.layers.70.self_attn.indexer.k_norm",
|
||||
"model.layers.70.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.70.self_attn.indexers_proj",
|
||||
"model.layers.70.self_attn.kv_a_layernorm",
|
||||
"model.layers.70.self_attn.q_a_layernorm",
|
||||
"model.layers.71.input_layernorm",
|
||||
"model.layers.71.mlp.gate",
|
||||
"model.layers.71.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.71.post_attention_layernorm",
|
||||
"model.layers.71.self_attn.indexer.k_norm",
|
||||
"model.layers.71.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.71.self_attn.indexers_proj",
|
||||
"model.layers.71.self_attn.kv_a_layernorm",
|
||||
"model.layers.71.self_attn.q_a_layernorm",
|
||||
"model.layers.72.input_layernorm",
|
||||
"model.layers.72.mlp.gate",
|
||||
"model.layers.72.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.72.post_attention_layernorm",
|
||||
"model.layers.72.self_attn.indexer.k_norm",
|
||||
"model.layers.72.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.72.self_attn.indexers_proj",
|
||||
"model.layers.72.self_attn.kv_a_layernorm",
|
||||
"model.layers.72.self_attn.q_a_layernorm",
|
||||
"model.layers.73.input_layernorm",
|
||||
"model.layers.73.mlp.gate",
|
||||
"model.layers.73.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.73.post_attention_layernorm",
|
||||
"model.layers.73.self_attn.indexer.k_norm",
|
||||
"model.layers.73.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.73.self_attn.indexers_proj",
|
||||
"model.layers.73.self_attn.kv_a_layernorm",
|
||||
"model.layers.73.self_attn.q_a_layernorm",
|
||||
"model.layers.74.input_layernorm",
|
||||
"model.layers.74.mlp.gate",
|
||||
"model.layers.74.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.74.post_attention_layernorm",
|
||||
"model.layers.74.self_attn.indexer.k_norm",
|
||||
"model.layers.74.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.74.self_attn.indexers_proj",
|
||||
"model.layers.74.self_attn.kv_a_layernorm",
|
||||
"model.layers.74.self_attn.q_a_layernorm",
|
||||
"model.layers.75.input_layernorm",
|
||||
"model.layers.75.mlp.gate",
|
||||
"model.layers.75.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.75.post_attention_layernorm",
|
||||
"model.layers.75.self_attn.indexer.k_norm",
|
||||
"model.layers.75.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.75.self_attn.indexers_proj",
|
||||
"model.layers.75.self_attn.kv_a_layernorm",
|
||||
"model.layers.75.self_attn.q_a_layernorm",
|
||||
"model.layers.76.input_layernorm",
|
||||
"model.layers.76.mlp.gate",
|
||||
"model.layers.76.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.76.post_attention_layernorm",
|
||||
"model.layers.76.self_attn.indexer.k_norm",
|
||||
"model.layers.76.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.76.self_attn.indexers_proj",
|
||||
"model.layers.76.self_attn.kv_a_layernorm",
|
||||
"model.layers.76.self_attn.q_a_layernorm",
|
||||
"model.layers.77.input_layernorm",
|
||||
"model.layers.77.mlp.gate",
|
||||
"model.layers.77.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.77.post_attention_layernorm",
|
||||
"model.layers.77.self_attn.indexer.k_norm",
|
||||
"model.layers.77.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.77.self_attn.indexers_proj",
|
||||
"model.layers.77.self_attn.kv_a_layernorm",
|
||||
"model.layers.77.self_attn.q_a_layernorm",
|
||||
"model.layers.78.eh_proj",
|
||||
"model.layers.78.enorm",
|
||||
"model.layers.78.hnorm",
|
||||
"model.layers.78.input_layernorm",
|
||||
"model.layers.78.mlp.gate",
|
||||
"model.layers.78.mlp.gate.e_score_correction_bias",
|
||||
"model.layers.78.post_attention_layernorm",
|
||||
"model.layers.78.self_attn.indexer.k_norm",
|
||||
"model.layers.78.self_attn.indexer.k_norm.bias",
|
||||
"model.layers.78.self_attn.indexers_proj",
|
||||
"model.layers.78.self_attn.kv_a_layernorm",
|
||||
"model.layers.78.self_attn.q_a_layernorm",
|
||||
"model.layers.78.shared_head.norm",
|
||||
"model.norm"
|
||||
]
|
||||
}
|
||||
}
|
||||
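The list above follows a fixed nine-module pattern per layer (the two layernorms, the MoE gate and its e_score_correction_bias, and the indexer projections/norms), with layer 78 adding four MTP-style extras (eh_proj, enorm, hnorm, shared_head.norm). A minimal sketch of how such a list could be regenerated; the helper name and layer count are illustrative and not part of the config:

import json

# Illustrative only: the nine per-layer suffixes observed in the config above.
PER_LAYER_SUFFIXES = [
    "input_layernorm",
    "mlp.gate",
    "mlp.gate.e_score_correction_bias",
    "post_attention_layernorm",
    "self_attn.indexer.k_norm",
    "self_attn.indexer.k_norm.bias",
    "self_attn.indexers_proj",
    "self_attn.kv_a_layernorm",
    "self_attn.q_a_layernorm",
]

def module_names(num_layers: int) -> list[str]:
    # Layer 78's extra modules (eh_proj, enorm, hnorm, shared_head.norm)
    # would still need to be appended separately.
    names = [
        f"model.layers.{layer}.{suffix}"
        for layer in range(num_layers)
        for suffix in PER_LAYER_SUFFIXES
    ]
    names.append("model.norm")
    return names

print(json.dumps(module_names(79), indent=2))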
1441793
trace_model_meta/ZhipuAI/GLM-5-FP8/tokenizer.json
Normal file
File diff suppressed because it is too large
33
trace_model_meta/ZhipuAI/GLM-5-FP8/tokenizer_config.json
Normal file
@@ -0,0 +1,33 @@
{
  "backend": "tokenizers",
  "clean_up_tokenization_spaces": false,
  "do_lower_case": false,
  "eos_token": "<|endoftext|>",
  "extra_special_tokens": [
    "<|endoftext|>",
    "[MASK]",
    "[gMASK]",
    "[sMASK]",
    "<sop>",
    "<eop>",
    "<|system|>",
    "<|user|>",
    "<|assistant|>",
    "<|observation|>",
    "<|begin_of_image|>",
    "<|end_of_image|>",
    "<|begin_of_video|>",
    "<|end_of_video|>",
    "<|begin_of_audio|>",
    "<|end_of_audio|>",
    "<|begin_of_transcription|>",
    "<|end_of_transcription|>"
  ],
  "is_local": true,
  "model_max_length": 202752,
  "model_specific_special_tokens": {},
  "pad_token": "<|endoftext|>",
  "padding_side": "left",
  "remove_space": false,
  "tokenizer_class": "TokenizersBackend"
}
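For reference, a minimal sketch of consuming these two files directly with the tokenizers package (assuming it is installed; the path assumes the repository layout shown in this commit, and the keys used are exactly those listed above):

import json
from tokenizers import Tokenizer

model_dir = "trace_model_meta/ZhipuAI/GLM-5-FP8"
tokenizer = Tokenizer.from_file(f"{model_dir}/tokenizer.json")

with open(f"{model_dir}/tokenizer_config.json", encoding="utf-8") as fh:
    config = json.load(fh)

# eos_token and pad_token both resolve to <|endoftext|> per the config above.
eos_id = tokenizer.token_to_id(config["eos_token"])
print(eos_id, config["model_max_length"])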
23
trace_model_meta/__init__.py
Normal file
@@ -0,0 +1,23 @@
from .registry import (
    ModelMeta,
    detect_model_family_from_features,
    detect_model_family_from_records,
    detect_model_family_from_trace_file,
    get_model_meta,
    infer_model_family_from_request_model,
    resolve_chat_template_path,
    resolve_model_family,
    resolve_tokenizer_path,
)

__all__ = [
    "ModelMeta",
    "detect_model_family_from_features",
    "detect_model_family_from_records",
    "detect_model_family_from_trace_file",
    "get_model_meta",
    "infer_model_family_from_request_model",
    "resolve_chat_template_path",
    "resolve_model_family",
    "resolve_tokenizer_path",
]
201
trace_model_meta/registry.py
Normal file
@@ -0,0 +1,201 @@
from __future__ import annotations

import csv
import json
from dataclasses import dataclass
from pathlib import Path


MODEL_META_ROOT = Path(__file__).resolve().parent


@dataclass(frozen=True)
class ModelMeta:
    family: str
    provider: str
    model_name: str
    request_model_hints: tuple[str, ...]

    @property
    def model_dir(self) -> Path:
        return MODEL_META_ROOT / self.provider / self.model_name

    @property
    def tokenizer_path(self) -> Path:
        return self.model_dir / "tokenizer.json"

    @property
    def chat_template_path(self) -> Path:
        return self.model_dir / "chat_template.jinja"


MODEL_REGISTRY = {
    "glm5": ModelMeta(
        family="glm5",
        provider="ZhipuAI",
        model_name="GLM-5-FP8",
        request_model_hints=("glm", "zhipu"),
    ),
    "qwen3-coder": ModelMeta(
        family="qwen3-coder",
        provider="Qwen",
        model_name="Qwen3-Coder-480B-A35B-Instruct",
        request_model_hints=("qwen3-coder", "qwen3 coder", "qwen3_coder"),
    ),
}

MODEL_ALIASES = {
    "glm": "glm5",
    "glm5": "glm5",
    "zhipu-glm5": "glm5",
    "zhipuai-glm5": "glm5",
    "qwen": "qwen3-coder",
    "qwen3": "qwen3-coder",
    "qwen3-coder": "qwen3-coder",
    "qwen3_coder": "qwen3-coder",
    "coder": "qwen3-coder",
}
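# Example alias resolutions: "qwen3", "coder", and "qwen3_coder" all normalize
# to "qwen3-coder"; "glm" and "zhipuai-glm5" normalize to "glm5". Explicitly
# requested families must appear in this table; resolve_model_family() below
# raises ValueError otherwise, and only "auto"/empty falls through to inference.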


def infer_model_family_from_request_model(request_model: str | None) -> str | None:
    text = str(request_model or "").strip().lower()
    if not text:
        return None
    for family, meta in MODEL_REGISTRY.items():
        if any(hint in text for hint in meta.request_model_hints):
            return family
    return None
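# Matching is substring-based, so request models such as "glm-5-fp8" or
# "Qwen3-Coder-480B" (lowercased) resolve via the hints in MODEL_REGISTRY.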


def _infer_model_family_from_path(input_path: str | Path | None) -> str | None:
    text = str(input_path or "").strip().lower()
    if not text:
        return None
    if "qwen3-coder" in text or "qwen3_coder" in text:
        return "qwen3-coder"
    if "glm5" in text or "trace-glm" in text:
        return "glm5"
    return None


def detect_model_family_from_trace_file(path: str | Path) -> str | None:
    resolved = Path(path)
    with resolved.open("r", encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            raw = json.loads(stripped)
            if isinstance(raw.get("meta"), dict):
                meta = raw["meta"]
                family = str(meta.get("model_family", "")).strip()
                if family:
                    return resolve_model_family(family)
                inferred = infer_model_family_from_request_model(meta.get("request_model"))
                if inferred:
                    return inferred
            inferred = infer_model_family_from_request_model(raw.get("request_model"))
            if inferred:
                return inferred
            break
    return _infer_model_family_from_path(path)
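# Note: only the first non-empty JSONL record is examined (the loop breaks
# after one line); if it yields nothing, detection falls back to substring
# matching on the file path.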


def detect_model_family_from_features(path: str | Path) -> str | None:
    resolved = Path(path)
    with resolved.open("r", encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        for row in reader:
            inferred = infer_model_family_from_request_model(row.get("model"))
            if inferred:
                return inferred
            break
    return _infer_model_family_from_path(path)
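# Same single-shot pattern as above: only the first CSV row's "model" column
# is consulted before falling back to path matching.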


def detect_model_family_from_records(records) -> str | None:
    for record in records:
        inferred = infer_model_family_from_request_model(record.meta.request_model)
        if inferred:
            return inferred
        break
    return None


def resolve_model_family(
    model_family: str | None = None,
    *,
    request_model: str | None = None,
    input_path: str | Path | None = None,
    features_path: str | Path | None = None,
    records=None,
) -> str:
    candidate = str(model_family or "auto").strip().lower()
    if candidate and candidate != "auto":
        if candidate in MODEL_ALIASES:
            return MODEL_ALIASES[candidate]
        raise ValueError(f"Unsupported model family: {model_family}")

    inferred = infer_model_family_from_request_model(request_model)
    if inferred:
        return inferred
    if records is not None:
        inferred = detect_model_family_from_records(records)
        if inferred:
            return inferred
    if features_path is not None:
        inferred = detect_model_family_from_features(features_path)
        if inferred:
            return inferred
    if input_path is not None:
        inferred = detect_model_family_from_trace_file(input_path)
        if inferred:
            return inferred
    return "glm5"


def get_model_meta(model_family: str | None = None, *, model_meta_dir: str | Path | None = None, **kwargs) -> ModelMeta:
    family = resolve_model_family(model_family, **kwargs)
    base_meta = MODEL_REGISTRY[family]
    if model_meta_dir is None:
        return base_meta

    custom_root = Path(model_meta_dir)
    custom_model_dir = custom_root / base_meta.provider / base_meta.model_name
    if not custom_model_dir.exists():
        raise FileNotFoundError(f"Model meta directory not found for {family}: {custom_model_dir}")
    return ModelMeta(
        family=base_meta.family,
        provider=base_meta.provider,
        model_name=base_meta.model_name,
        request_model_hints=base_meta.request_model_hints,
    )
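# A custom model_meta_dir is only validated for existence here; the returned
# ModelMeta still derives model_dir from the packaged MODEL_META_ROOT, so
# callers needing custom paths should pass model_meta_dir to the resolve_*
# helpers below instead.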


def resolve_chat_template_path(
    model_family: str | None = None,
    *,
    model_meta_dir: str | Path | None = None,
    **kwargs,
) -> Path:
    family = resolve_model_family(model_family, **kwargs)
    meta = MODEL_REGISTRY[family]
    model_dir = Path(model_meta_dir) / meta.provider / meta.model_name if model_meta_dir else meta.model_dir
    return model_dir / "chat_template.jinja"


def resolve_tokenizer_path(
    tokenizer_path: str | Path | None = None,
    *,
    model_family: str | None = None,
    model_meta_dir: str | Path | None = None,
    **kwargs,
) -> str:
    if tokenizer_path:
        path = Path(tokenizer_path)
        return str(path.parent if path.is_file() else path)

    family = resolve_model_family(model_family, **kwargs)
    meta = MODEL_REGISTRY[family]
    model_dir = Path(model_meta_dir) / meta.provider / meta.model_name if model_meta_dir else meta.model_dir
    return str(model_dir)
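A short usage sketch of the registry as exported through trace_model_meta/__init__.py; the request-model string is a made-up example, and the printed paths depend on where the package is installed:

from trace_model_meta import (
    get_model_meta,
    resolve_chat_template_path,
    resolve_model_family,
    resolve_tokenizer_path,
)

family = resolve_model_family("auto", request_model="glm-5-fp8")  # -> "glm5"
meta = get_model_meta(family)
print(meta.provider, meta.model_name)                   # ZhipuAI GLM-5-FP8
print(resolve_tokenizer_path(model_family=family))      # .../ZhipuAI/GLM-5-FP8
print(resolve_chat_template_path(model_family=family))  # .../chat_template.jinja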