def get_deepseek_v4_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
    """Wrap a tokenizer so it uses the custom DeepSeek V4 chat template encoding.

    A shallow copy of ``tokenizer`` is made and its class is swapped for a
    dynamically created subclass of ``tokenizer.__class__``. The object passed
    in keeps its original class.

    The vocabulary size and the added-vocabulary mapping are read once, when
    this function is called, and are reused by ``__len__`` and
    ``get_added_vocab`` of the returned object.

    Args:
        tokenizer: The tokenizer to wrap.

    Returns:
        A copy of ``tokenizer`` whose class is named ``DSV4<OriginalClassName>``
        and whose ``apply_chat_template`` renders prompts with
        ``encode_messages``.
    """
    dsv4_tokenizer = copy.copy(tokenizer)

    # Snapshot the vocabulary information once; the methods below close over it.
    added_vocab = tokenizer.get_added_vocab()
    added_vocab_size = len(added_vocab)
    tokenizer_vocab_size = tokenizer.vocab_size

    class _DeepseekV4Tokenizer(tokenizer.__class__):  # type: ignore
        def apply_chat_template(
            self,
            messages: list["ChatCompletionMessageParam"],
            tools: list[dict[str, Any]] | None = None,
            **kwargs,
        ) -> str | list[int]:
            """Render a conversation with the DeepSeek V4 encoding.

            Args:
                messages: Conversation to render. The list itself is not
                    modified; a shallow copy is rendered.
                tools: Optional tool definitions. When non-empty, a system
                    message carrying them is placed in front of the
                    conversation.
                **kwargs: Recognized keys:
                    ``thinking`` / ``enable_thinking``: if either is truthy
                    the "thinking" mode is used, otherwise "chat";
                    ``conversation``: replaces ``messages`` when given and
                    not ``None``;
                    ``reasoning_effort``: forwarded only when it is "max" or
                    "high", otherwise ``None`` is forwarded;
                    ``drop_thinking``: forwarded as-is (default ``True``);
                    ``tokenize``: return token ids when truthy (default
                    ``True``), otherwise the prompt string;
                    ``truncation`` / ``max_length``: forwarded to
                    ``self.encode`` when tokenizing.

            Returns:
                The result of ``self.encode`` when tokenizing, otherwise the
                rendered prompt string.
            """
            thinking = kwargs.get("thinking", False) or kwargs.get(
                "enable_thinking", False
            )
            thinking_mode = "thinking" if thinking else "chat"

            # An explicit ``conversation=None`` falls back to ``messages``
            # instead of failing on ``None.copy()``.
            conversation = kwargs.get("conversation")
            if conversation is None:
                conversation = messages
            # ``list(...)`` copies any sequence (not only lists), so the
            # caller's container is never mutated by the insert below.
            messages = list(conversation)

            if tools:
                messages.insert(0, {"role": "system"})
                messages[0]["tools"] = tools  # type: ignore[typeddict-unknown-key]

            # The V4 reference currently accepts only "max", "high", or None.
            reasoning_effort = kwargs.get("reasoning_effort")
            if reasoning_effort not in ("max", "high"):
                reasoning_effort = None

            prompt_str = encode_messages(  # type: ignore
                messages,
                thinking_mode=thinking_mode,
                drop_thinking=kwargs.get("drop_thinking", True),
                reasoning_effort=reasoning_effort,
            )

            if not kwargs.get("tokenize", True):
                return prompt_str

            tokenizer_kwargs = {
                k: kwargs[k] for k in ("truncation", "max_length") if k in kwargs
            }
            # The rendered prompt is encoded with add_special_tokens=False.
            return self.encode(
                prompt_str,
                add_special_tokens=False,
                **tokenizer_kwargs,
            )

        def num_special_tokens_to_add(self, pair: bool = False) -> int:
            """Return how many special tokens encoding adds.

            ``pair`` is accepted so the override stays call-compatible with
            the wrapped class. For the default single-sequence case the count
            is measured by encoding an empty string; the pair case is
            delegated to the wrapped class's implementation.
            """
            if pair:
                return super().num_special_tokens_to_add(pair=True)
            return len(self.encode(""))

        def __len__(self) -> int:
            """Return the base vocab size plus the number of added tokens."""
            return tokenizer_vocab_size + added_vocab_size

        def get_added_vocab(self) -> dict[str, int]:
            """Return a copy of the added-vocabulary mapping captured at wrap time."""
            return added_vocab.copy()

        def __reduce__(self):
            # The subclass is created inside this function, so pickling is
            # expressed as "re-wrap the original tokenizer" instead.
            return get_deepseek_v4_tokenizer, (tokenizer,)

    _DeepseekV4Tokenizer.__name__ = f"DSV4{tokenizer.__class__.__name__}"

    dsv4_tokenizer.__class__ = _DeepseekV4Tokenizer
    return dsv4_tokenizer