diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 11fe169cf5..210fb040b8 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -107,6 +107,7 @@ def __init__(
         # Chat Format Params
         chat_format: Optional[str] = None,
         chat_handler: Optional[llama_chat_format.LlamaChatCompletionHandler] = None,
+        chat_template_kwargs: Optional[Dict[str, Any]] = None,
         # Speculative Decoding
         draft_model: Optional[LlamaDraftModel] = None,
         # Tokenizer Override
@@ -185,6 +186,7 @@ def __init__(
             numa: numa policy
             chat_format: String specifying the chat format to use when calling create_chat_completion.
             chat_handler: Optional chat handler to use when calling create_chat_completion.
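+            chat_template_kwargs: Optional dict of default keyword arguments, supplied at model load time, that are forwarded to the chat handler on every create_chat_completion call (e.g., enable_thinking=False for Qwen3.5 models). Per-call keyword arguments override them.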
             draft_model: Optional draft model to use for speculative decoding.
             tokenizer: Optional tokenizer to override the default tokenizer from llama.cpp.
             verbose: Print verbose output to stderr.
@@ -454,6 +456,11 @@ def free_lora_adapter():
             str, llama_chat_format.LlamaChatCompletionHandler
         ] = {}
 
+        # Store chat_template_kwargs on self; no handler is wrapped here.
+        # create_chat_completion merges them into the handler call at call time
+        # (after _chat_handlers is populated from model metadata)
+        self._chat_template_kwargs = chat_template_kwargs or {}
+
         self.draft_model = draft_model
 
         self._n_vocab = self.n_vocab()
@@ -1973,6 +1980,7 @@ def create_chat_completion(
         logit_bias: Optional[Dict[int, float]] = None,
         logprobs: Optional[bool] = None,
         top_logprobs: Optional[int] = None,
+        **kwargs: Any,
     ) -> Union[
         CreateChatCompletionResponse, Iterator[CreateChatCompletionStreamResponse]
     ]:
@@ -2014,6 +2022,8 @@ def create_chat_completion(
             or self._chat_handlers.get(self.chat_format)
             or llama_chat_format.get_chat_completion_handler(self.chat_format)
         )
+        # Load-time chat_template_kwargs are passed to the handler as defaults;
+        # per-call **kwargs take precedence when the same key appears in both.
         return handler(
             llama=self,
             messages=messages,
@@ -2044,6 +2054,7 @@ def create_chat_completion(
             logits_processor=logits_processor,
             grammar=grammar,
             logit_bias=logit_bias,
+            **{**self._chat_template_kwargs, **kwargs},
         )
 
     def create_chat_completion_openai_v1(
@@ -2132,6 +2143,7 @@ def __getstate__(self):
             # Chat Format Params
             chat_format=self.chat_format,
             chat_handler=self.chat_handler,
+            chat_template_kwargs=self._chat_template_kwargs,
             # Speculative Decidng
             draft_model=self.draft_model,
             # KV cache quantization