
Commit ef898f1

Tool call support (generic + native for Llama, Functionary, Hermes, Mistral, Firefunction, DeepSeek) w/ lazy grammars (ggml-org#9639)

Authored and committed by ochafik, ngxson, and ggerganov.

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Xuan Son Nguyen <son@huggingface.co>

Parent commit: d9fb05c


48 files changed: +3861 / −156 lines
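The headline feature of this commit is OpenAI-compatible tool calling in `llama-server`. As a minimal sketch of what a client request looks like, here is an OpenAI-style chat-completion body with a `tools` array, of the kind this commit teaches the server's `/v1/chat/completions` endpoint to handle. The tool name `get_weather` and its parameter schema are invented for illustration, not taken from the commit:

```python
import json

def build_chat_request(user_message, tools):
    """Build an OpenAI-style chat completion request body with tools."""
    return {
        "model": "any",  # llama-server serves whichever model it loaded
        "messages": [{"role": "user", "content": user_message}],
        "tools": tools,
        "tool_choice": "auto",  # let the model decide whether to call a tool
    }

# Hypothetical tool definition, following the OpenAI function-calling schema.
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

body = build_chat_request("What's the weather in Paris?", [weather_tool])
print(json.dumps(body, indent=2))
```

When the model decides to call a tool, the server constrains decoding with a "lazy" grammar (per the commit title) so the emitted tool-call arguments are valid JSON for the declared schema, with model-specific (native) formats for Llama, Functionary, Hermes, Mistral, Firefunction, and DeepSeek, plus a generic fallback.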

.editorconfig (+8)

@@ -40,3 +40,11 @@ indent_style = tab
 [examples/cvector-generator/*.txt]
 trim_trailing_whitespace = unset
 insert_final_newline = unset
+
+[models/templates/*.jinja]
+indent_style = unset
+indent_size = unset
+end_of_line = unset
+charset = unset
+trim_trailing_whitespace = unset
+insert_final_newline = unset

.github/workflows/server.yml (+1, −1)

@@ -205,7 +205,7 @@ jobs:
         run: |
           cd examples/server/tests
           $env:PYTHONIOENCODING = ":replace"
-          pytest -v -x
+          pytest -v -x -m "not slow"

       - name: Slow tests
         id: server_integration_tests_slow
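The CI change above makes the fast Windows job skip any test marked slow, via pytest's `-m` marker expression. A minimal sketch of how that filtering works (the test names here are invented, not from the llama.cpp suite):

```python
import pytest

@pytest.mark.slow
def test_full_integration():
    # Deselected when CI runs: pytest -v -x -m "not slow"
    assert True

def test_fast_unit():
    # No marker, so it still runs under -m "not slow"
    assert True
```

Custom marks like `slow` are normally registered in the project's pytest configuration so that `pytest` does not warn about unknown markers.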

Makefile (+9)

@@ -52,6 +52,7 @@ TEST_TARGETS = \
 	tests/test-arg-parser \
 	tests/test-autorelease \
 	tests/test-backend-ops \
+	tests/test-chat \
 	tests/test-chat-template \
 	tests/test-double-float \
 	tests/test-grammar-integration \
@@ -983,6 +984,7 @@ OBJ_COMMON = \
 	$(DIR_COMMON)/ngram-cache.o \
 	$(DIR_COMMON)/sampling.o \
 	$(DIR_COMMON)/speculative.o \
+	$(DIR_COMMON)/chat.o \
 	$(DIR_COMMON)/build-info.o \
 	$(DIR_COMMON)/json-schema-to-grammar.o

@@ -1361,6 +1363,8 @@ llama-server: \
 	examples/server/httplib.h \
 	examples/server/index.html.hpp \
 	examples/server/loading.html.hpp \
+	common/chat.cpp \
+	common/chat.hpp \
 	common/chat-template.hpp \
 	common/json.hpp \
 	common/minja.hpp \
@@ -1471,6 +1475,11 @@ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
 	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

+tests/test-chat: tests/test-chat.cpp \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 tests/test-opt: tests/test-opt.cpp \
 	$(OBJ_GGML)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)

README.md (+1)

@@ -18,6 +18,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)

 - **How to use [MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggerganov/llama.cpp/pull/11427
 - **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode
+- Universal tool call support in `llama-server`: https://github.com/ggerganov/llama.cpp/pull/9639
 - Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
 - Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123
 - Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggerganov/llama.cpp/discussions/9669

common/CMakeLists.txt (+2)

@@ -56,6 +56,8 @@ add_library(${TARGET} STATIC
     arg.cpp
    arg.h
     base64.hpp
+    chat.cpp
+    chat.hpp
     chat-template.hpp
     common.cpp
     common.h
