Commit 0d37ce5

feat: Update llama.cpp

1 parent: ffcd4b2
File tree

3 files changed: 5 additions, 145 deletions

llama_cpp/llama_cpp.py
llama_cpp/server/settings.py
vendor/llama.cpp

llama_cpp/llama_cpp.py (2 additions, 142 deletions)
@@ -111,6 +111,7 @@ class CtypesRef(Generic[CtypesCData]):
 
 F = TypeVar("F", bound=Callable[..., Any])
 
+
 def ctypes_function_for_shared_library(lib: ctypes.CDLL):
     def ctypes_function(
         name: str, argtypes: List[Any], restype: Any, enabled: bool = True
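Note: the hunk above only inserts a blank line, but the `ctypes_function_for_shared_library` factory it touches is the mechanism every binding in this file relies on. A minimal sketch of the pattern (not the library's actual implementation), assuming an already-loaded `ctypes.CDLL`:

```python
import ctypes
from typing import Any, Callable, List


def ctypes_function_for_shared_library(lib: ctypes.CDLL):
    """Return a decorator factory bound to one shared library (sketch only)."""

    def ctypes_function(
        name: str, argtypes: List[Any], restype: Any, enabled: bool = True
    ):
        def decorator(stub: Callable[..., Any]) -> Callable[..., Any]:
            if not enabled:
                return stub  # keep the pure-Python stub if the symbol is unavailable
            cfunc = getattr(lib, name)  # look up the C symbol, e.g. "llama_decode"
            cfunc.argtypes = argtypes   # argument marshalling for ctypes
            cfunc.restype = restype     # return-type marshalling
            return cfunc                # the stub's name now dispatches to C

        return decorator

    return ctypes_function
```

Each `@ctypes_function(...)` decorator below replaces its `...`-bodied stub with the corresponding C entry point while keeping the Python signature for type checkers.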
@@ -938,18 +939,6 @@ def llama_supports_gpu_offload() -> bool:
     ...
 
 
-# LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
-@ctypes_function("llama_mmap_supported", [], ctypes.c_bool)
-def llama_mmap_supported() -> bool:
-    ...
-
-
-# LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
-@ctypes_function("llama_mlock_supported", [], ctypes.c_bool)
-def llama_mlock_supported() -> bool:
-    ...
-
-
 # LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 @ctypes_function("llama_get_model", [llama_context_p_ctypes], llama_model_p_ctypes)
 def llama_get_model(ctx: llama_context_p, /) -> Optional[llama_model_p]:
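The two deprecated capability probes removed above have direct replacements that already exist in the module; callers only need to switch names. A usage sketch:

```python
import llama_cpp

# llama_supports_mmap()/llama_supports_mlock() replace the removed
# llama_mmap_supported()/llama_mlock_supported() one-for-one.
print("mmap supported: ", llama_cpp.llama_supports_mmap())
print("mlock supported:", llama_cpp.llama_supports_mlock())
```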
@@ -1158,47 +1147,6 @@ def llama_model_quantize(
     ...
 
 
-# // Apply a LoRA adapter to a loaded model
-# // path_base_model is the path to a higher quality model to use as a base for
-# // the layers modified by the adapter. Can be NULL to use the current loaded model.
-# // The model needs to be reloaded before applying a new adapter, otherwise the adapter
-# // will be applied on top of the previous one
-# // Returns 0 on success
-# LLAMA_API DEPRECATED(int32_t llama_apply_lora_from_file(
-#     struct llama_context * ctx,
-#     const char * path_lora,
-#     float scale,
-#     const char * path_base_model,
-#     int32_t n_threads),
-#     "use llama_model_apply_lora_from_file instead");
-@ctypes_function(
-    "llama_apply_lora_from_file",
-    [
-        llama_context_p_ctypes,
-        ctypes.c_char_p,
-        ctypes.c_float,
-        ctypes.c_char_p,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int32,
-)
-def llama_apply_lora_from_file(
-    ctx: llama_context_p,
-    path_lora: Union[ctypes.c_char_p, bytes],
-    scale: Union[ctypes.c_float, float],
-    path_base_model: Union[ctypes.c_char_p, bytes],
-    n_threads: Union[ctypes.c_int32, int],
-    /,
-) -> int:
-    """Apply a LoRA adapter to a loaded model
-    path_base_model is the path to a higher quality model to use as a base for
-    the layers modified by the adapter. Can be NULL to use the current loaded model.
-    The model needs to be reloaded before applying a new adapter, otherwise the adapter
-    will be applied on top of the previous one
-    Returns 0 on success"""
-    ...
-
-
 # LLAMA_API int32_t llama_model_apply_lora_from_file(
 #     const struct llama_model * model,
 #     const char * path_lora,
@@ -1220,7 +1168,7 @@ def llama_model_apply_lora_from_file(
     model: llama_model_p,
     path_lora: Union[ctypes.c_char_p, bytes],
     scale: Union[ctypes.c_float, float],
-    path_base_model: Union[ctypes.c_char_p, bytes],
+    path_base_model: Union[ctypes.c_char_p, bytes, None],
    n_threads: Union[ctypes.c_int32, int],
     /,
 ) -> int:
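Besides dropping the deprecated context-level `llama_apply_lora_from_file`, the surviving model-level binding now annotates `path_base_model` as optional, matching the C header's "Can be NULL" note. A hedged sketch of a call site; the paths and thread count are placeholders, and the loading calls assume the existing `llama_load_model_from_file`/`llama_model_default_params` bindings:

```python
import llama_cpp

# Placeholder paths; point these at real GGUF/LoRA files.
model = llama_cpp.llama_load_model_from_file(
    b"./models/base-model.gguf", llama_cpp.llama_model_default_params()
)

# path_base_model=None now type-checks: the currently loaded model is used
# as the base for the LoRA-modified layers.
ret = llama_cpp.llama_model_apply_lora_from_file(
    model, b"./loras/adapter.bin", 1.0, None, 4
)
if ret != 0:
    raise RuntimeError("applying the LoRA adapter failed")
```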
@@ -1647,72 +1595,6 @@ def llama_save_session_file(
 # //
 
 
-# // Run the llama inference to obtain the logits and probabilities for the next token(s).
-# // tokens + n_tokens is the provided batch of new tokens to process
-# // n_past is the number of tokens to use from previous eval calls
-# // Returns 0 on success
-# // DEPRECATED: use llama_decode() instead
-# LLAMA_API DEPRECATED(int llama_eval(
-#     struct llama_context * ctx,
-#     llama_token * tokens,
-#     int32_t n_tokens,
-#     int32_t n_past),
-#     "use llama_decode() instead");
-@ctypes_function(
-    "llama_eval",
-    [
-        llama_context_p_ctypes,
-        llama_token_p,
-        ctypes.c_int32,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int,
-)
-def llama_eval(
-    ctx: llama_context_p,
-    tokens: CtypesArray[llama_token],
-    n_tokens: Union[ctypes.c_int, int],
-    n_past: Union[ctypes.c_int, int],
-    /,
-) -> int:
-    """Run the llama inference to obtain the logits and probabilities for the next token(s).
-    tokens + n_tokens is the provided batch of new tokens to process
-    n_past is the number of tokens to use from previous eval calls
-    Returns 0 on success
-    DEPRECATED: use llama_decode() instead"""
-    ...
-
-
-# // Same as llama_eval, but use float matrix input directly.
-# // DEPRECATED: use llama_decode() instead
-# LLAMA_API DEPRECATED(int llama_eval_embd(
-#     struct llama_context * ctx,
-#     float * embd,
-#     int32_t n_tokens,
-#     int32_t n_past),
-#     "use llama_decode() instead");
-@ctypes_function(
-    "llama_eval_embd",
-    [
-        llama_context_p_ctypes,
-        ctypes.POINTER(ctypes.c_float),
-        ctypes.c_int32,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int,
-)
-def llama_eval_embd(
-    ctx: llama_context_p,
-    embd: CtypesArray[ctypes.c_float],
-    n_tokens: Union[ctypes.c_int, int],
-    n_past: Union[ctypes.c_int, int],
-    /,
-) -> int:
-    """Same as llama_eval, but use float matrix input directly.
-    DEPRECATED: use llama_decode() instead"""
-    ...
-
-
 # // Return batch for single sequence of tokens starting at pos_0
 # //
 # // NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it
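The removed `llama_eval`/`llama_eval_embd` bindings point at `llama_decode()` as the replacement, and the transitional helper named in the context lines above (`llama_batch_get_one`) is the simplest way to feed it a single token sequence. A minimal sketch; `ctx` is assumed to be a context created elsewhere with `llama_new_context_with_model`, and the token ids are placeholders:

```python
import llama_cpp

n_past = 0                                     # tokens already evaluated
tokens = (llama_cpp.llama_token * 3)(1, 2, 3)  # placeholder token ids
batch = llama_cpp.llama_batch_get_one(tokens, len(tokens), n_past, 0)

# ctx: an initialized llama_context_p (see llama_new_context_with_model).
if llama_cpp.llama_decode(ctx, batch) != 0:
    raise RuntimeError("llama_decode failed")
```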
@@ -2474,28 +2356,6 @@ def llama_sample_temp(
     ...
 
 
-# LLAMA_API DEPRECATED(void llama_sample_temperature(
-#     struct llama_context * ctx,
-#     llama_token_data_array * candidates,
-#     float temp),
-#     "use llama_sample_temp instead");
-@ctypes_function(
-    "llama_sample_temperature",
-    [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_float],
-    None,
-)
-def llama_sample_temperature(
-    ctx: llama_context_p,
-    candidates: Union[
-        CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array]
-    ],
-    temp: Union[ctypes.c_float, float],
-    /,
-):
-    """use llama_sample_temp instead"""
-    ...
-
-
 # /// @details Apply constraints from grammar
 # LLAMA_API void llama_sample_grammar(
 #     struct llama_context * ctx,
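The removed temperature sampler was a pure rename: `llama_sample_temp` takes the same `(ctx, candidates, temp)` arguments, so former call sites only need the new name. A sketch of building the candidates array from the current logits and applying temperature, assuming `ctx` (llama_context_p) and `model` (llama_model_p) are already initialized; this mirrors the existing low-level sampling flow rather than introducing any new API:

```python
import ctypes
import llama_cpp

# Assumes ctx and model already exist and llama_decode has been called.
n_vocab = llama_cpp.llama_n_vocab(model)
logits = llama_cpp.llama_get_logits(ctx)

data = (llama_cpp.llama_token_data * n_vocab)(
    *[
        llama_cpp.llama_token_data(id=i, logit=logits[i], p=0.0)
        for i in range(n_vocab)
    ]
)
candidates = llama_cpp.llama_token_data_array(data=data, size=n_vocab, sorted=False)

# Same call shape as the removed llama_sample_temperature.
llama_cpp.llama_sample_temp(ctx, ctypes.byref(candidates), 0.8)
```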

llama_cpp/server/settings.py (2 additions, 2 deletions)
@@ -45,11 +45,11 @@ class ModelSettings(BaseSettings):
         default=False, description="Whether to only return the vocabulary."
     )
     use_mmap: bool = Field(
-        default=llama_cpp.llama_mmap_supported(),
+        default=llama_cpp.llama_supports_mmap(),
         description="Use mmap.",
     )
     use_mlock: bool = Field(
-        default=llama_cpp.llama_mlock_supported(),
+        default=llama_cpp.llama_supports_mlock(),
         description="Use mlock.",
     )
     kv_overrides: Optional[List[str]] = Field(
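The server defaults now go through the surviving capability probes. Because `Field(default=...)` is evaluated once, when the class body is executed, `use_mmap`/`use_mlock` default to whatever the loaded libllama reports at import time. A simplified stand-in (using plain `pydantic.BaseModel` rather than the server's actual settings base class) to illustrate the behavior:

```python
import llama_cpp
from pydantic import BaseModel, Field


class ExampleModelSettings(BaseModel):
    use_mmap: bool = Field(
        default=llama_cpp.llama_supports_mmap(), description="Use mmap."
    )
    use_mlock: bool = Field(
        default=llama_cpp.llama_supports_mlock(), description="Use mlock."
    )


# Defaults reflect platform support at import time, e.g. use_mmap=True on most builds.
print(ExampleModelSettings())
```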

vendor/llama.cpp (submodule pointer update: 1 addition, 1 deletion)