Commit 2c2afa3: Update llama.cpp
Parent: 6dde6bd

3 files changed: +86 -5 lines

llama_cpp/llama_cpp.py (+80 -1)

@@ -205,6 +205,7 @@ def _load_shared_library(lib_base_name: str):
 LLAMA_ROPE_SCALING_YARN = 2
 LLAMA_ROPE_SCALING_MAX_VALUE = LLAMA_ROPE_SCALING_YARN
 
+
 # typedef struct llama_token_data {
 #     llama_token id; // token id
 #     float logit;    // log-odds of the token

@@ -661,6 +662,62 @@ def llama_rope_freq_scale_train(model: llama_model_p) -> float:
 _lib.llama_rope_freq_scale_train.argtypes = [llama_model_p]
 _lib.llama_rope_freq_scale_train.restype = c_float
 
+# // Functions to access the model's GGUF metadata scalar values
+# // - The functions return the length of the string on success, or -1 on failure
+# // - The output string is always null-terminated and cleared on failure
+# // - GGUF array values are not supported by these functions
+
+
+# // Get metadata value as a string by key name
+# LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+def llama_model_meta_val_str(
+    model: llama_model_p, key: Union[c_char_p, bytes], buf: bytes, buf_size: int
+) -> int:
+    return _lib.llama_model_meta_val_str(model, key, buf, buf_size)
+
+
+_lib.llama_model_meta_val_str.argtypes = [llama_model_p, c_char_p, c_char_p, c_size_t]
+_lib.llama_model_meta_val_str.restype = c_int
+
+
+# // Get the number of metadata key/value pairs
+# LLAMA_API int llama_model_meta_count(const struct llama_model * model);
+def llama_model_meta_count(model: llama_model_p) -> int:
+    return _lib.llama_model_meta_count(model)
+
+
+_lib.llama_model_meta_count.argtypes = [llama_model_p]
+_lib.llama_model_meta_count.restype = c_int
+
+
+# // Get metadata key name by index
+# LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+def llama_model_meta_key_by_index(
+    model: llama_model_p, i: Union[c_int, int], buf: bytes, buf_size: int
+) -> int:
+    return _lib.llama_model_meta_key_by_index(model, i, buf, buf_size)
+
+
+_lib.llama_model_meta_key_by_index.argtypes = [llama_model_p, c_int, c_char_p, c_size_t]
+_lib.llama_model_meta_key_by_index.restype = c_int
+
+
+# // Get metadata value as a string by index
+# LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+def llama_model_meta_val_str_by_index(
+    model: llama_model_p, i: Union[c_int, int], buf: bytes, buf_size: int
+) -> int:
+    return _lib.llama_model_meta_val_str_by_index(model, i, buf, buf_size)
+
+
+_lib.llama_model_meta_val_str_by_index.argtypes = [
+    llama_model_p,
+    c_int,
+    c_char_p,
+    c_size_t,
+]
+_lib.llama_model_meta_val_str_by_index.restype = c_int
+
 
 # // Get a string describing the model type
 # LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
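
Taken together, these four bindings are enough to enumerate a model's GGUF metadata from Python. A minimal sketch of such a dump, assuming an already-loaded model pointer and an arbitrary 1 KiB scratch buffer (the helper name dump_gguf_metadata is illustrative, not part of this commit):

    import ctypes

    from llama_cpp import llama_cpp

    def dump_gguf_metadata(model) -> dict:
        """Illustrative helper: collect a model's GGUF key/value pairs as strings."""
        meta = {}
        buf = ctypes.create_string_buffer(1024)  # scratch buffer; 1 KiB is arbitrary
        for i in range(llama_cpp.llama_model_meta_count(model)):
            # Each binding returns the string length on success or -1 on failure,
            # and the buffer comes back null-terminated either way.
            if llama_cpp.llama_model_meta_key_by_index(model, i, buf, ctypes.sizeof(buf)) < 0:
                continue
            key = buf.value.decode("utf-8")
            if llama_cpp.llama_model_meta_val_str_by_index(model, i, buf, ctypes.sizeof(buf)) < 0:
                continue
            meta[key] = buf.value.decode("utf-8")
        return meta

A single value can be fetched by key in the same way, e.g. llama_model_meta_val_str(model, b"general.name", buf, ctypes.sizeof(buf)); "general.name" is only an example key, and a value longer than the scratch buffer would come back truncated.
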
@@ -1213,7 +1270,9 @@ def llama_token_get_text(model: llama_model_p, token: Union[llama_token, int]) -
 
 
 # LLAMA_API float llama_token_get_score(const struct llama_model * model, llama_token token);
-def llama_token_get_score(model: llama_model_p, token: Union[llama_token, int]) -> float:
+def llama_token_get_score(
+    model: llama_model_p, token: Union[llama_token, int]
+) -> float:
     return _lib.llama_token_get_score(model, token)
 
 

@@ -1260,6 +1319,26 @@ def llama_token_nl(model: llama_model_p) -> int:
 _lib.llama_token_nl.restype = llama_token
 
 
+# // Returns -1 if unknown, 1 for true or 0 for false.
+# LLAMA_API int llama_add_bos_token(const struct llama_model * model);
+def llama_add_bos_token(model: llama_model_p) -> int:
+    return _lib.llama_add_bos_token(model)
+
+
+_lib.llama_add_bos_token.argtypes = [llama_model_p]
+_lib.llama_add_bos_token.restype = c_int
+
+
+# // Returns -1 if unknown, 1 for true or 0 for false.
+# LLAMA_API int llama_add_eos_token(const struct llama_model * model);
+def llama_add_eos_token(model: llama_model_p) -> int:
+    return _lib.llama_add_eos_token(model)
+
+
+_lib.llama_add_eos_token.argtypes = [llama_model_p]
+_lib.llama_add_eos_token.restype = c_int
+
+
 # // codellama infill tokens
 # LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
 def llama_token_prefix(model: llama_model_p) -> int:
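
As the upstream comments note, both functions are tri-state: -1 if the model's metadata doesn't say, 1 for true, 0 for false. A hedged sketch of how a caller might fold that into a boolean decision (the fallback for the unknown case is an assumption here, not something the binding defines):

    from llama_cpp import llama_cpp

    def should_add_bos(model) -> bool:
        # Tri-state per the binding's comment: -1 unknown, 1 true, 0 false.
        flag = llama_cpp.llama_add_bos_token(model)
        if flag < 0:
            # Unknown: default to prepending BOS, a common convention for
            # LLaMA-family checkpoints (this fallback is an assumption).
            return True
        return flag == 1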

tests/test_llama.py (+5 -3)

@@ -35,7 +35,7 @@ def test_llama_cpp_tokenization():
 
     tokens = llama.tokenize(text, special=True)
     assert tokens[-1] == llama.token_eos()
-    assert tokens == [1, 10994, 2787, 2]
+    assert tokens == [1, 15043, 2787, 2]
 
 
 def test_llama_patch(monkeypatch):
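
The expected ids track the vendored tokenizer bump: 15043 is the SentencePiece piece "▁Hello" (with a word-boundary marker) where the old 10994 is "Hello" without one, at least in the LLaMA-2 vocabulary; the diff itself doesn't spell this out. After a vendor update, the new expectation can be re-derived in place (sketch; assumes the suite's llama instance and text value):

    # Print what the current vendored tokenizer produces, then update
    # the assertion to match.
    print(llama.tokenize(text, special=True))  # e.g. [1, 15043, 2787, 2]
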
@@ -55,8 +55,11 @@ def mock_get_logits(*args, **kwargs):
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
 
+    text = "The quick brown fox"
+    text_tokens = llama.tokenize(text.encode("utf-8"), add_bos=True, special=True)
     output_text = " jumps over the lazy dog."
-    output_tokens = llama.tokenize(output_text.encode("utf-8"), add_bos=False, special=True)
+    all_text_tokens = llama.tokenize((text + output_text).encode("utf-8"), add_bos=True, special=True)
+    output_tokens = all_text_tokens[len(text_tokens):]
     token_eos = llama.token_eos()
     n = 0
 
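
The new slicing approach reflects a general property of SentencePiece-style tokenizers: a suffix tokenized in isolation can split differently than the same suffix tokenized in context, so the reliable way to get the continuation's tokens is to tokenize the concatenation and drop the prefix's tokens. A sketch of the pattern in isolation (it assumes the prefix tokenization is itself a prefix of the full tokenization, which holds here but is not guaranteed in general):

    def continuation_tokens(tokenize, prefix: bytes, suffix: bytes) -> list:
        """Tokens for `suffix` as the model would actually see them after `prefix`."""
        prefix_tokens = tokenize(prefix)
        full_tokens = tokenize(prefix + suffix)
        # Tokenizing `suffix` alone could merge differently at the boundary;
        # slicing the full tokenization avoids that mismatch.
        return full_tokens[len(prefix_tokens):]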

@@ -70,7 +73,6 @@ def mock_sample(*args, **kwargs):
 
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_sample_token", mock_sample)
 
-    text = "The quick brown fox"
 
     ## Test basic completion until eos
     n = 0  # reset

vendor/llama.cpp (+1 -1, submodule pointer updated)
