Commit ff58003

Update llama.cpp
1 parent d989ac8

3 files changed: +59 -13 lines

llama_cpp/llama.py (+3 -1)

@@ -444,7 +444,7 @@ def eval_logits(self) -> Deque[List[float]]:
             maxlen=self._n_ctx if self.context_params.logits_all else 1,
         )

-    def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]:
+    def tokenize(self, text: bytes, add_bos: bool = True, special: bool = False) -> List[int]:
         """Tokenize a string.

         Args:
@@ -466,6 +466,7 @@ def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]:
             tokens,
             n_ctx,
             add_bos,
+            special
         )
         if n_tokens < 0:
             n_tokens = abs(n_tokens)
@@ -477,6 +478,7 @@ def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]:
             tokens,
             n_tokens,
             add_bos,
+            special
         )
         if n_tokens < 0:
             raise RuntimeError(
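
The net effect in the high-level API: Llama.tokenize() can now surface special/control tokens. A minimal usage sketch, assuming a local GGUF model (the path and the vocab_only flag are illustrative, not part of this commit):

    from llama_cpp import Llama

    # Hypothetical local model path; tokenization only needs the vocabulary.
    llm = Llama(model_path="./models/model.gguf", vocab_only=True)

    # Default: "<s>" is treated as plain text and split into ordinary tokens.
    plain = llm.tokenize(b"<s>Hello", add_bos=False)

    # special=True: "<s>" is tokenized as the BOS control token instead.
    with_special = llm.tokenize(b"<s>Hello", add_bos=False, special=True)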

llama_cpp/llama_cpp.py (+55 -11)
Original file line numberDiff line numberDiff line change
@@ -240,11 +240,11 @@ class llama_token_data_array(Structure):
240240
# typedef struct llama_batch {
241241
# int32_t n_tokens;
242242

243-
# llama_token * token;
244-
# float * embd;
245-
# llama_pos * pos;
246-
# llama_seq_id * seq_id;
247-
# int8_t * logits;
243+
# llama_token * token;
244+
# float * embd;
245+
# llama_pos * pos;
246+
# llama_seq_id ** seq_id;
247+
# int8_t * logits;
248248

249249

250250
# // NOTE: helpers for smooth API transition - can be deprecated in the future
@@ -262,7 +262,7 @@ class llama_batch(Structure):
262262
("token", POINTER(llama_token)),
263263
("embd", c_float_p),
264264
("pos", POINTER(llama_pos)),
265-
("seq_id", POINTER(llama_seq_id)),
265+
("seq_id", POINTER(POINTER(llama_seq_id))),
266266
("logits", POINTER(c_int8)),
267267
("all_pos_0", llama_pos),
268268
("all_pos_1", llama_pos),

@@ -1069,22 +1069,26 @@ def llama_batch_get_one(
 _lib.llama_batch_get_one.restype = llama_batch


-# // Allocates a batch of tokens on the heap
+# // Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
+# // Each token can be assigned up to n_seq_max sequence ids
 # // The batch has to be freed with llama_batch_free()
 # // If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
 # // Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
 # // The rest of the llama_batch members are allocated with size n_tokens
 # // All members are left uninitialized
 # LLAMA_API struct llama_batch llama_batch_init(
 #     int32_t n_tokens,
-#     int32_t embd);
+#     int32_t embd,
+#     int32_t n_seq_max);
 def llama_batch_init(
-    n_tokens: Union[c_int, int], embd: Union[c_int, int]
+    n_tokens: Union[c_int32, int],
+    embd: Union[c_int32, int],
+    n_seq_max: Union[c_int32, int],
 ) -> llama_batch:
-    return _lib.llama_batch_init(n_tokens, embd)
+    return _lib.llama_batch_init(n_tokens, embd, n_seq_max)


-_lib.llama_batch_init.argtypes = [c_int, c_int]
+_lib.llama_batch_init.argtypes = [c_int32, c_int32, c_int32]
 _lib.llama_batch_init.restype = llama_batch

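Callers of the low-level binding thread the new argument through and index seq_id through the extra pointer level. A hedged, self-contained sketch under the new signature (llama_batch_free is the existing counterpart named in the comments above; the sizes and token values are illustrative):

    import llama_cpp

    # 8 token slots, token-id mode (embd=0), at most 1 sequence id per slot.
    batch = llama_cpp.llama_batch_init(8, 0, 1)
    try:
        batch.n_tokens = 1          # members start uninitialized; caller fills them
        batch.token[0] = 1          # e.g. the BOS token id
        batch.pos[0] = 0
        batch.seq_id[0][0] = 0      # token 0 belongs to sequence 0 (new ** layout)
        batch.logits[0] = 1
    finally:
        llama_cpp.llama_batch_free(batch)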

@@ -1308,6 +1312,46 @@ def llama_tokenize(
 _lib.llama_tokenize.restype = c_int


+# /// @details Convert the provided text into tokens.
+# /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
+# /// @return Returns the number of tokens on success, no more than n_max_tokens
+# /// @return Returns a negative number on failure - the number of tokens that would have been returned
+# /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
+# ///                Does not insert a leading space.
+# LLAMA_API int llama_tokenize(
+#     const struct llama_model * model,
+#     const char * text,
+#     int text_len,
+#     llama_token * tokens,
+#     int n_max_tokens,
+#     bool add_bos,
+#     bool special);
+def llama_tokenize(
+    model: llama_model_p,
+    text: bytes,
+    text_len: Union[c_int, int],
+    tokens,  # type: Array[llama_token]
+    n_max_tokens: Union[c_int, int],
+    add_bos: Union[c_bool, bool],
+    special: Union[c_bool, bool],
+) -> int:
+    return _lib.llama_tokenize(
+        model, text, text_len, tokens, n_max_tokens, add_bos, special
+    )
+
+
+_lib.llama_tokenize.argtypes = [
+    llama_model_p,
+    c_char_p,
+    c_int,
+    llama_token_p,
+    c_int,
+    c_bool,
+    c_bool,
+]
+_lib.llama_tokenize.restype = c_int
+
+
 # // Token Id -> Piece.
 # // Uses the vocabulary in the provided context.
 # // Does not write null terminator to the buffer.
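
Using the new binding directly mirrors the C signature: the caller owns the token buffer, and a negative return value reports how many tokens would have been needed. A sketch, assuming model is a llama_model_p loaded elsewhere (for example via llama_load_model_from_file):

    from llama_cpp import llama_token, llama_tokenize

    text = b"<s>Hello"
    buf = (llama_token * 32)()  # caller-owned output buffer

    # special=True lets "<s>" map to its control token instead of plain text.
    n = llama_tokenize(model, text, len(text), buf, len(buf), False, True)
    if n < 0:
        raise RuntimeError(f"buffer too small; need {-n} tokens")
    tokens = list(buf[:n])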

vendor/llama.cpp (+1 -1)

Submodule commit reference updated.
