Commit 255e1b4

feat: Update llama.cpp

Parent: d634efc
3 files changed: 38 additions, 103 deletions

llama_cpp/_internals.py (2 additions, 2 deletions)
```diff
@@ -128,9 +128,9 @@ def token_get_score(self, token: int) -> float:
         assert self.model is not None
         return llama_cpp.llama_token_get_score(self.model, token)
 
-    def token_get_type(self, token: int) -> int:
+    def token_get_attr(self, token: int) -> int:
         assert self.model is not None
-        return llama_cpp.llama_token_get_type(self.model, token)
+        return llama_cpp.llama_token_get_attr(self.model, token)
 
     # Special tokens
 
```
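Where `token_get_type` returned a single `llama_token_type` enum value, the renamed `token_get_attr` returns a bit field of `llama_token_attr` flags (added below in llama_cpp.py), so callers should move from equality checks to bitwise tests. A minimal caller-side sketch, assuming `model` is an already-constructed internal model wrapper (hypothetical here) and that the package re-exports the new constants:

```python
import llama_cpp


def is_control(model, tok: int) -> bool:
    # Old style: model.token_get_type(tok) == LLAMA_TOKEN_TYPE_CONTROL
    # New style: attributes are bit flags, so test with bitwise AND.
    return bool(model.token_get_attr(tok) & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL)
```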

llama_cpp/llama_cpp.py (35 additions, 100 deletions)
```diff
@@ -333,7 +333,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_ROPE_TYPE_GLM = 4
 
 
-# enum llama_token_type {
+# enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
 #     LLAMA_TOKEN_TYPE_UNDEFINED = 0,
 #     LLAMA_TOKEN_TYPE_NORMAL = 1,
 #     LLAMA_TOKEN_TYPE_UNKNOWN = 2,
```
```diff
@@ -351,6 +351,32 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_TOKEN_TYPE_BYTE = 6
 
 
+# enum llama_token_attr {
+#     LLAMA_TOKEN_ATTR_UNDEFINED = 0,
+#     LLAMA_TOKEN_ATTR_UNKNOWN = 1 << 0,
+#     LLAMA_TOKEN_ATTR_UNUSED = 1 << 1,
+#     LLAMA_TOKEN_ATTR_NORMAL = 1 << 2,
+#     LLAMA_TOKEN_ATTR_CONTROL = 1 << 3,  // SPECIAL?
+#     LLAMA_TOKEN_ATTR_USER_DEFINED = 1 << 4,
+#     LLAMA_TOKEN_ATTR_BYTE = 1 << 5,
+#     LLAMA_TOKEN_ATTR_NORMALIZED = 1 << 6,
+#     LLAMA_TOKEN_ATTR_LSTRIP = 1 << 7,
+#     LLAMA_TOKEN_ATTR_RSTRIP = 1 << 8,
+#     LLAMA_TOKEN_ATTR_SINGLE_WORD = 1 << 9,
+# };
+LLAMA_TOKEN_ATTR_UNDEFINED = 0
+LLAMA_TOKEN_ATTR_UNKNOWN = 1 << 0
+LLAMA_TOKEN_ATTR_UNUSED = 1 << 1
+LLAMA_TOKEN_ATTR_NORMAL = 1 << 2
+LLAMA_TOKEN_ATTR_CONTROL = 1 << 3
+LLAMA_TOKEN_ATTR_USER_DEFINED = 1 << 4
+LLAMA_TOKEN_ATTR_BYTE = 1 << 5
+LLAMA_TOKEN_ATTR_NORMALIZED = 1 << 6
+LLAMA_TOKEN_ATTR_LSTRIP = 1 << 7
+LLAMA_TOKEN_ATTR_RSTRIP = 1 << 8
+LLAMA_TOKEN_ATTR_SINGLE_WORD = 1 << 9
+
+
 # // model file types
 # enum llama_ftype {
 #     LLAMA_FTYPE_ALL_F32 = 0,
```
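Unlike the old `LLAMA_TOKEN_TYPE_*` values, these attributes are single-bit flags, so one token can carry several at once. A quick illustration using the constants above (plain integer arithmetic, no model required; the combination shown is hypothetical):

```python
import llama_cpp

# A hypothetical token that is user-defined and also right-strips whitespace.
attrs = llama_cpp.LLAMA_TOKEN_ATTR_USER_DEFINED | llama_cpp.LLAMA_TOKEN_ATTR_RSTRIP

assert attrs & llama_cpp.LLAMA_TOKEN_ATTR_USER_DEFINED    # flag set
assert attrs & llama_cpp.LLAMA_TOKEN_ATTR_RSTRIP          # flag set
assert not (attrs & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL)   # flag not set
```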
```diff
@@ -959,6 +985,9 @@ class llama_model_quantize_params(ctypes.Structure):
 # // modifies a preceding LLAMA_GRETYPE_CHAR or
 # // LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA])
 # LLAMA_GRETYPE_CHAR_ALT = 6,
+
+# // any character (.)
+# LLAMA_GRETYPE_CHAR_ANY = 7,
 # };
 LLAMA_GRETYPE_END = 0
 LLAMA_GRETYPE_ALT = 1
@@ -967,6 +996,7 @@ class llama_model_quantize_params(ctypes.Structure):
 LLAMA_GRETYPE_CHAR_NOT = 4
 LLAMA_GRETYPE_CHAR_RNG_UPPER = 5
 LLAMA_GRETYPE_CHAR_ALT = 6
+LLAMA_GRETYPE_CHAR_ANY = 7
 
 
 # typedef struct llama_grammar_element {
```
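Per the upstream comment, `LLAMA_GRETYPE_CHAR_ANY` backs the `.` (any character) pattern in GBNF grammars. A hedged sketch of exercising it through the package's high-level grammar API, assuming this vendored llama.cpp revision accepts `.` in grammar strings:

```python
from llama_cpp import LlamaGrammar

# "." matches any single character; this rule accepts exactly three of them.
# The resulting grammar could be passed to Llama.create_completion(grammar=...).
grammar = LlamaGrammar.from_string("root ::= . . .")
```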
```diff
@@ -2438,11 +2468,11 @@ def llama_token_get_score(
 ) -> float: ...
 
 
-# LLAMA_API enum llama_token_type llama_token_get_type(const struct llama_model * model, llama_token token);
+# LLAMA_API enum llama_token_attr llama_token_get_attr(const struct llama_model * model, llama_token token);
 @ctypes_function(
-    "llama_token_get_type", [llama_model_p_ctypes, llama_token], ctypes.c_int
+    "llama_token_get_attr", [llama_model_p_ctypes, llama_token], ctypes.c_int
 )
-def llama_token_get_type(
+def llama_token_get_attr(
     model: llama_model_p, token: Union[llama_token, int], /
 ) -> int: ...
```
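A sketch of calling the renamed binding directly through the low-level API. The model path is a placeholder; `vocab_only` is assumed sufficient here since only tokenizer data is queried:

```python
import llama_cpp

llama_cpp.llama_backend_init()

params = llama_cpp.llama_model_default_params()
params.vocab_only = True  # tokenizer data is enough for attribute queries
model = llama_cpp.llama_load_model_from_file(b"/path/to/model.gguf", params)

token = 2  # an arbitrary token id
attr = llama_cpp.llama_token_get_attr(model, token)
if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL:
    print(f"token {token} is a control token")

llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()
```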

```diff
@@ -3200,104 +3230,9 @@ def llama_grammar_accept_token(
 
 
 # //
-# // Beam search
+# // Model split
 # //
 
-# struct llama_beam_view {
-#     const llama_token * tokens;
-#     size_t n_tokens;
-#     float p;   // Cumulative beam probability (renormalized relative to all beams)
-#     bool eob;  // Callback should set this to true when a beam is at end-of-beam.
-# };
-class llama_beam_view(ctypes.Structure):
-    if TYPE_CHECKING:
-        tokens: CtypesArray[llama_token]
-        n_tokens: int
-        p: float
-        eob: bool
-
-    _fields_ = [
-        ("tokens", llama_token_p),
-        ("n_tokens", ctypes.c_size_t),
-        ("p", ctypes.c_float),
-        ("eob", ctypes.c_bool),
-    ]
-
-
-# // Passed to beam_search_callback function.
-# // Whenever 0 < common_prefix_length, this number of tokens should be copied from any of the beams
-# // (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks.
-# // These pointers are valid only during the synchronous callback, so should not be saved.
-# struct llama_beams_state {
-#     struct llama_beam_view * beam_views;
-#     size_t n_beams;               // Number of elements in beam_views[].
-#     size_t common_prefix_length;  // Current max length of prefix tokens shared by all beams.
-#     bool last_call;               // True iff this is the last callback invocation.
-# };
-class llama_beams_state(ctypes.Structure):
-    if TYPE_CHECKING:
-        beam_views: CtypesArray[llama_beam_view]
-        n_beams: int
-        common_prefix_length: int
-        last_call: bool
-
-    _fields_ = [
-        ("beam_views", ctypes.POINTER(llama_beam_view)),
-        ("n_beams", ctypes.c_size_t),
-        ("common_prefix_length", ctypes.c_size_t),
-        ("last_call", ctypes.c_bool),
-    ]
-
-
-# // Type of pointer to the beam_search_callback function.
-# // void* callback_data is any custom data passed to llama_beam_search, that is subsequently
-# // passed back to beam_search_callback. This avoids having to use global variables in the callback.
-# typedef void (*llama_beam_search_callback_fn_t)(void * callback_data, struct llama_beams_state);
-llama_beam_search_callback_fn_t = ctypes.CFUNCTYPE(
-    None, ctypes.c_void_p, llama_beams_state
-)
-
-
-# /// @details Deterministically returns entire sentence constructed by a beam search.
-# /// @param ctx Pointer to the llama_context.
-# /// @param callback Invoked for each iteration of the beam_search loop, passing in beams_state.
-# /// @param callback_data A pointer that is simply passed back to callback.
-# /// @param n_beams Number of beams to use.
-# /// @param n_past Number of tokens already evaluated.
-# /// @param n_predict Maximum number of tokens to predict. EOS may occur earlier.
-# /// @param n_threads Number of threads as passed to llama_eval().
-# LLAMA_API void llama_beam_search(
-#     struct llama_context * ctx,
-#     llama_beam_search_callback_fn_t callback,
-#     void * callback_data,
-#     size_t n_beams,
-#     int32_t n_past,
-#     int32_t n_predict);
-@ctypes_function(
-    "llama_beam_search",
-    [
-        llama_context_p_ctypes,
-        llama_beam_search_callback_fn_t,
-        ctypes.c_void_p,
-        ctypes.c_size_t,
-        ctypes.c_int32,
-        ctypes.c_int32,
-    ],
-    None,
-)
-def llama_beam_search(
-    ctx: llama_context_p,
-    callback: CtypesFuncPointer,
-    callback_data: ctypes.c_void_p,
-    n_beams: Union[ctypes.c_size_t, int],
-    n_past: Union[ctypes.c_int, int],
-    n_predict: Union[ctypes.c_int, int],
-    /,
-): ...
-
 # /// @details Build a split GGUF final path for this chunk.
 # ///   llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) => split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf"
 # // Returns the split_path length.
```
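The split-path helper that now heads this section was already bound earlier in the module; a quick usage sketch mirroring the doc comment's example (buffer size is arbitrary, and the exact binding signature is assumed from the surrounding code):

```python
import ctypes
import llama_cpp

# Build the name of chunk 2 of 4 for the given prefix, as in the comment above.
buf = ctypes.create_string_buffer(256)
n = llama_cpp.llama_split_path(
    buf, ctypes.sizeof(buf), b"/models/ggml-model-q4_0", 2, 4
)  # n is the length of the written path
print(buf.value.decode())  # /models/ggml-model-q4_0-00002-of-00004.gguf
```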

vendor/llama.cpp (1 addition, 1 deletion: submodule commit updated)