Commit ac188a2

committed
Added low level grammar API
1 parent 91bf8fa commit ac188a2

File tree

2 files changed: +1365 -0 lines changed

llama_cpp/llama_cpp.py

34 additions & 0 deletions
@@ -1157,6 +1157,23 @@ def llama_sample_temperature(
 _lib.llama_sample_temperature.restype = None
 
 
+# LLAMA_API void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * candidates, const struct llama_grammar * grammar);
+def llama_sample_grammar(
+    ctx: llama_context_p,
+    candidates,  # type: _Pointer[llama_token_data_array]
+    grammar,  # type: llama_grammar_p
+):
+    return _lib.llama_sample_grammar(ctx, candidates, grammar)
+
+
+_lib.llama_sample_grammar.argtypes = [
+    llama_context_p,
+    llama_token_data_array_p,
+    llama_grammar_p,
+]
+_lib.llama_sample_grammar.restype = None
+
+
 # @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
 # @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
 # @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
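
The hunk above binds llama.cpp's `llama_sample_grammar`, which filters a `llama_token_data_array` of candidates so that only tokens the grammar can accept at the current parse position survive. A minimal sketch of preparing such an array from Python, assuming an already-evaluated `ctx` (`llama_context_p`); `build_candidates` is a hypothetical helper, not part of this commit:

import ctypes

def build_candidates(ctx):
    # Hypothetical helper: wrap the current logits in the
    # llama_token_data_array that the llama_sample_* bindings expect.
    n_vocab = llama_n_vocab(ctx)
    logits = llama_get_logits(ctx)
    arr = (llama_token_data * n_vocab)(
        *[llama_token_data(i, logits[i], 0.0) for i in range(n_vocab)]
    )
    # ctypes keeps `arr` alive through the pointer's reference.
    return ctypes.pointer(llama_token_data_array(arr, len(arr), False))

With a candidates pointer in hand, `llama_sample_grammar(ctx, candidates_p, grammar)` is called like any other `llama_sample_*` binding; the `grammar` argument is a `llama_grammar_p` created elsewhere.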
@@ -1244,6 +1261,23 @@ def llama_sample_token(
 _lib.llama_sample_token.restype = llama_token
 
 
+# /// @details Accepts the sampled token into the grammar
+# LLAMA_API void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar * grammar, llama_token token);
+def llama_grammar_accept_token(
+    ctx: llama_context_p,
+    grammar: llama_grammar_p,
+    token: llama_token,
+) -> None:
+    _lib.llama_grammar_accept_token(ctx, grammar, token)
+
+
+_lib.llama_grammar_accept_token.argtypes = [
+    llama_context_p,
+    llama_grammar_p,
+    llama_token,
+]
+_lib.llama_grammar_accept_token.restype = None
+
 # Performance information
 
 
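The second binding, `llama_grammar_accept_token`, advances the grammar's parse state past a sampled token; without it, later `llama_sample_grammar` calls would keep constraining against a stale position. A hedged sketch of one grammar-constrained decoding step tying the two new bindings together (`build_candidates` is the hypothetical helper from the previous sketch, and `ctx`/`grammar` are assumed to exist):

# One grammar-constrained decoding step (sketch, assumptions as above).
candidates_p = build_candidates(ctx)

# 1. Drop candidates the grammar cannot accept at this position.
llama_sample_grammar(ctx, candidates_p, grammar)

# 2. Sample among the remaining, grammar-legal tokens.
token = llama_sample_token(ctx, candidates_p)

# 3. Advance the grammar's parse state past the chosen token, so the
#    next step's llama_sample_grammar call sees the updated position.
llama_grammar_accept_token(ctx, grammar, token)

Repeating these three calls per generated token is what higher-level grammar-constrained sampling loops are built from.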
0 commit comments
