Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit ad675e1

Browse filesBrowse files
authored
Added support for . (any character) token in grammar engine. (#6467)
* Added support for . (any character) token in grammar engine. * Add integration tests for any-character symbol.
1 parent a143c04 commit ad675e1
Copy full SHA for ad675e1

File tree

Expand file treeCollapse file tree

4 files changed

+52
-2
lines changed
Filter options
Expand file treeCollapse file tree

4 files changed

+52
-2
lines changed

‎common/grammar-parser.cpp

Copy file name to clipboardExpand all lines: common/grammar-parser.cpp
+11Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,10 @@ namespace grammar_parser {
266266
throw std::runtime_error(std::string("expecting ')' at ") + pos);
267267
}
268268
pos = parse_space(pos + 1, is_nested);
269+
} else if (*pos == '.') { // any char
270+
last_sym_start = out_elements.size();
271+
out_elements.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
272+
pos = parse_space(pos + 1, is_nested);
269273
} else if (*pos == '*') {
270274
pos = parse_space(pos + 1, is_nested);
271275
handle_repetitions(0, -1);
@@ -401,6 +405,7 @@ namespace grammar_parser {
401405
case LLAMA_GRETYPE_CHAR_NOT: return true;
402406
case LLAMA_GRETYPE_CHAR_ALT: return true;
403407
case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
408+
case LLAMA_GRETYPE_CHAR_ANY: return true;
404409
default: return false;
405410
}
406411
}
@@ -415,6 +420,7 @@ namespace grammar_parser {
415420
case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
416421
case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
417422
case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
423+
case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
418424
}
419425
switch (elem.type) {
420426
case LLAMA_GRETYPE_END:
@@ -426,6 +432,7 @@ namespace grammar_parser {
426432
case LLAMA_GRETYPE_CHAR_NOT:
427433
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
428434
case LLAMA_GRETYPE_CHAR_ALT:
435+
case LLAMA_GRETYPE_CHAR_ANY:
429436
fprintf(file, "(\"");
430437
print_grammar_char(file, elem.value);
431438
fprintf(file, "\") ");
@@ -483,11 +490,15 @@ namespace grammar_parser {
483490
}
484491
print_grammar_char(file, elem.value);
485492
break;
493+
case LLAMA_GRETYPE_CHAR_ANY:
494+
fprintf(file, ".");
495+
break;
486496
}
487497
if (is_char_element(elem)) {
488498
switch (rule[i + 1].type) {
489499
case LLAMA_GRETYPE_CHAR_ALT:
490500
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
501+
case LLAMA_GRETYPE_CHAR_ANY:
491502
break;
492503
default:
493504
fprintf(file, "] ");

‎llama.cpp

Copy file name to clipboardExpand all lines: llama.cpp
+10-2Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13640,7 +13640,7 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
1364013640
const uint32_t chr) {
1364113641

1364213642
bool found = false;
13643-
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR;
13643+
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
1364413644

1364513645
GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
1364613646

@@ -13649,6 +13649,10 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
1364913649
// inclusive range, e.g. [a-z]
1365013650
found = found || (pos->value <= chr && chr <= pos[1].value);
1365113651
pos += 2;
13652+
} else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
13653+
// Any character matches "."
13654+
found = true;
13655+
pos += 1;
1365213656
} else {
1365313657
// exact char match, e.g. [a] or "a"
1365413658
found = found || pos->value == chr;
@@ -13666,7 +13670,7 @@ static bool llama_grammar_match_partial_char(
1366613670
const llama_grammar_element * pos,
1366713671
const llama_partial_utf8 partial_utf8) {
1366813672

13669-
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR;
13673+
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
1367013674
GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
1367113675

1367213676
uint32_t partial_value = partial_utf8.value;
@@ -13696,6 +13700,9 @@ static bool llama_grammar_match_partial_char(
1369613700
return is_positive_char;
1369713701
}
1369813702
pos += 2;
13703+
} else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
13704+
// Any character matches "."
13705+
return true;
1369913706
} else {
1370013707
// exact char match, e.g. [a] or "a"
1370113708
if (low <= pos->value && pos->value <= high) {
@@ -13756,6 +13763,7 @@ static void llama_grammar_advance_stack(
1375613763
}
1375713764
case LLAMA_GRETYPE_CHAR:
1375813765
case LLAMA_GRETYPE_CHAR_NOT:
13766+
case LLAMA_GRETYPE_CHAR_ANY:
1375913767
if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
1376013768
// only add the stack if it's not a duplicate of one we already have
1376113769
new_stacks.emplace_back(stack);

‎llama.h

Copy file name to clipboardExpand all lines: llama.h
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,9 @@ extern "C" {
365365
// modifies a preceding LLAMA_GRETYPE_CHAR or
366366
// LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA])
367367
LLAMA_GRETYPE_CHAR_ALT = 6,
368+
369+
// any character (.)
370+
LLAMA_GRETYPE_CHAR_ANY = 7,
368371
};
369372

370373
typedef struct llama_grammar_element {

‎tests/test-grammar-integration.cpp

Copy file name to clipboardExpand all lines: tests/test-grammar-integration.cpp
+28Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,33 @@ static void test_complex_grammar() {
205205
);
206206
}
207207

208+
// Integration test for special grammar symbols, currently "." (any character).
//
// The grammar requires exactly three arbitrary characters, the literal "abc",
// then exactly three more arbitrary characters. Every entry in the failing
// list has the wrong number of characters on at least one side of "abc".
static void test_special_chars() {
    // A collection of tests to exercise special characters such as "."
    test_grammar(
        "special characters",
        // Grammar
        R"""(
root ::= ... "abc" ...
)""",
        // Passing strings
        {
            "abcabcabc",
            "aaaabcccc",
            // NOTE: Also ensures that multi-byte characters still count as a single character
            "🔵🟠✅abc❌🟠🔵"
        },
        // Failing strings
        {
            "aaabcccc",
            "aaaaabcccc",
            "aaaabccc",
            "aaaabccccc",
            // Four characters on each side of "abc" (one too many).
            // The trailing comma matters: without it this literal is concatenated
            // with the next one and the two cases are no longer tested separately.
            "🔵🟠✅❌abc❌✅🟠🔵",
            // Two characters on each side of "abc" (one too few).
            "🔵🟠abc🟠🔵"
        }
    );
}
234+
208235
static void test_quantifiers() {
209236
// A collection of tests to exercise * + and ? quantifiers
210237

@@ -445,6 +472,7 @@ int main() {
445472
fprintf(stdout, "Running grammar integration tests...\n");
446473
test_simple_grammar();
447474
test_complex_grammar();
475+
test_special_chars();
448476
test_quantifiers();
449477
test_failure_missing_root();
450478
test_failure_missing_reference();

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.