Commit e30b3e3

llama : deprecate explicit kv_self defrag/update calls

ggml-ci

1 parent ca69f32 · commit e30b3e3

File tree

3 files changed: +8 -14 lines changed

examples/passkey/passkey.cpp

+2 -7 (2 additions & 7 deletions)

@@ -133,9 +133,8 @@ int main(int argc, char ** argv) {
         const int ib = i/n_batch - 1;
         const int bd = n_batch_grp*(n_grp - 1);

-        llama_kv_self_seq_add (ctx, 0, n_past - n_batch, n_past, ib*bd);
-        llama_kv_self_seq_div (ctx, 0, n_past - n_batch + ib*bd, n_past + ib*bd, n_grp);
-        llama_kv_self_update (ctx);
+        llama_kv_self_seq_add(ctx, 0, n_past - n_batch, n_past, ib*bd);
+        llama_kv_self_seq_div(ctx, 0, n_past - n_batch + ib*bd, n_past + ib*bd, n_grp);

         n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1;
     }
@@ -169,8 +168,6 @@ int main(int argc, char ** argv) {

         llama_kv_self_seq_rm (ctx, 0, n_keep            , n_keep + n_discard);
         llama_kv_self_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard);
-        //llama_kv_self_defrag (ctx);
-        llama_kv_self_update (ctx);

         n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1;

@@ -200,8 +197,6 @@ int main(int argc, char ** argv) {

         llama_kv_self_seq_rm (ctx, 0, n_keep            , n_keep + n_discard);
         llama_kv_self_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard);
-        //llama_kv_self_defrag (ctx);
-        llama_kv_self_update (ctx);

         n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1;
     }
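For callers making the same migration as passkey.cpp, the pattern is simply to drop the explicit update call: edit the cache with the llama_kv_self_seq_* functions and let the next llama_decode() apply the pending shift. A minimal sketch of such a context-shift step follows; the helper name and its parameters are illustrative, not part of this commit.

// Minimal sketch: discard n_discard tokens after n_keep and shift the remaining
// cells back, relying on the lazy update performed by the next llama_decode().
// The helper name "kv_context_shift" and its parameters are hypothetical; the
// llama_kv_self_seq_* calls mirror the ones kept in passkey.cpp by this commit.
static void kv_context_shift(llama_context * ctx, llama_pos n_keep, llama_pos n_discard, llama_pos n_ctx) {
    llama_kv_self_seq_rm (ctx, 0, n_keep            , n_keep + n_discard);
    llama_kv_self_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard);

    // no llama_kv_self_update()/llama_kv_self_defrag() here anymore:
    // the K-shift (and any defragmentation) is applied lazily on the next llama_decode()
}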

include/llama.h

+4 -7 (4 additions & 7 deletions)

@@ -652,7 +652,6 @@ extern "C" {
     // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
     // If the KV cache is RoPEd, the KV data is updated accordingly:
     //   - lazily on next llama_decode()
-    //   - explicitly with llama_kv_self_update()
     // p0 < 0 : [0, p1]
     // p1 < 0 : [p0, inf)
     LLAMA_API void llama_kv_self_seq_add(
@@ -665,7 +664,6 @@ extern "C" {
     // Integer division of the positions by factor of `d > 1`
     // If the KV cache is RoPEd, the KV data is updated accordingly:
     //   - lazily on next llama_decode()
-    //   - explicitly with llama_kv_self_update()
     // p0 < 0 : [0, p1]
     // p1 < 0 : [p0, inf)
     LLAMA_API void llama_kv_self_seq_div(
@@ -693,16 +691,15 @@ extern "C" {
     // Defragment the KV cache
     // This will be applied:
     //   - lazily on next llama_decode()
-    //   - explicitly with llama_kv_self_update()
-    // TODO: deprecate and always update the cache lazily [TAG: API_KV_NO_DEFRAG]
-    LLAMA_API void llama_kv_self_defrag(struct llama_context * ctx);
+    LLAMA_API DEPRECATED(void llama_kv_self_defrag(struct llama_context * ctx),
+        "simply remove this call, the context will automatically decide when to do a defragmentation based on 'defrag_thold'");

     // Check if the context supports KV cache shifting
     LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx);

     // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
-    // TODO: deprecate and always update the cache lazily [TAG: API_KV_NO_DEFRAG]
-    LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
+    LLAMA_API DEPRECATED(void llama_kv_self_update(struct llama_context * ctx),
+        "simply remove this call, updates are applied lazily on the next llama_decode()");

     //
     // State / sessions
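The replacement hint for llama_kv_self_defrag() points callers at the 'defrag_thold' context parameter instead. Below is a minimal sketch of opting into automatic defragmentation at context creation, assuming the defrag_thold field of llama_context_params and llama_init_from_model() for context setup; the 0.1 threshold is only an illustrative value.

// Sketch: configure the defragmentation threshold once, instead of calling
// llama_kv_self_defrag()/llama_kv_self_update() explicitly.
llama_context_params cparams = llama_context_default_params();
cparams.defrag_thold = 0.1f; // illustrative: defragment when the fraction of unused KV cells exceeds ~10%

llama_context * ctx = llama_init_from_model(model, cparams);

// ... after llama_kv_self_seq_rm()/llama_kv_self_seq_add() edits, the next
// llama_decode(ctx, batch) applies any pending K-shift/defragmentation lazily.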

src/llama-context.cpp

+2 -0 (2 additions & 0 deletions)

@@ -2283,6 +2283,7 @@ llama_kv_cache * llama_get_kv_self(llama_context * ctx) {
     return ctx->get_kv_self();
 }

+// deprecated
 void llama_kv_self_update(llama_context * ctx) {
     ctx->kv_self_update();
 }
@@ -2537,6 +2538,7 @@ llama_pos llama_kv_self_seq_pos_max(llama_context * ctx, llama_seq_id seq_id) {
     return kv->seq_pos_max(seq_id);
 }

+// deprecated
 void llama_kv_self_defrag(llama_context * ctx) {
     auto * kv = ctx->get_kv_self();
     if (!kv) {
