Commit 803f8ba

llama : deprecate explicit kv_self defrag/update calls (#13921)
ggml-ci
1 parent: 3600cc2

File tree: 3 files changed, 8 additions and 14 deletions

  examples/passkey/passkey.cpp
  include/llama.h
  src/llama-context.cpp

examples/passkey/passkey.cpp (2 additions, 7 deletions)

@@ -133,9 +133,8 @@ int main(int argc, char ** argv) {
             const int ib = i/n_batch - 1;
             const int bd = n_batch_grp*(n_grp - 1);
 
-            llama_kv_self_seq_add (ctx, 0, n_past - n_batch, n_past, ib*bd);
-            llama_kv_self_seq_div (ctx, 0, n_past - n_batch + ib*bd, n_past + ib*bd, n_grp);
-            llama_kv_self_update (ctx);
+            llama_kv_self_seq_add(ctx, 0, n_past - n_batch, n_past, ib*bd);
+            llama_kv_self_seq_div(ctx, 0, n_past - n_batch + ib*bd, n_past + ib*bd, n_grp);
 
             n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1;
         }
@@ -169,8 +168,6 @@ int main(int argc, char ** argv) {
 
         llama_kv_self_seq_rm (ctx, 0, n_keep            , n_keep + n_discard);
         llama_kv_self_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard);
-        //llama_kv_self_defrag (ctx);
-        llama_kv_self_update (ctx);
 
         n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1;
 
@@ -200,8 +197,6 @@ int main(int argc, char ** argv) {
 
        llama_kv_self_seq_rm (ctx, 0, n_keep            , n_keep + n_discard);
        llama_kv_self_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard);
-       //llama_kv_self_defrag (ctx);
-       llama_kv_self_update (ctx);
 
        n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1;
    }
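
With the explicit update calls removed, the caller-side pattern above boils down to issuing the sequence edits and letting the next llama_decode() apply the pending changes. A minimal sketch of that context-shift step, assuming an already-initialized llama_context (the helper name shift_context and its parameters are illustrative, not part of this commit):

    #include "llama.h"

    // Drop the oldest n_discard tokens after the first n_keep, then shift the
    // remaining KV cache entries back. No llama_kv_self_update() or
    // llama_kv_self_defrag() call is needed anymore: the pending K-shift (and
    // any defragmentation) is applied lazily inside the next llama_decode().
    static void shift_context(llama_context * ctx, int n_keep, int n_discard, int n_ctx) {
        llama_kv_self_seq_rm (ctx, 0, n_keep            , n_keep + n_discard);
        llama_kv_self_seq_add(ctx, 0, n_keep + n_discard, n_ctx, -n_discard);

        // the caller's position counter is then recovered with:
        //   n_past = llama_kv_self_seq_pos_max(ctx, 0) + 1;
    }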

include/llama.h (4 additions, 7 deletions)

@@ -655,7 +655,6 @@ extern "C" {
     // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
     // If the KV cache is RoPEd, the KV data is updated accordingly:
     //   - lazily on next llama_decode()
-    //   - explicitly with llama_kv_self_update()
     // p0 < 0 : [0,  p1]
     // p1 < 0 : [p0, inf)
     LLAMA_API void llama_kv_self_seq_add(
@@ -668,7 +667,6 @@ extern "C" {
     // Integer division of the positions by factor of `d > 1`
     // If the KV cache is RoPEd, the KV data is updated accordingly:
     //   - lazily on next llama_decode()
-    //   - explicitly with llama_kv_self_update()
     // p0 < 0 : [0,  p1]
     // p1 < 0 : [p0, inf)
     LLAMA_API void llama_kv_self_seq_div(
@@ -696,16 +694,15 @@ extern "C" {
     // Defragment the KV cache
     // This will be applied:
     //   - lazily on next llama_decode()
-    //   - explicitly with llama_kv_self_update()
-    // TODO: deprecate and always update the cache lazily [TAG: API_KV_NO_DEFRAG]
-    LLAMA_API void llama_kv_self_defrag(struct llama_context * ctx);
+    LLAMA_API DEPRECATED(void llama_kv_self_defrag(struct llama_context * ctx),
+        "simply remove this call, the context will automatically decide when to do a defragmentation based on 'defrag_thold'");
 
     // Check if the context supports KV cache shifting
     LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx);
 
     // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
-    // TODO: deprecate and always update the cache lazily [TAG: API_KV_NO_DEFRAG]
-    LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
+    LLAMA_API DEPRECATED(void llama_kv_self_update(struct llama_context * ctx),
+        "simply remove this call, updates are applied lazily on the next llama_decode()");
 
     //
     // State / sessions
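
Both declarations are wrapped in DEPRECATED(declaration, hint), so existing code keeps compiling and linking and only gains a warning that carries the migration hint. As a rough, generic sketch of how a macro of that shape is usually defined (an assumed pattern for illustration, not copied from llama.h):

    // Assumed, generic definition of a DEPRECATED(decl, hint) helper: attach a
    // compiler-specific deprecation attribute carrying the hint string, and fall
    // back to a plain declaration when neither attribute is available.
    #if defined(__GNUC__) || defined(__clang__)
    #    define DEPRECATED(decl, hint) decl __attribute__((deprecated(hint)))
    #elif defined(_MSC_VER)
    #    define DEPRECATED(decl, hint) __declspec(deprecated(hint)) decl
    #else
    #    define DEPRECATED(decl, hint) decl
    #endif

With a definition along these lines, every call site of llama_kv_self_update() or llama_kv_self_defrag() triggers a deprecation diagnostic (e.g. -Wdeprecated-declarations with GCC/Clang, C4996 with MSVC) that prints the hint text, while the exported symbols stay intact.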

src/llama-context.cpp (2 additions, 0 deletions)

@@ -2281,6 +2281,7 @@ llama_kv_cache * llama_get_kv_self(llama_context * ctx) {
     return ctx->get_kv_self();
 }
 
+// deprecated
 void llama_kv_self_update(llama_context * ctx) {
     ctx->kv_self_update();
 }
@@ -2535,6 +2536,7 @@ llama_pos llama_kv_self_seq_pos_max(llama_context * ctx, llama_seq_id seq_id) {
     return kv->seq_pos_max(seq_id);
 }
 
+// deprecated
 void llama_kv_self_defrag(llama_context * ctx) {
     auto * kv = ctx->get_kv_self();
     if (!kv) {
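
The wrappers themselves stay in place and still forward into llama_context, so the change is source-compatible; the deprecation hints simply point callers at 'defrag_thold', the context parameter that now governs automatic defragmentation. A hedged sketch of setting it at context creation, assuming the usual llama_context_default_params() / llama_init_from_model() initialization path (the 0.1f threshold is an illustrative value, not a recommendation from this commit):

    #include "llama.h"

    // Create a context that defragments its KV cache on its own during
    // llama_decode(), instead of relying on explicit llama_kv_self_defrag() calls.
    static llama_context * make_ctx(llama_model * model) {
        llama_context_params cparams = llama_context_default_params();
        cparams.defrag_thold = 0.1f; // illustrative: defragment once fragmentation exceeds ~10%
        return llama_init_from_model(model, cparams);
    }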
