Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit b24c304

Browse filesBrowse files
Added tokens per second to info prints (ggml-org#1928)
1 parent 0ede372 commit b24c304
Copy full SHA for b24c304

File tree

Expand file treeCollapse file tree

1 file changed

+6
-3
lines changed
Filter options
Expand file treeCollapse file tree

1 file changed

+6
-3
lines changed

‎llama.cpp

Copy file name to clipboardExpand all lines: llama.cpp
+6-3Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3467,9 +3467,12 @@ void llama_print_timings(struct llama_context * ctx) {
34673467

34683468
fprintf(stderr, "\n");
34693469
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0);
3470-
fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample);
3471-
fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval);
3472-
fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval);
3470+
fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
3471+
__func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample, 1e6 / ctx->t_sample_us * n_sample);
3472+
fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
3473+
__func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval, 1e6 / ctx->t_p_eval_us * n_p_eval);
3474+
fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
3475+
__func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval, 1e6 / ctx->t_eval_us * n_eval);
34733476
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0);
34743477
}
34753478

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.