@@ -3467,9 +3467,12 @@ void llama_print_timings(struct llama_context * ctx) {
3467
3467
3468
3468
fprintf (stderr, " \n " );
3469
3469
fprintf (stderr, " %s: load time = %8.2f ms\n " , __func__, ctx->t_load_us / 1000.0 );
3470
- fprintf (stderr, " %s: sample time = %8.2f ms / %5d runs (%8.2f ms per token)\n " , __func__, 1e-3 * ctx->t_sample_us , n_sample, 1e-3 * ctx->t_sample_us / n_sample);
3471
- fprintf (stderr, " %s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token)\n " , __func__, 1e-3 * ctx->t_p_eval_us , n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval);
3472
- fprintf (stderr, " %s: eval time = %8.2f ms / %5d runs (%8.2f ms per token)\n " , __func__, 1e-3 * ctx->t_eval_us , n_eval, 1e-3 * ctx->t_eval_us / n_eval);
3470
+ fprintf (stderr, " %s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n " ,
3471
+ __func__, 1e-3 * ctx->t_sample_us , n_sample, 1e-3 * ctx->t_sample_us / n_sample, 1e6 / ctx->t_sample_us * n_sample);
3472
+ fprintf (stderr, " %s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n " ,
3473
+ __func__, 1e-3 * ctx->t_p_eval_us , n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval, 1e6 / ctx->t_p_eval_us * n_p_eval);
3474
+ fprintf (stderr, " %s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n " ,
3475
+ __func__, 1e-3 * ctx->t_eval_us , n_eval, 1e-3 * ctx->t_eval_us / n_eval, 1e6 / ctx->t_eval_us * n_eval);
3473
3476
fprintf (stderr, " %s: total time = %8.2f ms\n " , __func__, (t_end_us - ctx->t_start_us )/1000.0 );
3474
3477
}
3475
3478
0 commit comments