Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 949dfc1

Browse files
committed
mulmat-tune-tool: add --n_pass; document; fix trailing spaces
1 parent 8aebab9 commit 949dfc1
Copy full SHA for 949dfc1
Expand file treeCollapse file tree

17 files changed

+324
-134
lines changed

‎examples/mulmat-tune/README.md

Copy file name to clipboardExpand all lines: examples/mulmat-tune/README.md
+288-112Lines changed: 288 additions & 112 deletions
Large diffs are not rendered by default.
Loading
319 KB
Binary file not shown.

‎examples/mulmat-tune/bench-out/13b.q4_0.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/13b.q4_0.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 163 0 0 94 0
8888
256 17 759 0 0 171 0
8989
512 39 2837 0 0 321 0
90-

‎examples/mulmat-tune/bench-out/13b.q5_0.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/13b.q5_0.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 167 0 0 60 0
8888
256 20 733 0 0 129 0
8989
512 43 3462 0 0 262 0
90-

‎examples/mulmat-tune/bench-out/7b.f16.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/7b.f16.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,3 @@
8383
128 10 167 0 0 79 0
8484
256 19 835 0 0 136 0
8585
512 39 2856 0 0 283 0
86-

‎examples/mulmat-tune/bench-out/7b.f32.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/7b.f32.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,3 @@
8383
128 9 198 0 0 64 0
8484
256 20 766 0 0 166 0
8585
512 40 3464 0 0 276 0
86-

‎examples/mulmat-tune/bench-out/7b.q4_0.openblas.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/7b.q4_0.openblas.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 9 201 0 0 128 0
8888
256 19 886 0 0 172 0
8989
512 39 3227 0 0 405 0
90-

‎examples/mulmat-tune/bench-out/7b.q4_0.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/7b.q4_0.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 203 0 0 111 0
8888
256 19 705 0 0 165 0
8989
512 33 2832 0 0 313 0
90-

‎examples/mulmat-tune/bench-out/7b.q4_1.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/7b.q4_1.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 210 0 0 70 0
8888
256 20 856 0 0 128 0
8989
512 40 2949 0 0 334 0
90-

‎examples/mulmat-tune/bench-out/7b.q5_1.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/7b.q5_1.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 10 202 0 0 76 0
8888
256 20 850 0 0 123 0
8989
512 39 2944 0 0 301 0
90-

‎examples/mulmat-tune/bench-out/7b.q8_0.txt

Copy file name to clipboardExpand all lines: examples/mulmat-tune/bench-out/7b.q8_0.txt
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,3 @@
8787
128 9 206 0 0 79 0
8888
256 19 784 0 0 132 0
8989
512 38 2780 0 0 310 0
90-

‎examples/mulmat-tune/mulmat-tune-tool.c

Copy file name to clipboardExpand all lines: examples/mulmat-tune/mulmat-tune-tool.c
+30-7Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ static void print_blas_build_tips(void);
2424
static void progress(int i, int max);
2525
static bool prompt_yes_no(const char *prompt);
2626

27-
static void cmd_tune(struct ggml_mulmat_tune *b, bool verbose);
27+
static void cmd_tune(struct ggml_mulmat_tune *b, int n_pass, bool verbose);
2828
static void cmd_analyze(struct ggml_mulmat_tune *b);
2929

3030
static void usage(char *prog) {
@@ -41,6 +41,9 @@ static void usage(char *prog) {
4141
" default 10\n",
4242
"--backend BACKEND blas backend: CUDA | CL | CBLAS\n",
4343
" default: auto detect\n",
44+
"--n_pass number of passes to run\n",
45+
" default 3\n",
46+
" requires: in range [1, 5]\n",
4447
"--file FILE data file to write\n",
4548
" default stdout\n",
4649
"-y always answer \"yes\" to all prompts\n",
@@ -94,6 +97,7 @@ int main(int argc, char **argv) {
9497
const char *arg_model = NULL;
9598
const char *arg_type = NULL;
9699
const char *arg_m_num = NULL;
100+
const char *arg_n_pass = NULL;
97101
const char *arg_backend = NULL;
98102
const char *arg_file = NULL;
99103
bool always_yes = false;
@@ -114,6 +118,11 @@ int main(int argc, char **argv) {
114118
arg_m_num = argv[i + 1];
115119
++i;
116120
}
121+
} else if (strcmp(argv[i], "--n_pass") == 0) {
122+
if (i + 1 < argc) {
123+
arg_n_pass = argv[i + 1];
124+
++i;
125+
}
117126
} else if (strcmp(argv[i], "--backend") == 0) {
118127
if (i + 1 < argc) {
119128
arg_backend = argv[i + 1];
@@ -203,6 +212,20 @@ int main(int argc, char **argv) {
203212
}
204213
}
205214

215+
int n_pass = 3;
216+
{
217+
if (arg_n_pass != NULL) {
218+
int v = atoi(arg_n_pass);
219+
n_pass = v;
220+
}
221+
if (n_pass < 1 || n_pass > MAX_NUM_PASS) {
222+
fprintf(stderr, "invalid n_pass: %d, expect in range [1, 5]\n",
223+
n_pass);
224+
usage(argv[0]);
225+
exit(1);
226+
}
227+
}
228+
206229
{
207230
enum ggml_backend backend = GGML_BACKEND_UNKNOWN;
208231
if (arg_backend == NULL) {
@@ -265,7 +288,7 @@ int main(int argc, char **argv) {
265288
tune.model, tune.type_name, ggml_get_backend_name(tune.backend),
266289
tune.blas_vendor);
267290

268-
cmd_tune(&tune, true /* verbose */);
291+
cmd_tune(&tune, n_pass, true /* verbose */);
269292

270293
FILE *fp = NULL;
271294
if (arg_file != NULL) {
@@ -334,7 +357,7 @@ int main(int argc, char **argv) {
334357
return 0;
335358
}
336359

337-
void cmd_tune(struct ggml_mulmat_tune *tune, bool verbose) {
360+
void cmd_tune(struct ggml_mulmat_tune *tune, int n_pass, bool verbose) {
338361
size_t wsize = 0;
339362
void *q_buf = NULL;
340363
void *wdata = NULL;
@@ -477,21 +500,21 @@ void cmd_tune(struct ggml_mulmat_tune *tune, bool verbose) {
477500
// without memset, the first run may be significant slow.
478501
memset(wdata, 0, wsize);
479502

480-
int stage_time[NUM_BENCH];
481-
for (int i_bench = 0; i_bench < NUM_BENCH; i_bench++) {
503+
int stage_time[MAX_NUM_PASS];
504+
for (int i_bench = 0; i_bench < n_pass; i_bench++) {
482505
int t0 = (int)ggml_time_us();
483506

484507
ggml_internal_compute_forward_mul_mat(
485508
profile, stage, wsize, wdata, src0, src1, dst);
486509

487510
stage_time[i_bench] = (int)ggml_time_us() - t0;
488511
if (verbose) {
489-
progress(i_bench, NUM_BENCH);
512+
progress(i_bench, n_pass);
490513
}
491514
}
492515

493516
item->stages_time[stage] =
494-
tune_time_min(stage_time, NUM_BENCH);
517+
tune_time_min(stage_time, n_pass);
495518

496519
if (verbose) {
497520
line_len++;

‎examples/mulmat-tune/mulmat-tune.h

Copy file name to clipboardExpand all lines: examples/mulmat-tune/mulmat-tune.h
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
extern "C" {
99
#endif
1010

11-
#define NUM_BENCH 4
11+
#define MAX_NUM_PASS 5
1212

1313
#define GGML_MULMAT_N_SHAPES 6
1414
#define GGML_MULMAT_MAX_PROFILES 8

‎ggml.c

Copy file name to clipboardExpand all lines: ggml.c
+2-2Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14374,7 +14374,7 @@ void ggml_graph_compute_mul_mat_set_task_profile(struct ggml_cgraph *cgraph) {
1437414374
if (shape != NULL) {
1437514375
memset(profile_time, 0, sizeof(profile_time));
1437614376
ggml_mulmat_tune_shape_estimate_time(shape, M, cgraph->n_threads, profile_time);
14377-
14377+
1437814378
int min = INT32_MAX;
1437914379
for (int j = 0; j < shape->n_profiles; j++) {
1438014380
int total = profile_time[j].total_time;
@@ -14413,7 +14413,7 @@ void ggml_graph_compute_mul_mat_set_task_profile(struct ggml_cgraph *cgraph) {
1441314413
}
1441414414
}
1441514415
}
14416-
14416+
1441714417
if (profile == NULL) {
1441814418
for (int j = 0; j < n_profiles; j++) {
1441914419
if (profiles[j].stages[0].backend == GGML_BACKEND_CPU &&

‎tests/test-mulmat-tune

Copy file name to clipboard
362 KB
Binary file not shown.

‎tests/test-mulmat-tune.c

Copy file name to clipboardExpand all lines: tests/test-mulmat-tune.c
+3-3Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ void test_ggml_mulmat_tune_estimate_time_non_zero_NK(void) {
3535
const int m_start = m_step;
3636
const int m_num = 2;
3737

38-
ggml_mulmat_tune_setup_model(&tune, "7B", m_start, m_step, m_num);
38+
ggml_mulmat_tune_setup_model(&tune, "7B", m_num);
3939

4040
struct ggml_mulmat_tune_shape *shape = NULL;
4141
for (int i = 0; i < tune.n_shapes; i++) {
@@ -149,7 +149,7 @@ void test_ggml_mulmat_tune_estimate_time_non_zero_NK(void) {
149149
ggml_mulmat_tune_get_shape(&tune, shape->N, shape->K,
150150
shape->src0_type, shape->src1_type);
151151
GGML_ASSERT(matched_shape);
152-
152+
153153
ggml_mulmat_tune_shape_estimate_time(matched_shape, e->M, e->nth,
154154
profile_time);
155155

@@ -193,7 +193,7 @@ void test_ggml_mulmat_tune_estimate_time_zero_NK(void) {
193193
const int m_start = m_step;
194194
const int m_num = 2;
195195

196-
ggml_mulmat_tune_setup_model(&tune, "7B", m_start, m_step, m_num);
196+
ggml_mulmat_tune_setup_model(&tune, "7B", m_num);
197197

198198
struct ggml_mulmat_tune_shape *shape = NULL;
199199
for (int i = 0; i < tune.n_shapes; i++) {

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.