3 files changed: 6 insertions(+), 6 deletions(-)

This patch renames the [opencl] tag to [GPU] in the layer-offload log messages of the GPT-J, MPT, and GPT-NeoX model loaders. The same messages are printed on the cuBLAS path as well (note the ggml_cuda_transform_tensor calls in the hunks below), so the backend-neutral tag is the accurate one.
@@ -348,7 +348,7 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layer));
-        fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        fprintf(stderr, "%s: [GPU] offloading %d layers to GPU\n", __func__, n_gpu);
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.c_attn_q_proj_w->backend = GGML_BACKEND_GPU;
@@ -373,7 +373,7 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
             ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data, layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w);
 #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
     }
 #endif
 
@@ -301,7 +301,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layers));
-        fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        fprintf(stderr, "%s: [GPU] offloading %d layers to GPU\n", __func__, n_gpu);
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.ffn_up_proj->backend = GGML_BACKEND_GPU;
@@ -320,7 +320,7 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
             ggml_cuda_transform_tensor(layer.c_attn_out_proj_weight->data, layer.c_attn_out_proj_weight); vram_total += ggml_nbytes(layer.c_attn_out_proj_weight);
 #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
     }
 #endif
 
@@ -335,7 +335,7 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
         const auto & hparams = model.hparams;
         size_t vram_total = 0;
         const int n_gpu = std::min(gpulayers, int(hparams.n_layer));
-        fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
+        fprintf(stderr, "%s: [GPU] offloading %d layers to GPU\n", __func__, n_gpu);
         for (int i = 0; i < n_gpu; ++i) {
             const auto & layer = model.layers[i];
             layer.c_attn_attn_w->backend = GGML_BACKEND_GPU;
@@ -354,7 +354,7 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
             ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data, layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w);
 #endif
         }
-        fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
+        fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
     }
 #endif
 
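For context when skimming: all six hunks edit the same two log lines of one shared offload routine. Below is a minimal, hedged sketch of that pattern, not the project's actual code. toy_layer and toy_model are hypothetical stand-ins for the real per-architecture structs; the GGML_USE_CUBLAS guard is assumed (the hunks show only the closing #endif); the ggml calls themselves (GGML_BACKEND_GPU, ggml_nbytes, and the two-argument ggml_cuda_transform_tensor) are the ones the diff uses, so the sketch assumes the ggml headers of this era.

// sketch of the offload pattern shared by all three loaders (assumptions above)
#include <algorithm>
#include <cstdio>
#include <vector>
#include "ggml.h"
#if defined(GGML_USE_CUBLAS)
#include "ggml-cuda.h"
#endif

struct toy_layer { struct ggml_tensor * proj_w; };              // one offloadable weight
struct toy_model { int n_layer; std::vector<toy_layer> layers; };

static void offload_to_gpu(toy_model & model, int gpulayers) {
    size_t vram_total = 0;
    // never offload more layers than the model has
    const int n_gpu = std::min(gpulayers, model.n_layer);
    fprintf(stderr, "%s: [GPU] offloading %d layers to GPU\n", __func__, n_gpu);
    for (int i = 0; i < n_gpu; ++i) {
        toy_layer & layer = model.layers[i];
        layer.proj_w->backend = GGML_BACKEND_GPU;               // mark tensor GPU-resident
#if defined(GGML_USE_CUBLAS)
        // copy the weight into VRAM and account for its size
        ggml_cuda_transform_tensor(layer.proj_w->data, layer.proj_w);
        vram_total += ggml_nbytes(layer.proj_w);
#endif
    }
    fprintf(stderr, "%s: [GPU] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
}

Note on the final log line: dividing vram_total by 1024 twice converts bytes to mebibytes, which is why the message reports whole MB.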