@@ -2281,13 +2281,21 @@ static void ggml_vk_load_shaders(vk_device& device) {
2281
2281
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f32_f32 , " contig_cpy_f32_f32" , contig_cpy_f32_f32_len, contig_cpy_f32_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2282
2282
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f32_f16 , " contig_cpy_f32_f16" , contig_cpy_f32_f16_len, contig_cpy_f32_f16_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2283
2283
ggml_vk_create_pipeline (device, device->pipeline_contig_cpy_f16_f16 , " contig_cpy_f16_f16" , contig_cpy_f16_f16_len, contig_cpy_f16_f16_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {512 , 1 , 1 }, {}, 1 );
2284
-
2285
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_len, cpy_f32_q4_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2286
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_len, cpy_f32_q4_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2287
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_len, cpy_f32_q5_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2288
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_len, cpy_f32_q5_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2289
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_len, cpy_f32_q8_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2290
- ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2284
+ if (device->float_controls_rte_fp16 ) {
2285
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_rte_len, cpy_f32_q4_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2286
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_rte_len, cpy_f32_q4_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2287
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_rte_len, cpy_f32_q5_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2288
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_rte_len, cpy_f32_q5_1_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2289
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_rte_len, cpy_f32_q8_0_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2290
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_rte_len, cpy_f32_iq4_nl_rte_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2291
+ } else {
2292
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_0], " cpy_f32_q4_0" , cpy_f32_q4_0_len, cpy_f32_q4_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2293
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q4_1], " cpy_f32_q4_1" , cpy_f32_q4_1_len, cpy_f32_q4_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
2294
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_0], " cpy_f32_q5_0" , cpy_f32_q5_0_len, cpy_f32_q5_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
2295
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q5_1], " cpy_f32_q5_1" , cpy_f32_q5_1_len, cpy_f32_q5_1_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
2296
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_len, cpy_f32_q8_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
2297
+ ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
2298
+ }
2291
2299
2292
2300
ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q4_0], " cpy_q4_0_f32" , cpy_q4_0_f32_len, cpy_q4_0_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
2293
2301
ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q4_1], " cpy_q4_1_f32" , cpy_q4_1_f32_len, cpy_q4_1_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
0 commit comments