Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit ef3171d

Browse files
authored
ggml : workaround for missing _mm256_setr_m128i in GCC < 8 (abetlen#1638)
1 parent 555275a commit ef3171d
Copy full SHA for ef3171d

File tree

Expand file tree / Collapse file tree

1 file changed

+9
-7
lines changed
Filter options
Expand file tree / Collapse file tree

1 file changed

+9
-7
lines changed

‎ggml.c

Copy file name to clipboard | Expand all lines: ggml.c
+9 -7 | Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -492,6 +492,8 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
492492
// quantization
493493
//
494494

495+
#define MM256_SET_M128I(a, b) _mm256_insertf128_si256(_mm256_castsi128_si256(b), (a), 1)
496+
495497
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
496498
// multiply int8_t, add results pairwise twice
497499
static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
@@ -551,7 +553,7 @@ static inline __m256i bytes_from_bits_32(const uint8_t * x) {
551553
static inline __m256i bytes_from_nibbles_32(const uint8_t * rsi)
552554
{
553555
const __m128i tmp = _mm_loadu_si128((const __m128i *)rsi);
554-
const __m256i bytes = _mm256_set_m128i(_mm_srli_epi16(tmp, 4), tmp);
556+
const __m256i bytes = MM256_SET_M128I(_mm_srli_epi16(tmp, 4), tmp);
555557
const __m256i lowMask = _mm256_set1_epi8( 0xF );
556558
return _mm256_and_si256(lowMask, bytes);
557559
}
@@ -624,7 +626,7 @@ static inline __m256i bytes_from_bits_32(const uint8_t * x) {
624626
bytesh = _mm_or_si128(bytesh, bit_mask);
625627
bytesl = _mm_cmpeq_epi8(bytesl, _mm_set1_epi64x(-1));
626628
bytesh = _mm_cmpeq_epi8(bytesh, _mm_set1_epi64x(-1));
627-
return _mm256_set_m128i(bytesh, bytesl);
629+
return MM256_SET_M128I(bytesh, bytesl);
628630
}
629631

630632
// Unpack 32 4-bit fields into 32 bytes
@@ -637,15 +639,15 @@ static inline __m256i bytes_from_nibbles_32(const uint8_t * rsi)
637639
const __m128i lowMask = _mm_set1_epi8(0xF);
638640
tmpl = _mm_and_si128(lowMask, tmpl);
639641
tmph = _mm_and_si128(lowMask, tmph);
640-
return _mm256_set_m128i(tmph, tmpl);
642+
return MM256_SET_M128I(tmph, tmpl);
641643
}
642644

643645
// add int16_t pairwise and return as float vector
644646
static inline __m256 sum_i16_pairs_float(const __m128i xh, const __m128i xl) {
645647
const __m128i ones = _mm_set1_epi16(1);
646648
const __m128i summed_pairsl = _mm_madd_epi16(ones, xl);
647649
const __m128i summed_pairsh = _mm_madd_epi16(ones, xh);
648-
const __m256i summed_pairs = _mm256_set_m128i(summed_pairsh, summed_pairsl);
650+
const __m256i summed_pairs = MM256_SET_M128I(summed_pairsh, summed_pairsl);
649651
return _mm256_cvtepi32_ps(summed_pairs);
650652
}
651653

@@ -2350,7 +2352,7 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
23502352
const __m128i i32_1 = mul_sum_i8_pairs(bx, by);
23512353

23522354
// Convert int32_t to float
2353-
__m256 p = _mm256_cvtepi32_ps(_mm256_set_m128i(i32_0, i32_1));
2355+
__m256 p = _mm256_cvtepi32_ps(MM256_SET_M128I(i32_0, i32_1));
23542356

23552357
// Apply the scale, and accumulate
23562358
acc = _mm256_add_ps(_mm256_mul_ps( d, p ), acc);
@@ -2826,7 +2828,7 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
28262828
__m128i bxh = _mm256_extractf128_si256(bx, 1);
28272829
bxl = _mm_or_si128(bxl, bxhil);
28282830
bxh = _mm_or_si128(bxh, bxhih);
2829-
bx = _mm256_set_m128i(bxh, bxl);
2831+
bx = MM256_SET_M128I(bxh, bxl);
28302832

28312833
const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
28322834

@@ -3082,7 +3084,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
30823084
__m128i bxh = _mm256_extractf128_si256(bx, 1);
30833085
bxl = _mm_or_si128(bxl, bxhil);
30843086
bxh = _mm_or_si128(bxh, bxhih);
3085-
bx = _mm256_set_m128i(bxh, bxl);
3087+
bx = MM256_SET_M128I(bxh, bxl);
30863088

30873089
const __m256 dy = _mm256_set1_ps(y[i].d);
30883090
const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.