Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit cc4826d

Browse filesBrowse files
Inline pg_popcount{32,64} into pg_popcount().
On some systems, calls to pg_popcount{32,64} are indirected through a function pointer. This commit converts pg_popcount() to a function pointer on those systems so that we can inline the appropriate pg_popcount{32,64} implementations into each of the pg_popcount() implementations. Since pg_popcount() may call pg_popcount{32,64} several times, this can significantly improve its performance. Suggested-by: David Rowley Reviewed-by: David Rowley Discussion: https://postgr.es/m/CAApHDvrb7MJRB6JuKLDEY2x_LKdFHwVbogpjZBCX547i5%2BrXOQ%40mail.gmail.com
1 parent b7e2121 commit cc4826d
Copy full SHA for cc4826d

File tree

Expand file treeCollapse file tree

2 files changed

+121
-39
lines changed
Filter options
Expand file treeCollapse file tree

2 files changed

+121
-39
lines changed

‎src/include/port/pg_bitutils.h

Copy file name to clipboardExpand all lines: src/include/port/pg_bitutils.h
+2-3Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -302,17 +302,16 @@ pg_ceil_log2_64(uint64 num)
302302
/* Attempt to use the POPCNT instruction, but perform a runtime check first */
303303
extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
304304
extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
305+
extern PGDLLIMPORT uint64 (*pg_popcount) (const char *buf, int bytes);
305306

306307
#else
307308
/* Use a portable implementation -- no need for a function pointer. */
308309
extern int pg_popcount32(uint32 word);
309310
extern int pg_popcount64(uint64 word);
311+
extern uint64 pg_popcount(const char *buf, int bytes);
310312

311313
#endif /* TRY_POPCNT_FAST */
312314

313-
/* Count the number of one-bits in a byte array */
314-
extern uint64 pg_popcount(const char *buf, int bytes);
315-
316315
/*
317316
* Rotate the bits of "word" to the right/left by n bits.
318317
*/

‎src/port/pg_bitutils.c

Copy file name to clipboardExpand all lines: src/port/pg_bitutils.c
+119-36Lines changed: 119 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -103,18 +103,22 @@ const uint8 pg_number_of_ones[256] = {
103103
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104104
};
105105

106-
static int pg_popcount32_slow(uint32 word);
107-
static int pg_popcount64_slow(uint64 word);
106+
static inline int pg_popcount32_slow(uint32 word);
107+
static inline int pg_popcount64_slow(uint64 word);
108+
static uint64 pg_popcount_slow(const char *buf, int bytes);
108109

109110
#ifdef TRY_POPCNT_FAST
110111
static bool pg_popcount_available(void);
111112
static int pg_popcount32_choose(uint32 word);
112113
static int pg_popcount64_choose(uint64 word);
113-
static int pg_popcount32_fast(uint32 word);
114-
static int pg_popcount64_fast(uint64 word);
114+
static uint64 pg_popcount_choose(const char *buf, int bytes);
115+
static inline int pg_popcount32_fast(uint32 word);
116+
static inline int pg_popcount64_fast(uint64 word);
117+
static uint64 pg_popcount_fast(const char *buf, int bytes);
115118

116119
int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
117120
int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
121+
uint64 (*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose;
118122
#endif /* TRY_POPCNT_FAST */
119123

120124
#ifdef TRY_POPCNT_FAST
@@ -151,11 +155,13 @@ pg_popcount32_choose(uint32 word)
151155
{
152156
pg_popcount32 = pg_popcount32_fast;
153157
pg_popcount64 = pg_popcount64_fast;
158+
pg_popcount = pg_popcount_fast;
154159
}
155160
else
156161
{
157162
pg_popcount32 = pg_popcount32_slow;
158163
pg_popcount64 = pg_popcount64_slow;
164+
pg_popcount = pg_popcount_slow;
159165
}
160166

161167
return pg_popcount32(word);
@@ -168,21 +174,42 @@ pg_popcount64_choose(uint64 word)
168174
{
169175
pg_popcount32 = pg_popcount32_fast;
170176
pg_popcount64 = pg_popcount64_fast;
177+
pg_popcount = pg_popcount_fast;
171178
}
172179
else
173180
{
174181
pg_popcount32 = pg_popcount32_slow;
175182
pg_popcount64 = pg_popcount64_slow;
183+
pg_popcount = pg_popcount_slow;
176184
}
177185

178186
return pg_popcount64(word);
179187
}
180188

189+
static uint64
190+
pg_popcount_choose(const char *buf, int bytes)
191+
{
192+
if (pg_popcount_available())
193+
{
194+
pg_popcount32 = pg_popcount32_fast;
195+
pg_popcount64 = pg_popcount64_fast;
196+
pg_popcount = pg_popcount_fast;
197+
}
198+
else
199+
{
200+
pg_popcount32 = pg_popcount32_slow;
201+
pg_popcount64 = pg_popcount64_slow;
202+
pg_popcount = pg_popcount_slow;
203+
}
204+
205+
return pg_popcount(buf, bytes);
206+
}
207+
181208
/*
182209
* pg_popcount32_fast
183210
* Return the number of 1 bits set in word
184211
*/
185-
static int
212+
static inline int
186213
pg_popcount32_fast(uint32 word)
187214
{
188215
#ifdef _MSC_VER
@@ -199,7 +226,7 @@ __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
199226
* pg_popcount64_fast
200227
* Return the number of 1 bits set in word
201228
*/
202-
static int
229+
static inline int
203230
pg_popcount64_fast(uint64 word)
204231
{
205232
#ifdef _MSC_VER
@@ -212,14 +239,60 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
212239
#endif
213240
}
214241

242+
/*
243+
* pg_popcount_fast
244+
* Returns the number of 1-bits in buf
245+
*/
246+
static uint64
247+
pg_popcount_fast(const char *buf, int bytes)
248+
{
249+
uint64 popcnt = 0;
250+
251+
#if SIZEOF_VOID_P >= 8
252+
/* Process in 64-bit chunks if the buffer is aligned. */
253+
if (buf == (const char *) TYPEALIGN(8, buf))
254+
{
255+
const uint64 *words = (const uint64 *) buf;
256+
257+
while (bytes >= 8)
258+
{
259+
popcnt += pg_popcount64_fast(*words++);
260+
bytes -= 8;
261+
}
262+
263+
buf = (const char *) words;
264+
}
265+
#else
266+
/* Process in 32-bit chunks if the buffer is aligned. */
267+
if (buf == (const char *) TYPEALIGN(4, buf))
268+
{
269+
const uint32 *words = (const uint32 *) buf;
270+
271+
while (bytes >= 4)
272+
{
273+
popcnt += pg_popcount32_fast(*words++);
274+
bytes -= 4;
275+
}
276+
277+
buf = (const char *) words;
278+
}
279+
#endif
280+
281+
/* Process any remaining bytes */
282+
while (bytes--)
283+
popcnt += pg_number_of_ones[(unsigned char) *buf++];
284+
285+
return popcnt;
286+
}
287+
215288
#endif /* TRY_POPCNT_FAST */
216289

217290

218291
/*
219292
* pg_popcount32_slow
220293
* Return the number of 1 bits set in word
221294
*/
222-
static int
295+
static inline int
223296
pg_popcount32_slow(uint32 word)
224297
{
225298
#ifdef HAVE__BUILTIN_POPCOUNT
@@ -241,7 +314,7 @@ pg_popcount32_slow(uint32 word)
241314
* pg_popcount64_slow
242315
* Return the number of 1 bits set in word
243316
*/
244-
static int
317+
static inline int
245318
pg_popcount64_slow(uint64 word)
246319
{
247320
#ifdef HAVE__BUILTIN_POPCOUNT
@@ -265,35 +338,12 @@ pg_popcount64_slow(uint64 word)
265338
#endif /* HAVE__BUILTIN_POPCOUNT */
266339
}
267340

268-
#ifndef TRY_POPCNT_FAST
269-
270341
/*
271-
* When the POPCNT instruction is not available, there's no point in using
272-
* function pointers to vary the implementation between the fast and slow
273-
* method. We instead just make these actual external functions when
274-
* TRY_POPCNT_FAST is not defined. The compiler should be able to inline
275-
* the slow versions here.
276-
*/
277-
int
278-
pg_popcount32(uint32 word)
279-
{
280-
return pg_popcount32_slow(word);
281-
}
282-
283-
int
284-
pg_popcount64(uint64 word)
285-
{
286-
return pg_popcount64_slow(word);
287-
}
288-
289-
#endif /* !TRY_POPCNT_FAST */
290-
291-
/*
292-
* pg_popcount
342+
* pg_popcount_slow
293343
* Returns the number of 1-bits in buf
294344
*/
295-
uint64
296-
pg_popcount(const char *buf, int bytes)
345+
static uint64
346+
pg_popcount_slow(const char *buf, int bytes)
297347
{
298348
uint64 popcnt = 0;
299349

@@ -305,7 +355,7 @@ pg_popcount(const char *buf, int bytes)
305355

306356
while (bytes >= 8)
307357
{
308-
popcnt += pg_popcount64(*words++);
358+
popcnt += pg_popcount64_slow(*words++);
309359
bytes -= 8;
310360
}
311361

@@ -319,7 +369,7 @@ pg_popcount(const char *buf, int bytes)
319369

320370
while (bytes >= 4)
321371
{
322-
popcnt += pg_popcount32(*words++);
372+
popcnt += pg_popcount32_slow(*words++);
323373
bytes -= 4;
324374
}
325375

@@ -333,3 +383,36 @@ pg_popcount(const char *buf, int bytes)
333383

334384
return popcnt;
335385
}
386+
387+
#ifndef TRY_POPCNT_FAST
388+
389+
/*
390+
* When the POPCNT instruction is not available, there's no point in using
391+
* function pointers to vary the implementation between the fast and slow
392+
* method. We instead just make these actual external functions when
393+
* TRY_POPCNT_FAST is not defined. The compiler should be able to inline
394+
* the slow versions here.
395+
*/
396+
int
397+
pg_popcount32(uint32 word)
398+
{
399+
return pg_popcount32_slow(word);
400+
}
401+
402+
int
403+
pg_popcount64(uint64 word)
404+
{
405+
return pg_popcount64_slow(word);
406+
}
407+
408+
/*
409+
* pg_popcount
410+
* Returns the number of 1-bits in buf
411+
*/
412+
uint64
413+
pg_popcount(const char *buf, int bytes)
414+
{
415+
return pg_popcount_slow(buf, bytes);
416+
}
417+
418+
#endif /* !TRY_POPCNT_FAST */

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.