Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit a76ef15

Browse filesBrowse files
committed
Add sort support routine for the UUID data type.
This introduces a simple encoding scheme to produce abbreviated keys: pack as many bytes of each UUID as will fit into a Datum. On little-endian machines, a byteswap is also performed; the abbreviated comparator can therefore just consist of a simple 3-way unsigned integer comparison. The purpose of this change is to speed up sorting data on a column of type UUID. Peter Geoghegan
1 parent 5644419 commit a76ef15
Copy full SHA for a76ef15

File tree

Expand file treeCollapse file tree

4 files changed

+191
-0
lines changed
Filter options
Expand file treeCollapse file tree

4 files changed

+191
-0
lines changed

‎src/backend/utils/adt/uuid.c

Copy file name to clipboardExpand all lines: src/backend/utils/adt/uuid.c
+187Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,12 @@
1414
#include "postgres.h"
1515

1616
#include "access/hash.h"
17+
#include "lib/hyperloglog.h"
1718
#include "libpq/pqformat.h"
19+
#include "port/pg_bswap.h"
1820
#include "utils/builtins.h"
21+
#include "utils/guc.h"
22+
#include "utils/sortsupport.h"
1923
#include "utils/uuid.h"
2024

2125
/* uuid size in bytes */
@@ -27,8 +31,21 @@ struct pg_uuid_t
2731
unsigned char data[UUID_LEN];
2832
};
2933

34+
/* sortsupport for uuid */
35+
typedef struct
36+
{
37+
int64 input_count; /* number of non-null values seen */
38+
bool estimating; /* true if estimating cardinality */
39+
40+
hyperLogLogState abbr_card; /* cardinality estimator */
41+
} uuid_sortsupport_state;
42+
3043
static void string_to_uuid(const char *source, pg_uuid_t *uuid);
3144
static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
45+
static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
46+
static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
47+
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
48+
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
3249

3350
Datum
3451
uuid_in(PG_FUNCTION_ARGS)
@@ -222,6 +239,176 @@ uuid_cmp(PG_FUNCTION_ARGS)
222239
PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
223240
}
224241

242+
/*
243+
* Sort support strategy routine
244+
*/
245+
Datum
246+
uuid_sortsupport(PG_FUNCTION_ARGS)
247+
{
248+
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
249+
250+
ssup->comparator = uuid_fast_cmp;
251+
ssup->ssup_extra = NULL;
252+
253+
if (ssup->abbreviate)
254+
{
255+
uuid_sortsupport_state *uss;
256+
MemoryContext oldcontext;
257+
258+
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
259+
260+
uss = palloc(sizeof(uuid_sortsupport_state));
261+
uss->input_count = 0;
262+
uss->estimating = true;
263+
initHyperLogLog(&uss->abbr_card, 10);
264+
265+
ssup->ssup_extra = uss;
266+
267+
ssup->comparator = uuid_cmp_abbrev;
268+
ssup->abbrev_converter = uuid_abbrev_convert;
269+
ssup->abbrev_abort = uuid_abbrev_abort;
270+
ssup->abbrev_full_comparator = uuid_fast_cmp;
271+
272+
MemoryContextSwitchTo(oldcontext);
273+
}
274+
275+
PG_RETURN_VOID();
276+
}
277+
278+
/*
279+
* SortSupport comparison func
280+
*/
281+
static int
282+
uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
283+
{
284+
pg_uuid_t *arg1 = DatumGetUUIDP(x);
285+
pg_uuid_t *arg2 = DatumGetUUIDP(y);
286+
287+
return uuid_internal_cmp(arg1, arg2);
288+
}
289+
290+
/*
291+
* Abbreviated key comparison func
292+
*/
293+
static int
294+
uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
295+
{
296+
if (x > y)
297+
return 1;
298+
else if (x == y)
299+
return 0;
300+
else
301+
return -1;
302+
}
303+
304+
/*
305+
* Callback for estimating effectiveness of abbreviated key optimization.
306+
*
307+
* We pay no attention to the cardinality of the non-abbreviated data, because
308+
* there is no equality fast-path within authoritative uuid comparator.
309+
*/
310+
static bool
311+
uuid_abbrev_abort(int memtupcount, SortSupport ssup)
312+
{
313+
uuid_sortsupport_state *uss = ssup->ssup_extra;
314+
double abbr_card;
315+
316+
if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
317+
return false;
318+
319+
abbr_card = estimateHyperLogLog(&uss->abbr_card);
320+
321+
/*
322+
* If we have >100k distinct values, then even if we were sorting many
323+
* billion rows we'd likely still break even, and the penalty of undoing
324+
* that many rows of abbrevs would probably not be worth it. Stop even
325+
* counting at that point.
326+
*/
327+
if (abbr_card > 100000.0)
328+
{
329+
#ifdef TRACE_SORT
330+
if (trace_sort)
331+
elog(LOG,
332+
"uuid_abbrev: estimation ends at cardinality %f"
333+
" after " INT64_FORMAT " values (%d rows)",
334+
abbr_card, uss->input_count, memtupcount);
335+
#endif
336+
uss->estimating = false;
337+
return false;
338+
}
339+
340+
/*
341+
* Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
342+
* fudge factor allows us to abort earlier on genuinely pathological data
343+
* where we've had exactly one abbreviated value in the first 2k (non-null)
344+
* rows.
345+
*/
346+
if (abbr_card < uss->input_count / 2000.0 + 0.5)
347+
{
348+
#ifdef TRACE_SORT
349+
if (trace_sort)
350+
elog(LOG,
351+
"uuid_abbrev: aborting abbreviation at cardinality %f"
352+
" below threshold %f after " INT64_FORMAT " values (%d rows)",
353+
abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
354+
memtupcount);
355+
#endif
356+
return true;
357+
}
358+
359+
#ifdef TRACE_SORT
360+
if (trace_sort)
361+
elog(LOG,
362+
"uuid_abbrev: cardinality %f after " INT64_FORMAT
363+
" values (%d rows)", abbr_card, uss->input_count, memtupcount);
364+
#endif
365+
366+
return false;
367+
}
368+
369+
/*
370+
* Conversion routine for sortsupport. Converts original uuid representation
371+
* to abbreviated key representation. Our encoding strategy is simple -- pack
372+
* the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
373+
* machines, the bytes are stored in reverse order), and treat it as an
374+
* unsigned integer.
375+
*/
376+
static Datum
377+
uuid_abbrev_convert(Datum original, SortSupport ssup)
378+
{
379+
uuid_sortsupport_state *uss = ssup->ssup_extra;
380+
pg_uuid_t *authoritative = DatumGetUUIDP(original);
381+
Datum res;
382+
383+
memcpy((char *) &res, authoritative->data, sizeof(Datum));
384+
uss->input_count += 1;
385+
386+
if (uss->estimating)
387+
{
388+
uint32 tmp;
389+
390+
#if SIZEOF_DATUM == 8
391+
tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
392+
#else /* SIZEOF_DATUM != 8 */
393+
tmp = (uint32) res;
394+
#endif
395+
396+
addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
397+
}
398+
399+
/*
400+
* Byteswap on little-endian machines.
401+
*
402+
* This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
403+
* comparator) works correctly on all platforms. If we didn't do this, the
404+
* comparator would have to call memcmp() with a pair of pointers to the
405+
* first byte of each abbreviated key, which is slower.
406+
*/
407+
res = DatumBigEndianToNative(res);
408+
409+
return res;
410+
}
411+
225412
/* hash index support */
226413
Datum
227414
uuid_hash(PG_FUNCTION_ARGS)

‎src/include/catalog/pg_amproc.h

Copy file name to clipboardExpand all lines: src/include/catalog/pg_amproc.h
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ DATA(insert ( 2233 703 703 1 380 ));
134134
DATA(insert ( 2234 704 704 1 381 ));
135135
DATA(insert ( 2789 27 27 1 2794 ));
136136
DATA(insert ( 2968 2950 2950 1 2960 ));
137+
DATA(insert ( 2968 2950 2950 2 3300 ));
137138
DATA(insert ( 2994 2249 2249 1 2987 ));
138139
DATA(insert ( 3194 2249 2249 1 3187 ));
139140
DATA(insert ( 3253 3220 3220 1 3251 ));

‎src/include/catalog/pg_proc.h

Copy file name to clipboardExpand all lines: src/include/catalog/pg_proc.h
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4467,6 +4467,8 @@ DATA(insert OID = 2958 ( uuid_gt PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0
44674467
DATA(insert OID = 2959 ( uuid_ne PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "2950 2950" _null_ _null_ _null_ _null_ _null_ uuid_ne _null_ _null_ _null_ ));
44684468
DATA(insert OID = 2960 ( uuid_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "2950 2950" _null_ _null_ _null_ _null_ _null_ uuid_cmp _null_ _null_ _null_ ));
44694469
DESCR("less-equal-greater");
4470+
DATA(insert OID = 3300 ( uuid_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ uuid_sortsupport _null_ _null_ _null_ ));
4471+
DESCR("sort support");
44704472
DATA(insert OID = 2961 ( uuid_recv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2950 "2281" _null_ _null_ _null_ _null_ _null_ uuid_recv _null_ _null_ _null_ ));
44714473
DESCR("I/O");
44724474
DATA(insert OID = 2962 ( uuid_send PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 17 "2950" _null_ _null_ _null_ _null_ _null_ uuid_send _null_ _null_ _null_ ));

‎src/include/utils/builtins.h

Copy file name to clipboardExpand all lines: src/include/utils/builtins.h
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,7 @@ extern Datum uuid_ge(PG_FUNCTION_ARGS);
11741174
extern Datum uuid_gt(PG_FUNCTION_ARGS);
11751175
extern Datum uuid_ne(PG_FUNCTION_ARGS);
11761176
extern Datum uuid_cmp(PG_FUNCTION_ARGS);
1177+
extern Datum uuid_sortsupport(PG_FUNCTION_ARGS);
11771178
extern Datum uuid_hash(PG_FUNCTION_ARGS);
11781179

11791180
/* windowfuncs.c */

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.