Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit d09dbeb

Browse filesBrowse files
committed
Speedup hash index builds by skipping needless binary searches
When building hash indexes using the spool method, tuples are added to the index page in hashkey order. Because of this, we can safely skip performing the binary search on the existing tuples on the page to find the location to insert the tuple based on its hashkey value. For this case, we can just always put the tuple at the end of the item array as the tuples will always arrive in hashkey order. Testing has shown that this can improve hash index build speeds by 5-15% with a unique set of integer values. Author: Simon Riggs Reviewed-by: David Rowley Tested-by: David Zhang, Tomas Vondra Discussion: https://postgr.es/m/CANbhV-GBc5JoG0AneUGPZZW3o4OK5LjBGeKe_icpC3R1McrZWQ@mail.gmail.com
1 parent d46ad72 commit d09dbeb
Copy full SHA for d09dbeb

File tree

4 files changed

+41
-13
lines changed
Filter options

4 files changed

+41
-13
lines changed

‎src/backend/access/hash/hash.c

Copy file name to clipboardExpand all lines: src/backend/access/hash/hash.c
+2-2
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ hashbuildCallback(Relation index,
231231
itup = index_form_tuple(RelationGetDescr(index),
232232
index_values, index_isnull);
233233
itup->t_tid = *tid;
234-
_hash_doinsert(index, itup, buildstate->heapRel);
234+
_hash_doinsert(index, itup, buildstate->heapRel, false);
235235
pfree(itup);
236236
}
237237

@@ -265,7 +265,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
265265
itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
266266
itup->t_tid = *ht_ctid;
267267

268-
_hash_doinsert(rel, itup, heapRel);
268+
_hash_doinsert(rel, itup, heapRel, false);
269269

270270
pfree(itup);
271271

‎src/backend/access/hash/hashinsert.c

Copy file name to clipboardExpand all lines: src/backend/access/hash/hashinsert.c
+33-8
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,12 @@ static void _hash_vacuum_one_page(Relation rel, Relation hrel,
3232
*
3333
* This routine is called by the public interface routines, hashbuild
3434
* and hashinsert. By here, itup is completely filled in.
35+
*
36+
* 'sorted' must only be passed as 'true' when inserts are done in hashkey
37+
* order.
3538
*/
3639
void
37-
_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
40+
_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
3841
{
3942
Buffer buf = InvalidBuffer;
4043
Buffer bucket_buf;
@@ -198,7 +201,7 @@ _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
198201
START_CRIT_SECTION();
199202

200203
/* found page with enough space, so add the item here */
201-
itup_off = _hash_pgaddtup(rel, buf, itemsz, itup);
204+
itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
202205
MarkBufferDirty(buf);
203206

204207
/* metapage operations */
@@ -263,21 +266,43 @@ _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
263266
*
264267
* Returns the offset number at which the tuple was inserted. This function
265268
* is responsible for preserving the condition that tuples in a hash index
266-
* page are sorted by hashkey value.
269+
* page are sorted by hashkey value, however, if the caller is certain that
270+
* the hashkey for the tuple being added is >= the hashkeys of all existing
271+
* tuples on the page, then the 'appendtup' flag may be passed as true. This
272+
* saves from having to binary search for the correct location to insert the
273+
* tuple.
267274
*/
268275
OffsetNumber
269-
_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
276+
_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup,
277+
bool appendtup)
270278
{
271279
OffsetNumber itup_off;
272280
Page page;
273-
uint32 hashkey;
274281

275282
_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
276283
page = BufferGetPage(buf);
277284

278-
/* Find where to insert the tuple (preserving page's hashkey ordering) */
279-
hashkey = _hash_get_indextuple_hashkey(itup);
280-
itup_off = _hash_binsearch(page, hashkey);
285+
/*
286+
* Find where to insert the tuple (preserving page's hashkey ordering). If
287+
* 'appendtup' is true then we just insert it at the end.
288+
*/
289+
if (appendtup)
290+
{
291+
itup_off = PageGetMaxOffsetNumber(page) + 1;
292+
293+
/* ensure this tuple's hashkey is >= the final existing tuple */
294+
Assert(PageGetMaxOffsetNumber(page) == 0 ||
295+
_hash_get_indextuple_hashkey((IndexTuple)
296+
PageGetItem(page, PageGetItemId(page,
297+
PageGetMaxOffsetNumber(page)))) <=
298+
_hash_get_indextuple_hashkey(itup));
299+
}
300+
else
301+
{
302+
uint32 hashkey = _hash_get_indextuple_hashkey(itup);
303+
304+
itup_off = _hash_binsearch(page, hashkey);
305+
}
281306

282307
if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
283308
== InvalidOffsetNumber)

‎src/backend/access/hash/hashsort.c

Copy file name to clipboardExpand all lines: src/backend/access/hash/hashsort.c
+2-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ _h_indexbuild(HSpool *hspool, Relation heapRel)
145145
Assert(hashkey >= lasthashkey);
146146
#endif
147147

148-
_hash_doinsert(hspool->index, itup, heapRel);
148+
/* the tuples are sorted by hashkey, so pass 'sorted' as true */
149+
_hash_doinsert(hspool->index, itup, heapRel, true);
149150

150151
pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
151152
++tups_done);

‎src/include/access/hash.h

Copy file name to clipboardExpand all lines: src/include/access/hash.h
+4-2
Original file line numberDiff line numberDiff line change
@@ -390,9 +390,11 @@ extern void hashadjustmembers(Oid opfamilyoid,
390390
/* private routines */
391391

392392
/* hashinsert.c */
393-
extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel);
393+
extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel,
394+
bool sorted);
394395
extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
395-
Size itemsize, IndexTuple itup);
396+
Size itemsize, IndexTuple itup,
397+
bool appendtup);
396398
extern void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
397399
OffsetNumber *itup_offsets, uint16 nitups);
398400

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.