Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit c02fdc9

Browse filesBrowse files
committed
Logical Tape Set: use min heap for freelist.
Previously, the freelist of blocks was tracked as an occasionally-sorted array. A min heap is more resilient to larger freelists or more frequent changes between reading and writing. Discussion: https://postgr.es/m/97c46a59c27f3c38e486ca170fcbc618d97ab049.camel%40j-davis.com
1 parent cac8ce4 commit c02fdc9
Copy full SHA for c02fdc9

File tree

Expand file treeCollapse file tree

1 file changed

+104
-56
lines changed
Filter options
Expand file treeCollapse file tree

1 file changed

+104
-56
lines changed

‎src/backend/utils/sort/logtape.c

Copy file name to clipboardExpand all lines: src/backend/utils/sort/logtape.c
+104-56Lines changed: 104 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,8 @@
4949
* when reading, and read multiple blocks from the same tape in one go,
5050
* whenever the buffer becomes empty.
5151
*
52-
* To support the above policy of writing to the lowest free block,
53-
* ltsGetFreeBlock sorts the list of free block numbers into decreasing
54-
* order each time it is asked for a block and the list isn't currently
55-
* sorted. This is an efficient way to handle it because we expect cycles
56-
* of releasing many blocks followed by re-using many blocks, due to
57-
* the larger read buffer.
52+
* To support the above policy of writing to the lowest free block, the
53+
* freelist is a min heap.
5854
*
5955
* Since all the bookkeeping and buffer memory is allocated with palloc(),
6056
* and the underlying file(s) are made with OpenTemporaryFile, all resources
@@ -170,7 +166,7 @@ struct LogicalTapeSet
170166
/*
171167
* File size tracking. nBlocksWritten is the size of the underlying file,
172168
* in BLCKSZ blocks. nBlocksAllocated is the number of blocks allocated
173-
* by ltsGetFreeBlock(), and it is always greater than or equal to
169+
* by ltsReleaseBlock(), and it is always greater than or equal to
174170
* nBlocksWritten. Blocks between nBlocksAllocated and nBlocksWritten are
175171
* blocks that have been allocated for a tape, but have not been written
176172
* to the underlying file yet. nHoleBlocks tracks the total number of
@@ -188,17 +184,11 @@ struct LogicalTapeSet
188184
* If forgetFreeSpace is true then any freed blocks are simply forgotten
189185
* rather than being remembered in freeBlocks[]. See notes for
190186
* LogicalTapeSetForgetFreeSpace().
191-
*
192-
* If blocksSorted is true then the block numbers in freeBlocks are in
193-
* *decreasing* order, so that removing the last entry gives us the lowest
194-
* free block. We re-sort the blocks whenever a block is demanded; this
195-
* should be reasonably efficient given the expected usage pattern.
196187
*/
197188
bool forgetFreeSpace; /* are we remembering free blocks? */
198-
bool blocksSorted; /* is freeBlocks[] currently in order? */
199-
long *freeBlocks; /* resizable array */
200-
int nFreeBlocks; /* # of currently free blocks */
201-
int freeBlocksLen; /* current allocated length of freeBlocks[] */
189+
long *freeBlocks; /* resizable array holding minheap */
190+
long nFreeBlocks; /* # of currently free blocks */
191+
Size freeBlocksLen; /* current allocated length of freeBlocks[] */
202192

203193
/* The array of logical tapes. */
204194
int nTapes; /* # of logical tapes in set */
@@ -321,46 +311,88 @@ ltsReadFillBuffer(LogicalTapeSet *lts, LogicalTape *lt)
321311
return (lt->nbytes > 0);
322312
}
323313

324-
/*
325-
* qsort comparator for sorting freeBlocks[] into decreasing order.
326-
*/
327-
static int
328-
freeBlocks_cmp(const void *a, const void *b)
314+
static inline void
315+
swap_nodes(long *heap, unsigned long a, unsigned long b)
316+
{
317+
unsigned long swap;
318+
319+
swap = heap[a];
320+
heap[a] = heap[b];
321+
heap[b] = swap;
322+
}
323+
324+
static inline unsigned long
325+
left_offset(unsigned long i)
326+
{
327+
return 2 * i + 1;
328+
}
329+
330+
static inline unsigned long
331+
right_offset(unsigned i)
332+
{
333+
return 2 * i + 2;
334+
}
335+
336+
static inline unsigned long
337+
parent_offset(unsigned long i)
329338
{
330-
long ablk = *((const long *) a);
331-
long bblk = *((const long *) b);
332-
333-
/* can't just subtract because long might be wider than int */
334-
if (ablk < bblk)
335-
return 1;
336-
if (ablk > bblk)
337-
return -1;
338-
return 0;
339+
return (i - 1) / 2;
339340
}
340341

341342
/*
342-
* Select a currently unused block for writing to.
343+
* Select the lowest currently unused block by taking the first element from
344+
* the freelist min heap.
343345
*/
344346
static long
345347
ltsGetFreeBlock(LogicalTapeSet *lts)
346348
{
347-
/*
348-
* If there are multiple free blocks, we select the one appearing last in
349-
* freeBlocks[] (after sorting the array if needed). If there are none,
350-
* assign the next block at the end of the file.
351-
*/
352-
if (lts->nFreeBlocks > 0)
349+
long *heap = lts->freeBlocks;
350+
long blocknum;
351+
int heapsize;
352+
unsigned long pos;
353+
354+
/* freelist empty; allocate a new block */
355+
if (lts->nFreeBlocks == 0)
356+
return lts->nBlocksAllocated++;
357+
358+
if (lts->nFreeBlocks == 1)
353359
{
354-
if (!lts->blocksSorted)
355-
{
356-
qsort((void *) lts->freeBlocks, lts->nFreeBlocks,
357-
sizeof(long), freeBlocks_cmp);
358-
lts->blocksSorted = true;
359-
}
360-
return lts->freeBlocks[--lts->nFreeBlocks];
360+
lts->nFreeBlocks--;
361+
return lts->freeBlocks[0];
361362
}
362-
else
363-
return lts->nBlocksAllocated++;
363+
364+
/* take top of minheap */
365+
blocknum = heap[0];
366+
367+
/* replace with end of minheap array */
368+
heap[0] = heap[--lts->nFreeBlocks];
369+
370+
/* sift down */
371+
pos = 0;
372+
heapsize = lts->nFreeBlocks;
373+
while (true)
374+
{
375+
unsigned long left = left_offset(pos);
376+
unsigned long right = right_offset(pos);
377+
unsigned long min_child;
378+
379+
if (left < heapsize && right < heapsize)
380+
min_child = (heap[left] < heap[right]) ? left : right;
381+
else if (left < heapsize)
382+
min_child = left;
383+
else if (right < heapsize)
384+
min_child = right;
385+
else
386+
break;
387+
388+
if (heap[min_child] >= heap[pos])
389+
break;
390+
391+
swap_nodes(heap, min_child, pos);
392+
pos = min_child;
393+
}
394+
395+
return blocknum;
364396
}
365397

366398
/*
@@ -369,7 +401,8 @@ ltsGetFreeBlock(LogicalTapeSet *lts)
369401
static void
370402
ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
371403
{
372-
int ndx;
404+
long *heap;
405+
unsigned long pos;
373406

374407
/*
375408
* Do nothing if we're no longer interested in remembering free space.
@@ -382,19 +415,35 @@ ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
382415
*/
383416
if (lts->nFreeBlocks >= lts->freeBlocksLen)
384417
{
418+
/*
419+
* If the freelist becomes very large, just return and leak this free
420+
* block.
421+
*/
422+
if (lts->freeBlocksLen * 2 > MaxAllocSize)
423+
return;
424+
385425
lts->freeBlocksLen *= 2;
386426
lts->freeBlocks = (long *) repalloc(lts->freeBlocks,
387427
lts->freeBlocksLen * sizeof(long));
388428
}
389429

390-
/*
391-
* Add blocknum to array, and mark the array unsorted if it's no longer in
392-
* decreasing order.
393-
*/
394-
ndx = lts->nFreeBlocks++;
395-
lts->freeBlocks[ndx] = blocknum;
396-
if (ndx > 0 && lts->freeBlocks[ndx - 1] < blocknum)
397-
lts->blocksSorted = false;
430+
heap = lts->freeBlocks;
431+
pos = lts->nFreeBlocks;
432+
433+
/* place entry at end of minheap array */
434+
heap[pos] = blocknum;
435+
lts->nFreeBlocks++;
436+
437+
/* sift up */
438+
while (pos != 0)
439+
{
440+
unsigned long parent = parent_offset(pos);
441+
if (heap[parent] < heap[pos])
442+
break;
443+
444+
swap_nodes(heap, parent, pos);
445+
pos = parent;
446+
}
398447
}
399448

400449
/*
@@ -524,7 +573,6 @@ LogicalTapeSetCreate(int ntapes, TapeShare *shared, SharedFileSet *fileset,
524573
lts->nBlocksWritten = 0L;
525574
lts->nHoleBlocks = 0L;
526575
lts->forgetFreeSpace = false;
527-
lts->blocksSorted = true; /* a zero-length array is sorted ... */
528576
lts->freeBlocksLen = 32; /* reasonable initial guess */
529577
lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
530578
lts->nFreeBlocks = 0;

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.