Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit dd1f6b0

Browse filesBrowse files
committed
Provide a way for block-level table AMs to re-use acquire_sample_rows()
While keeping the API the same, this commit provides a way for block-level table AMs to re-use the existing acquire_sample_rows() by providing custom callbacks for getting the next block and the next tuple. Reported-by: Andres Freund Discussion: https://postgr.es/m/20240407214001.jgpg5q3yv33ve6y3%40awork3.anarazel.de Reviewed-by: Pavel Borisov
1 parent df64c81 commit dd1f6b0
Copy full SHA for dd1f6b0

File tree

Expand file treeCollapse file tree

5 files changed

+106
-19
lines changed
Filter options
Expand file treeCollapse file tree

5 files changed

+106
-19
lines changed

‎src/backend/access/heap/heapam_handler.c

Copy file name to clipboardExpand all lines: src/backend/access/heap/heapam_handler.c
+12Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2666,6 +2666,18 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
26662666
}
26672667
}
26682668

2669+
/*
2670+
* heapam_analyze -- implementation of relation_analyze() for heap
2671+
* table access method
2672+
*/
2673+
static void
2674+
heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
2675+
BlockNumber *totalpages, BufferAccessStrategy bstrategy)
2676+
{
2677+
block_level_table_analyze(relation, func, totalpages, bstrategy,
2678+
heapam_scan_analyze_next_block,
2679+
heapam_scan_analyze_next_tuple);
2680+
}
26692681

26702682
/* ------------------------------------------------------------------------
26712683
* Definition of the heap table access method.

‎src/backend/commands/analyze.c

Copy file name to clipboardExpand all lines: src/backend/commands/analyze.c
+26-16Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ int default_statistics_target = 100;
7676
/* A few variables that don't seem worth passing around as parameters */
7777
static MemoryContext anl_context = NULL;
7878
static BufferAccessStrategy vac_strategy;
79+
static ScanAnalyzeNextBlockFunc scan_analyze_next_block;
80+
static ScanAnalyzeNextTupleFunc scan_analyze_next_tuple;
7981

8082

8183
static void do_analyze_rel(Relation onerel,
@@ -88,9 +90,6 @@ static void compute_index_stats(Relation onerel, double totalrows,
8890
MemoryContext col_context);
8991
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
9092
Node *index_expr);
91-
static int acquire_sample_rows(Relation onerel, int elevel,
92-
HeapTuple *rows, int targrows,
93-
double *totalrows, double *totaldeadrows);
9493
static int compare_rows(const void *a, const void *b, void *arg);
9594
static int acquire_inherited_sample_rows(Relation onerel, int elevel,
9695
HeapTuple *rows, int targrows,
@@ -191,7 +190,10 @@ analyze_rel(Oid relid, RangeVar *relation,
191190
if (onerel->rd_rel->relkind == RELKIND_RELATION ||
192191
onerel->rd_rel->relkind == RELKIND_MATVIEW)
193192
{
194-
/* Use row acquisition function provided by table AM */
193+
/*
194+
* Get row acquisition function, blocks and tuples iteration callbacks
195+
* provided by table AM
196+
*/
195197
table_relation_analyze(onerel, &acquirefunc,
196198
&relpages, vac_strategy);
197199
}
@@ -1117,15 +1119,17 @@ block_sampling_read_stream_next(ReadStream *stream,
11171119
}
11181120

11191121
/*
1120-
* acquire_sample_rows -- acquire a random sample of rows from the heap
1122+
* acquire_sample_rows -- acquire a random sample of rows from the
1123+
* block-based relation
11211124
*
11221125
* Selected rows are returned in the caller-allocated array rows[], which
11231126
* must have at least targrows entries.
11241127
* The actual number of rows selected is returned as the function result.
1125-
* We also estimate the total numbers of live and dead rows in the heap,
1128+
* We also estimate the total numbers of live and dead rows in the relation,
11261129
* and return them into *totalrows and *totaldeadrows, respectively.
11271130
*
1128-
* The returned list of tuples is in order by physical position in the heap.
1131+
* The returned list of tuples is in order by physical position in the
1132+
* relation.
11291133
* (We will rely on this later to derive correlation estimates.)
11301134
*
11311135
* As of May 2004 we use a new two-stage method: Stage one selects up
@@ -1147,7 +1151,7 @@ block_sampling_read_stream_next(ReadStream *stream,
11471151
* look at a statistically unbiased set of blocks, we should get
11481152
* unbiased estimates of the average numbers of live and dead rows per
11491153
* block. The previous sampling method put too much credence in the row
1150-
* density near the start of the heap.
1154+
* density near the start of the relation.
11511155
*/
11521156
static int
11531157
acquire_sample_rows(Relation onerel, int elevel,
@@ -1188,7 +1192,7 @@ acquire_sample_rows(Relation onerel, int elevel,
11881192
/* Prepare for sampling rows */
11891193
reservoir_init_selection_state(&rstate, targrows);
11901194

1191-
scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
1195+
scan = table_beginscan_analyze(onerel);
11921196
slot = table_slot_create(onerel, NULL);
11931197

11941198
stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
@@ -1200,11 +1204,11 @@ acquire_sample_rows(Relation onerel, int elevel,
12001204
0);
12011205

12021206
/* Outer loop over blocks to sample */
1203-
while (heapam_scan_analyze_next_block(scan, stream))
1207+
while (scan_analyze_next_block(scan, stream))
12041208
{
12051209
vacuum_delay_point();
12061210

1207-
while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
1211+
while (scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
12081212
{
12091213
/*
12101214
* The first targrows sample rows are simply copied into the
@@ -1256,7 +1260,7 @@ acquire_sample_rows(Relation onerel, int elevel,
12561260
read_stream_end(stream);
12571261

12581262
ExecDropSingleTupleTableSlot(slot);
1259-
heap_endscan(scan);
1263+
table_endscan(scan);
12601264

12611265
/*
12621266
* If we didn't find as many tuples as we wanted then we're done. No sort
@@ -1328,16 +1332,22 @@ compare_rows(const void *a, const void *b, void *arg)
13281332
}
13291333

13301334
/*
1331-
* heapam_analyze -- implementation of relation_analyze() table access method
1332-
* callback for heap
1335+
* block_level_table_analyze -- implementation of relation_analyze() for
1336+
* block-level table access methods
13331337
*/
13341338
void
1335-
heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
1336-
BlockNumber *totalpages, BufferAccessStrategy bstrategy)
1339+
block_level_table_analyze(Relation relation,
1340+
AcquireSampleRowsFunc *func,
1341+
BlockNumber *totalpages,
1342+
BufferAccessStrategy bstrategy,
1343+
ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
1344+
ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb)
13371345
{
13381346
*func = acquire_sample_rows;
13391347
*totalpages = RelationGetNumberOfBlocks(relation);
13401348
vac_strategy = bstrategy;
1349+
scan_analyze_next_block = scan_analyze_next_block_cb;
1350+
scan_analyze_next_tuple = scan_analyze_next_tuple_cb;
13411351
}
13421352

13431353

‎src/include/access/tableam.h

Copy file name to clipboardExpand all lines: src/include/access/tableam.h
+13Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,19 @@ table_beginscan_tid(Relation rel, Snapshot snapshot)
10201020
return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
10211021
}
10221022

1023+
/*
1024+
* table_beginscan_analyze is an alternative entry point for setting up a
1025+
* TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using
1026+
* the same data structure although the behavior is rather different.
1027+
*/
1028+
static inline TableScanDesc
1029+
table_beginscan_analyze(Relation rel)
1030+
{
1031+
uint32 flags = SO_TYPE_ANALYZE;
1032+
1033+
return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
1034+
}
1035+
10231036
/*
10241037
* End relation scan.
10251038
*/

‎src/include/commands/vacuum.h

Copy file name to clipboardExpand all lines: src/include/commands/vacuum.h
+53-3Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@
2121
#include "catalog/pg_class.h"
2222
#include "catalog/pg_statistic.h"
2323
#include "catalog/pg_type.h"
24+
#include "executor/tuptable.h"
2425
#include "parser/parse_node.h"
2526
#include "storage/buf.h"
2627
#include "storage/lock.h"
28+
#include "storage/read_stream.h"
2729
#include "utils/relcache.h"
2830

2931
/*
@@ -390,12 +392,60 @@ extern void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs,
390392
extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
391393

392394
/* in commands/analyze.c */
395+
396+
struct TableScanDescData;
397+
398+
399+
/*
400+
* A callback to prepare to analyze a block from `stream` of `scan`. The scan
401+
* has been started with table_beginscan_analyze().
402+
*
403+
* The callback may acquire resources like locks that are held until
404+
* ScanAnalyzeNextTupleFunc returns false. In some cases it could be
405+
* useful to hold a lock until all tuples in a block have been analyzed by
406+
* ScanAnalyzeNextTupleFunc.
407+
*
408+
* The callback can return false if the block is not suitable for
409+
* sampling, e.g. because it's a metapage that could never contain tuples.
410+
*
411+
* This is primarily suited for block-based AMs. It's not clear what a
412+
* good interface for non block-based AMs would be, so there isn't one
413+
* yet and sampling using a custom implementation of acquire_sample_rows
414+
* may be preferred.
415+
*/
416+
typedef bool (*ScanAnalyzeNextBlockFunc) (struct TableScanDescData *scan,
417+
ReadStream *stream);
418+
419+
/*
420+
* A callback to iterate over tuples in the block selected with
421+
* ScanAnalyzeNextBlockFunc (which needs to have returned true, and
422+
* this routine may not have returned false for the same block before). If
423+
* a tuple that's suitable for sampling is found, true is returned and a
424+
* tuple is stored in `slot`.
425+
*
426+
* *liverows and *deadrows are incremented according to the encountered
427+
* tuples.
428+
*
429+
* Not every AM might have a meaningful concept of dead rows, in which
430+
* case it's OK to not increment *deadrows - but note that that may
431+
* influence autovacuum scheduling (see comment for relation_vacuum
432+
* callback).
433+
*/
434+
typedef bool (*ScanAnalyzeNextTupleFunc) (struct TableScanDescData *scan,
435+
TransactionId OldestXmin,
436+
double *liverows,
437+
double *deadrows,
438+
TupleTableSlot *slot);
439+
393440
extern void analyze_rel(Oid relid, RangeVar *relation,
394441
VacuumParams *params, List *va_cols, bool in_outer_xact,
395442
BufferAccessStrategy bstrategy);
396-
extern void heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
397-
BlockNumber *totalpages,
398-
BufferAccessStrategy bstrategy);
443+
extern void block_level_table_analyze(Relation relation,
444+
AcquireSampleRowsFunc *func,
445+
BlockNumber *totalpages,
446+
BufferAccessStrategy bstrategy,
447+
ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
448+
ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb);
399449

400450
extern bool std_typanalyze(VacAttrStats *stats);
401451

‎src/tools/pgindent/typedefs.list

Copy file name to clipboardExpand all lines: src/tools/pgindent/typedefs.list
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2535,6 +2535,8 @@ ScalarIOData
25352535
ScalarItem
25362536
ScalarMCVItem
25372537
Scan
2538+
ScanAnalyzeNextBlockFunc
2539+
ScanAnalyzeNextTupleFunc
25382540
ScanDirection
25392541
ScanKey
25402542
ScanKeyData

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.