This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 23aef441a827a77f3338e81950a0e66a26f6eaf7
Author: Haolin Wang <whao...@vmware.com>
AuthorDate: Sat Oct 29 19:51:31 2022 +0800

fast-analyze: implement fast ANALYZE for append-optimized tables

Prior to this patch, GPDB ANALYZE on large AO/CO tables was a time-consuming process. This is because PostgreSQL's two-stage sampling method didn't work well on AO/CO tables: GPDB had to unpack all varblocks up to the target tuples, which could easily degenerate into an almost-full table scan when sampled tuples fell near the end of the table.

Denis Smirnov's <s...@picodata.io> PR greenplum-db#11190 introduced a `logical` block concept containing a fixed number of tuples to support PG's two-stage sampling mechanism; it also sped up fetching target tuples by skipping decompression of varblock content. Thanks for Denis Smirnov's great contribution!

Also, thanks to Ashwin Agrawal <aash...@vmware.com> for his advice on leveraging the AO Block Directory to locate the target sample row without scanning unnecessary varblocks, which brings another significant performance improvement once the cache is warmed up.

In addition:

- GPDB has an AO/CO-specific feature of storing the total tuple count in an auxiliary table, where it can be obtained with little overhead.
- GPDB has `fetch` facilities that support finding the varblock for a given AOTupleId without decompressing unnecessary varblocks.

Based on the above work and properties, this patch re-implements AO/CO ANALYZE sampling by combining Knuth's Algorithm S with varblock skipping, to address the time-consumption problem. We didn't implement two-stage sampling for AO/CO because the total size of the data set (the total tuple count) is known in advance, so Algorithm S is sufficient to satisfy the sampling requirement.
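For readers unfamiliar with Algorithm S, below is a minimal, self-contained C sketch of the selection-sampling idea. It is illustrative only (the names sample_rows and picked are invented here); the patch's actual implementation is RowSampler_Init()/RowSampler_Next() in src/backend/utils/misc/sampling.c:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Knuth's Algorithm S (selection sampling): visit rows 0..N-1 once and
 * select each row with probability (samples still needed) / (rows left),
 * which yields exactly n rows, uniformly at random and in physical order.
 */
static void
sample_rows(int64_t N, int64_t n)
{
	int64_t picked = 0;

	for (int64_t row = 0; row < N && picked < n; row++)
	{
		/* select this row with probability (n - picked) / (N - row) */
		if (drand48() < (double) (n - picked) / (double) (N - row))
		{
			printf("sample target row: %lld\n", (long long) row);
			picked++;
		}
	}
}

int
main(void)
{
	srand48(2022);
	sample_rows(1000000, 10);	/* draw 10 of 1,000,000 rows */
	return 0;
}

Each selected row number plays the role of `targrow` in this patch. Because the selections come out in increasing order, sampled tuples are visited in physical order, so intervening varblocks can be skipped wholesale or, when a block directory exists, the target varblock can be looked up directly.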
Special thanks to Zhenghua Lyu (https://kainwen.com/) for the detailed analysis of Algorithm S: [Analysis of Algorithm S](https://kainwen.com/2022/11/06/analysis-of-algorithm-s) and the follow-up [discussion](https://stackoverflow.com/questions/74345921/performance-comparsion-algorithm-s-and-algorithm-z?noredirect=1#comment131292564_74345921)

Here is a simple example to show the optimization effect:

[AO with compression, with Fast Analyze enabled]

create table ao (a int, b inet, c inet)
with (appendonly=true, orientation=row, compresstype=zlib, compresslevel=3);

insert into ao
select i,
  (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text))::inet,
  (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text))::inet
from generate_series(1,10000000)i;

insert into ao select * from ao;
insert into ao select * from ao;
insert into ao select * from ao;
insert into ao select * from ao;
insert into ao select * from ao;
insert into ao select * from ao;
insert into ao select * from ao;

select count(*) from ao;
   count
------------
 1280000000
(1 row)

gpadmin=# analyze ao;
ANALYZE
Time: 2814.939 ms (00:02.815)

[with block directory and caching warmed]

gpadmin=# analyze ao;
ANALYZE
Time: 1605.342 ms (00:01.605)

[Legacy Analyze]

gpadmin=# analyze ao;
ANALYZE
Time: 59711.905 ms (00:59.712)

[Heap without compression]

create table heap (a int, b inet, c inet);
insert same data set

gpadmin=# analyze heap;
ANALYZE
Time: 2087.694 ms (00:02.088)

Co-authored-by: Soumyadeep Chakraborty <soumyadeep2...@gmail.com>
Reviewed by: Ashwin Agrawal, Soumyadeep Chakraborty, Zhenglong Li, Qing Ma
---
 src/backend/access/aocs/aocsam.c                    | 465 ++++++++++++++-
 src/backend/access/aocs/aocsam_handler.c            |  94 ++-
 src/backend/access/appendonly/appendonlyam.c        | 357 +++++++++++-
 .../access/appendonly/appendonlyam_handler.c        | 107 +++-
 .../access/appendonly/appendonlyblockdirectory.c    | 175 +++++-
 src/backend/access/heap/heapam_handler.c            |   1 -
 src/backend/commands/analyze.c                      | 132 ++---
 src/backend/commands/analyzefuncs.c                 |   3 +
 src/backend/utils/datumstream/datumstream.c         |   2 +-
 src/backend/utils/misc/sampling.c                   |   8 +-
 src/include/access/tableam.h                        |  48 +-
 src/include/cdb/cdbaocsam.h                         | 156 +++--
 src/include/cdb/cdbappendonlyam.h                   |  48 +-
 src/include/cdb/cdbappendonlyblockdirectory.h       |  63 +-
 src/include/commands/vacuum.h                       |   7 -
 src/include/utils/datumstream.h                     |   4 +-
 src/include/utils/sampling.h                        |   2 +-
 src/test/isolation2/expected/ao_blkdir.out          | 318 +++++++++++
 src/test/isolation2/input/uao/fast_analyze.source   | 349 ++++++++++++
 src/test/isolation2/isolation2_schedule             |   2 +
 src/test/isolation2/output/uao/fast_analyze.source  | 632 +++++++++++++++++++++
 src/test/isolation2/sql/ao_blkdir.sql               | 151 +++++
 22 files changed, 2912 insertions(+), 212 deletions(-)

diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index a3f2b597977..d3e5c15802a 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -124,6 +124,11 @@ open_all_datumstreamread_segfiles(AOCSScanDesc scan, AOCSFileSegInfo *segInfo) RelationOpenSmgr(rel); open_datumstreamread_segfile(basepath, rel, segInfo, ds[attno], attno); + + /* skip reading block for ANALYZE */ + if ((scan->rs_base.rs_flags & SO_TYPE_ANALYZE) != 0) + continue; + datumstreamread_block(ds[attno], blockDirectory, attno); AOCSScanDesc_UpdateTotalBytesRead(scan, attno); @@ -467,6 +472,46 @@ close_cur_scan_seg(AOCSScanDesc scan) AppendOnlyBlockDirectory_End_forInsert(scan->blockDirectory); } +static void +aocs_blkdirscan_init(AOCSScanDesc scan) +{ + if (scan->aocsfetch == NULL) + { + int natts = RelationGetNumberOfAttributes(scan->rs_base.rs_rd); + scan->proj = palloc(natts * sizeof(*scan->proj)); + MemSet(scan->proj, true, natts * sizeof(*scan->proj)); + + scan->aocsfetch = aocs_fetch_init(scan->rs_base.rs_rd, + scan->rs_base.rs_snapshot, + scan->appendOnlyMetaDataSnapshot, + scan->proj); + } + + scan->blkdirscan = palloc0(sizeof(AOBlkDirScanData)); + AOBlkDirScan_Init(scan->blkdirscan, &scan->aocsfetch->blockDirectory); +} + +static void +aocs_blkdirscan_finish(AOCSScanDesc scan) +{ + AOBlkDirScan_Finish(scan->blkdirscan); + pfree(scan->blkdirscan); + scan->blkdirscan = NULL; + + if (scan->aocsfetch != NULL) + {
+ aocs_fetch_finish(scan->aocsfetch); + pfree(scan->aocsfetch); + scan->aocsfetch = NULL; + } + + if (scan->proj != NULL) + { + pfree(scan->proj); + scan->proj = NULL; + } +} + /* * aocs_beginrangescan * @@ -552,6 +597,7 @@ aocs_beginscan_internal(Relation relation, AttrNumber natts; Oid visimaprelid; Oid visimapidxid; + Oid blkdirrelid; scan = (AOCSScanDesc) palloc0(sizeof(AOCSScanDescData)); scan->rs_base.rs_rd = relation; @@ -592,6 +638,12 @@ aocs_beginscan_internal(Relation relation, scan->columnScanInfo.ds = NULL; + if ((flags & SO_TYPE_ANALYZE) != 0) + { + scan->segfirstrow = 0; + scan->targrow = 0; + } + GetAppendOnlyEntryAttributes(RelationGetRelid(relation), NULL, NULL, @@ -599,16 +651,24 @@ aocs_beginscan_internal(Relation relation, NULL); GetAppendOnlyEntryAuxOids(relation, - NULL, NULL, NULL, + NULL, &blkdirrelid, NULL, &visimaprelid, &visimapidxid); if (scan->total_seg != 0) + { AppendOnlyVisimap_Init(&scan->visibilityMap, visimaprelid, visimapidxid, AccessShareLock, appendOnlyMetaDataSnapshot); + if ((flags & SO_TYPE_ANALYZE) != 0) + { + if (OidIsValid(blkdirrelid)) + aocs_blkdirscan_init(scan); + } + } + return scan; } @@ -666,6 +726,9 @@ aocs_endscan(AOCSScanDesc scan) if (scan->total_seg != 0) AppendOnlyVisimap_Finish(&scan->visibilityMap, AccessShareLock); + if (scan->blkdirscan != NULL) + aocs_blkdirscan_finish(scan); + /* GPDB should backport this to upstream */ if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT) UnregisterSnapshot(scan->rs_base.rs_snapshot); @@ -675,6 +738,396 @@ aocs_endscan(AOCSScanDesc scan) pfree(scan); } +static int +aocs_locate_target_segment(AOCSScanDesc scan, int64 targrow) +{ + int64 rowcount; + + for (int i = scan->cur_seg; i < scan->total_seg; i++) + { + if (i < 0) + continue; + + rowcount = scan->seginfo[i]->total_tupcount; + if (rowcount <= 0) + continue; + + if (scan->segfirstrow + rowcount - 1 >= targrow) + { + /* found the target segment */ + return i; + } + + /* continue next segment */ + scan->segfirstrow += rowcount; + scan->segrowsprocessed = 0; + } + + /* row is beyond the total number of rows in the relation */ + return -1; +} + +/* + * block directory based get_target_tuple() + */ +static bool +aocs_blkdirscan_get_target_tuple(AOCSScanDesc scan, int64 targrow, TupleTableSlot *slot) +{ + int segno, segidx; + int64 rownum = -1; + int64 rowsprocessed; + AOTupleId aotid; + int ncols = scan->columnScanInfo.relationTupleDesc->natts; + AppendOnlyBlockDirectory *blkdir = &scan->aocsfetch->blockDirectory; + + Assert(scan->blkdirscan != NULL); + + /* locate the target segment */ + segidx = aocs_locate_target_segment(scan, targrow); + if (segidx < 0) + return false; + + /* next starting position in locating segfile */ + scan->cur_seg = segidx; + + segno = scan->seginfo[segidx]->segno; + Assert(segno > InvalidFileSegNumber && segno <= AOTupleId_MaxSegmentFileNum); + + /* + * Note: It is safe to assume that the scan's segfile array and the + * blockdir's segfile array are identical. Otherwise, we should stop + * processing and throw an exception to make the error visible. + */ + if (blkdir->segmentFileInfo[segidx]->segno != segno) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("segfile array contents in both scan descriptor " + "and block directory are not identical on " + "append-optimized relation '%s'", + RelationGetRelationName(blkdir->aoRel)))); + } + + /* + * Unlike ao_row, we set currentSegmentFileNum for ao_column here + * just for passing the assertion in extract_minipage() called by + * AOBlkDirScan_GetRowNum(). 
+ * Since we don't invoke AppendOnlyBlockDirectory_GetCachedEntry() + * for ao_column, it should be restored to the original value + * so that AppendOnlyBlockDirectory_GetEntry() works properly. + */ + int currentSegmentFileNum = blkdir->currentSegmentFileNum; + blkdir->currentSegmentFileNum = blkdir->segmentFileInfo[segidx]->segno; + + /* locate the target row by seqscan block directory */ + for (int col = 0; col < ncols; col++) + { + /* + * "segfirstrow" should be always pointing to the first row of + * a new segfile, only locate_target_segment could update + * its value. + * + * "segrowsprocessed" is used for tracking the position of + * processed rows in the current segfile. + */ + rowsprocessed = scan->segfirstrow + scan->segrowsprocessed; + + if ((scan->rs_base.rs_rd)->rd_att->attrs[col].attisdropped) + continue; + + rownum = AOBlkDirScan_GetRowNum(scan->blkdirscan, + segno, + col, + targrow, + &rowsprocessed); + + elog(DEBUG2, "AOBlkDirScan_GetRowNum(segno: %d, col: %d, targrow: %ld): " + "[segfirstrow: %ld, segrowsprocessed: %ld, rownum: %ld, cached_mpentry_num: %d]", + segno, col, targrow, scan->segfirstrow, scan->segrowsprocessed, rownum, + blkdir->cached_mpentry_num); + + if (rownum < 0) + continue; + + scan->segrowsprocessed = rowsprocessed - scan->segfirstrow; + + /* + * Found a column represented in the block directory. + * Here we just look for the 1st such column, no need + * to read other columns within the same row. + */ + break; + } + + /* restore the original value, as mentioned above */ + blkdir->currentSegmentFileNum = currentSegmentFileNum; + + if (rownum < 0) + return false; + + /* form the target tuple TID */ + AOTupleIdInit(&aotid, segno, rownum); + + ExecClearTuple(slot); + + /* fetch the target tuple */ + if(!aocs_fetch(scan->aocsfetch, &aotid, slot)) + return false; + + /* OK to return this tuple */ + ExecStoreVirtualTuple(slot); + pgstat_count_heap_fetch(scan->rs_base.rs_rd); + + return true; +} + +/* + * returns the segfile number in which `targrow` locates + */ +static int +aocs_getsegment(AOCSScanDesc scan, int64 targrow) +{ + int segno, segidx; + + /* locate the target segment */ + segidx = aocs_locate_target_segment(scan, targrow); + if (segidx < 0) + { + /* done reading all segments */ + close_cur_scan_seg(scan); + scan->cur_seg = -1; + return -1; + } + + segno = scan->seginfo[segidx]->segno; + Assert(segno > InvalidFileSegNumber && segno <= AOTupleId_MaxSegmentFileNum); + + if (segidx > scan->cur_seg) + { + close_cur_scan_seg(scan); + /* adjust cur_seg to fit for open_next_scan_seg() */ + scan->cur_seg = segidx - 1; + if (open_next_scan_seg(scan) >= 0) + { + /* new segment, make sure segrowsprocessed was reset */ + Assert(scan->segrowsprocessed == 0); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Unexpected behavior, failed to open segno %d during scanning AOCO table %s", + segno, RelationGetRelationName(scan->rs_base.rs_rd)))); + } + } + + return segno; +} + +static inline int +aocs_block_remaining_rows(DatumStreamRead *ds) +{ + return (ds->blockRowCount - ds->blockRowsProcessed); +} + +/* + * fetches a single column value corresponding to `endrow` (equal to `targrow`) + */ +static bool +aocs_gettuple_column(AOCSScanDesc scan, AttrNumber attno, int64 startrow, int64 endrow, bool chkvisimap, TupleTableSlot *slot) +{ + bool isSnapshotAny = (scan->rs_base.rs_snapshot == SnapshotAny); + DatumStreamRead *ds = scan->columnScanInfo.ds[attno]; + int segno = scan->seginfo[scan->cur_seg]->segno; + AOTupleId aotid; + bool ret
= true; + int64 rowstoprocess, nrows, rownum; + Datum *values; + bool *nulls; + + if (ds->blockFirstRowNum <= 0) + elog(ERROR, "AOCO varblock->blockFirstRowNum should be greater than zero."); + + Assert(segno > InvalidFileSegNumber && segno <= AOTupleId_MaxSegmentFileNum); + Assert(startrow <= endrow); + + rowstoprocess = endrow - startrow + 1; + nrows = ds->blockRowsProcessed + rowstoprocess; + rownum = ds->blockFirstRowNum + nrows - 1; + + /* form the target tuple TID */ + AOTupleIdInit(&aotid, segno, rownum); + + if (chkvisimap && !isSnapshotAny && !AppendOnlyVisimap_IsVisible(&scan->visibilityMap, &aotid)) + { + if (slot != NULL) + ExecClearTuple(slot); + + ret = false; + /* must update tracking vars before return */ + goto out; + } + + /* rowNumInBlock = rowNum - blockFirstRowNum */ + datumstreamread_find(ds, rownum - ds->blockFirstRowNum); + + values = slot->tts_values; + nulls = slot->tts_isnull; + + datumstreamread_get(ds, &(values[attno]), &(nulls[attno])); + +out: + /* update rows processed */ + ds->blockRowsProcessed += rowstoprocess; + + return ret; +} + +/* + * fetches all columns of the target tuple corresponding to `targrow` + */ +static bool +aocs_gettuple(AOCSScanDesc scan, int64 targrow, TupleTableSlot *slot) +{ + bool ret = true; + int64 rowcount = -1; + int64 rowstoprocess; + bool chkvisimap = true; + + Assert(scan->cur_seg >= 0); + Assert(slot != NULL); + + ExecClearTuple(slot); + + rowstoprocess = targrow - scan->segfirstrow + 1; + + /* read from scan->cur_seg */ + for (AttrNumber i = 0; i < scan->columnScanInfo.num_proj_atts; i++) + { + AttrNumber attno = scan->columnScanInfo.proj_atts[i]; + DatumStreamRead *ds = scan->columnScanInfo.ds[attno]; + int64 startrow = scan->segfirstrow + scan->segrowsprocessed; + + if (ds->blockRowCount <= 0) + ; /* haven't read block */ + else + { + /* block was read */ + rowcount = aocs_block_remaining_rows(ds); + Assert(rowcount >= 0); + + if (startrow + rowcount - 1 >= targrow) + { + if (!aocs_gettuple_column(scan, attno, startrow, targrow, chkvisimap, slot)) + { + ret = false; + /* must update tracking vars before return */ + goto out; + } + + chkvisimap = false; + /* haven't finished scanning on current block */ + continue; + } + else + startrow += rowcount; /* skip scanning remaining rows */ + } + + /* + * Keep reading block headers until we find the block containing + * the target row. 
+ */ + while (true) + { + elog(DEBUG2, "aocs_gettuple(): [targrow: %ld, currow: %ld, diff: %ld, " + "startrow: %ld, rowcount: %ld, segfirstrow: %ld, segrowsprocessed: %ld, nth: %d, " + "blockRowCount: %d, blockRowsProcessed: %d]", targrow, startrow + rowcount - 1, + startrow + rowcount - 1 - targrow, startrow, rowcount, scan->segfirstrow, + scan->segrowsprocessed, datumstreamread_nth(ds), ds->blockRowCount, + ds->blockRowsProcessed); + + if (datumstreamread_block_info(ds)) + { + rowcount = ds->blockRowCount; + Assert(rowcount > 0); + + /* new block, reset blockRowsProcessed */ + ds->blockRowsProcessed = 0; + + if (startrow + rowcount - 1 >= targrow) + { + /* read a new buffer to consume */ + datumstreamread_block_content(ds); + + if (!aocs_gettuple_column(scan, attno, startrow, targrow, chkvisimap, slot)) + { + ret = false; + /* must update tracking vars before return */ + goto out; + } + + chkvisimap = false; + /* done this column */ + break; + } + + startrow += rowcount; + AppendOnlyStorageRead_SkipCurrentBlock(&ds->ao_read); + /* continue next block */ + } + else + pg_unreachable(); /* unreachable code */ + } + } + +out: + /* update rows processed */ + scan->segrowsprocessed = rowstoprocess; + + if (ret) + { + ExecStoreVirtualTuple(slot); + pgstat_count_heap_getnext(scan->rs_base.rs_rd); + } + + return ret; +} + +/* + * Given a specific target row number 'targrow' (in the space of all row numbers + * physically present in the table, i.e. across all segfiles), scan and return + * the corresponding tuple in 'slot'. + * + * If the tuple is visible, return true. Otherwise, return false. + */ +bool +aocs_get_target_tuple(AOCSScanDesc aoscan, int64 targrow, TupleTableSlot *slot) +{ + if (aoscan->columnScanInfo.relationTupleDesc == NULL) + { + aoscan->columnScanInfo.relationTupleDesc = slot->tts_tupleDescriptor; + /* Pin it! ... and of course release it upon destruction / rescan */ + PinTupleDesc(aoscan->columnScanInfo.relationTupleDesc); + initscan_with_colinfo(aoscan); + } + + if (aoscan->blkdirscan != NULL) + return aocs_blkdirscan_get_target_tuple(aoscan, targrow, slot); + + if (aocs_getsegment(aoscan, targrow) < 0) + { + /* all done */ + ExecClearTuple(slot); + return false; + } + + /* + * Unlike AO_ROW, AO_COLUMN may have different varblocks + * for different columns, so we get per-column tuple directly + * on the way of walking per-column varblock. + */ + return aocs_gettuple(aoscan, targrow, slot); +} static pg_attribute_hot_inline bool aocs_getnext_noqual(AOCSScanDesc scan, ScanDirection direction, TupleTableSlot *slot) @@ -689,6 +1142,9 @@ aocs_getnext_noqual(AOCSScanDesc scan, ScanDirection direction, TupleTableSlot * Assert(ScanDirectionIsForward(direction)); + /* should not be in ANALYZE - we use a different API */ + Assert((scan->rs_base.rs_flags & SO_TYPE_ANALYZE) == 0); + if (scan->columnScanInfo.relationTupleDesc == NULL) { scan->columnScanInfo.relationTupleDesc = slot->tts_tupleDescriptor; @@ -782,9 +1238,6 @@ ReadNext: * The tuple is invisible. * In `analyze`, we can simply return false */ - if ((scan->rs_base.rs_flags & SO_TYPE_ANALYZE) != 0) - return false; - rowNum = INT64CONST(-1); visible_pass = false; continue; /* not break, need advance for other cols */ @@ -930,11 +1383,7 @@ ReadNext: { /* * The tuple is invisible. 
- * In `analyze`, we can simply return false */ - if ((scan->rs_base.rs_flags & SO_TYPE_ANALYZE) != 0) - return false; - rowNum = INT64CONST(-1); visible_pass = false; continue; /* not break, need advance for other cols */ diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 504e9c0fb7a..22ca114cfe0 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -47,6 +47,7 @@ #include "utils/lsyscache.h" #include "utils/pg_rusage.h" #include "utils/guc.h" +#include "utils/sampling.h" #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) @@ -1623,10 +1624,13 @@ static bool aoco_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) { - AOCSScanDesc aoscan = (AOCSScanDesc) scan; - aoscan->targetTupleId = blockno; - - return true; + /* + * For append-optimized relations, we use a separate sampling + * method. See table_relation_acquire_sample_rows(). + */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("API not supported for appendoptimized relations"))); } static bool @@ -1634,28 +1638,78 @@ aoco_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) { - AOCSScanDesc aoscan = (AOCSScanDesc) scan; - bool ret = false; + /* + * For append-optimized relations, we use a separate sampling + * method. See table_relation_acquire_sample_rows(). + */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("API not supported for appendoptimized relations"))); +} - /* skip several tuples if they are not sampling target */ - while (aoscan->targetTupleId > aoscan->nextTupleId) - { - aoco_getnextslot(scan, ForwardScanDirection, slot); - aoscan->nextTupleId++; - } +static int +aoco_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, + int targrows, double *totalrows, double *totaldeadrows) +{ + int numrows = 0; /* # rows now in reservoir */ + double liverows = 0; /* # live rows seen */ + double deadrows = 0; /* # dead rows seen */ - if (aoscan->targetTupleId == aoscan->nextTupleId) + Assert(targrows > 0); + + TableScanDesc scan = table_beginscan_analyze(onerel); + TupleTableSlot *slot = table_slot_create(onerel, NULL); + AOCSScanDesc aocoscan = (AOCSScanDesc) scan; + + int64 totaltupcount = AOCSScanDesc_TotalTupCount(aocoscan); + int64 totaldeadtupcount = 0; + if (aocoscan->total_seg > 0 ) + totaldeadtupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&aocoscan->visibilityMap); + /* + * The conversion from int64 to double (53 significant bits) is safe as the + * AOTupleId is 48bits, the max value of totalrows is never greater than + * AOTupleId_MaxSegmentFileNum * AOTupleId_MaxRowNum (< 48 significant bits). 
+ */ + *totalrows = (double) (totaltupcount - totaldeadtupcount); + *totaldeadrows = (double) totaldeadtupcount; + + /* Prepare for sampling tuple numbers */ + RowSamplerData rs; + RowSampler_Init(&rs, *totalrows, targrows, random()); + + while (RowSampler_HasMore(&rs)) { - ret = aoco_getnextslot(scan, ForwardScanDirection, slot); - aoscan->nextTupleId++; + aocoscan->targrow = RowSampler_Next(&rs); + + vacuum_delay_point(); - if (ret) - *liverows += 1; + if (aocs_get_target_tuple(aocoscan, aocoscan->targrow, slot)) + { + rows[numrows++] = ExecCopySlotHeapTuple(slot); + liverows++; + } else - *deadrows += 1; /* if return an invisible tuple */ + deadrows++; + + ExecClearTuple(slot); } - return ret; + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + + /* + * Emit some interesting relation info + */ + ereport(elevel, + (errmsg("\"%s\": scanned " INT64_FORMAT " rows, " + "containing %.0f live rows and %.0f dead rows; " + "%d rows in sample, %.0f accurate total live rows, " + "%.f accurate total dead rows", + RelationGetRelationName(onerel), + rs.m, liverows, deadrows, numrows, + *totalrows, *totaldeadrows))); + + return numrows; } static double @@ -2588,6 +2642,7 @@ static TableAmRoutine ao_column_methods = { .relation_vacuum = aoco_vacuum_rel, .scan_analyze_next_block = aoco_scan_analyze_next_block, .scan_analyze_next_tuple = aoco_scan_analyze_next_tuple, + .relation_acquire_sample_rows = aoco_acquire_sample_rows, .index_build_range_scan = aoco_index_build_range_scan, .index_validate_scan = aoco_index_validate_scan, @@ -2602,7 +2657,6 @@ static TableAmRoutine ao_column_methods = { .scan_bitmap_next_tuple = aoco_scan_bitmap_next_tuple, .scan_sample_next_block = aoco_scan_sample_next_block, .scan_sample_next_tuple = aoco_scan_sample_next_tuple, - .acquire_sample_rows = acquire_sample_rows, .amoptions = ao_amoptions, .swap_relation_files = aoco_swap_relation_files, diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index 2f590a8678e..508ddcfbaa9 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -1118,6 +1118,305 @@ getNextBlock(AppendOnlyScanDesc scan) return true; } +static int +appendonly_locate_target_segment(AppendOnlyScanDesc scan, int64 targrow) +{ + int64 rowcount; + + for (int i = scan->aos_segfiles_processed - 1; i < scan->aos_total_segfiles; i++) + { + if (i < 0) + continue; + + rowcount = scan->aos_segfile_arr[i]->total_tupcount; + if (rowcount <= 0) + continue; + + if (scan->segfirstrow + rowcount - 1 >= targrow) + { + /* found the target segment */ + return i; + } + + /* continue next segment */ + scan->segfirstrow += rowcount; + scan->segrowsprocessed = 0; + } + + /* row is beyond the total number of rows in the relation */ + return -1; +} + +/* + * returns the segfile number in which `targrow` locates + */ +static int +appendonly_getsegment(AppendOnlyScanDesc scan, int64 targrow) +{ + int segidx, segno; + + /* locate the target segment */ + segidx = appendonly_locate_target_segment(scan, targrow); + if (segidx < 0) + { + CloseScannedFileSeg(scan); + + /* done reading all segfiles */ + Assert(scan->aos_done_all_segfiles); + + return -1; + } + + if (segidx + 1 > scan->aos_segfiles_processed) + { + /* done current segfile */ + CloseScannedFileSeg(scan); + /* + * Adjust aos_segfiles_processed to guide + * SetNextFileSegForRead() opening next + * right segfile. 
+ */ + scan->aos_segfiles_processed = segidx; + } + + segno = scan->aos_segfile_arr[segidx]->segno; + Assert(segno > InvalidFileSegNumber && segno <= AOTupleId_MaxSegmentFileNum); + + if (scan->aos_need_new_segfile) + { + if (SetNextFileSegForRead(scan)) + { + Assert(scan->segrowsprocessed == 0); + scan->needNextBuffer = true; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Unexpected behavior, failed to open segno %d during scanning AO table %s", + segno, RelationGetRelationName(scan->aos_rd)))); + } + } + + return segno; +} + +static inline int64 +appendonly_block_remaining_rows(AppendOnlyScanDesc scan) +{ + return (scan->executorReadBlock.rowCount - scan->executorReadBlock.blockRowsProcessed); +} + +/* + * locates the block in which `targrow` exists + */ +static void +appendonly_getblock(AppendOnlyScanDesc scan, int64 targrow, int64 *startrow) +{ + AppendOnlyExecutorReadBlock *varblock = &scan->executorReadBlock; + int64 rowcount = -1; + + if (!scan->needNextBuffer) + { + /* we have a current block */ + rowcount = appendonly_block_remaining_rows(scan); + Assert(rowcount >= 0); + + if (*startrow + rowcount - 1 >= targrow) + { + /* row lies in current block, nothing to do */ + return; + } + else + { + /* skip scanning remaining rows */ + *startrow += rowcount; + scan->needNextBuffer = true; + } + } + + /* + * Keep reading block headers until we find the block containing + * the target row. + */ + while (true) + { + elog(DEBUG2, "appendonly_getblock(): [targrow: %ld, currow: %ld, diff: %ld, " + "startrow: %ld, rowcount: %ld, segfirstrow: %ld, segrowsprocessed: %ld, " + "blockRowsProcessed: %ld, blockRowCount: %d]", targrow, *startrow + rowcount - 1, + *startrow + rowcount - 1 - targrow, *startrow, rowcount, scan->segfirstrow, + scan->segrowsprocessed, varblock->blockRowsProcessed, + varblock->rowCount); + + if (AppendOnlyExecutorReadBlock_GetBlockInfo(&scan->storageRead, varblock)) + { + /* new block, reset blockRowsProcessed */ + varblock->blockRowsProcessed = 0; + rowcount = appendonly_block_remaining_rows(scan); + Assert(rowcount > 0); + if (*startrow + rowcount - 1 >= targrow) + { + AppendOnlyExecutorReadBlock_GetContents(varblock); + /* got a new buffer to consume */ + scan->needNextBuffer = false; + return; + } + + *startrow += rowcount; + AppendOnlyExecutionReadBlock_FinishedScanBlock(varblock); + AppendOnlyStorageRead_SkipCurrentBlock(&scan->storageRead); + /* continue next block */ + } + else + pg_unreachable(); /* unreachable code */ + } +} + +/* + * block directory based get_target_tuple() + */ +static bool +appendonly_blkdirscan_get_target_tuple(AppendOnlyScanDesc scan, int64 targrow, TupleTableSlot *slot) +{ + int segno, segidx; + int64 rownum, rowsprocessed; + AOTupleId aotid; + AppendOnlyBlockDirectory *blkdir = &scan->aofetch->blockDirectory; + + Assert(scan->blkdirscan != NULL); + + /* locate the target segment */ + segidx = appendonly_locate_target_segment(scan, targrow); + if (segidx < 0) + return false; + + scan->aos_segfiles_processed = segidx + 1; + + segno = scan->aos_segfile_arr[segidx]->segno; + Assert(segno > InvalidFileSegNumber && segno <= AOTupleId_MaxSegmentFileNum); + + /* + * Note: It is safe to assume that the scan's segfile array and the + * blockdir's segfile array are identical. Otherwise, we should stop + * processing and throw an exception to make the error visible. 
+ */ + if (blkdir->segmentFileInfo[segidx]->segno != segno) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("segfile array contents in both scan descriptor " + "and block directory are not identical on " + "append-optimized relation '%s'", + RelationGetRelationName(blkdir->aoRel)))); + } + + /* + * Set the current segfile info in the blkdir struct, so we can + * reuse the (cached) block directory entry during the tuple fetch + * operation below. See AppendOnlyBlockDirectory_GetCachedEntry(). + */ + blkdir->currentSegmentFileNum = blkdir->segmentFileInfo[segidx]->segno; + blkdir->currentSegmentFileInfo = blkdir->segmentFileInfo[segidx]; + + /* + * "segfirstrow" should be always pointing to the first row of + * a new segfile in blkdir based ANALYZE, only locate_target_segment + * could update its value. + * + * "segrowsprocessed" is used for tracking the position of + * processed rows in the current segfile. + */ + rowsprocessed = scan->segfirstrow + scan->segrowsprocessed; + /* locate the target row by seqscan block directory */ + rownum = AOBlkDirScan_GetRowNum(scan->blkdirscan, + segno, + 0, + targrow, + &rowsprocessed); + + elog(DEBUG2, "AOBlkDirScan_GetRowNum(segno: %d, col: %d, targrow: %ld): " + "[segfirstrow: %ld, segrowsprocessed: %ld, rownum: %ld, cached_mpentry_num: %d]", + segno, 0, targrow, scan->segfirstrow, scan->segrowsprocessed, rownum, + blkdir->cached_mpentry_num); + + if (rownum < 0) + return false; + + scan->segrowsprocessed = rowsprocessed - scan->segfirstrow; + + /* form the target tuple TID */ + AOTupleIdInit(&aotid, segno, rownum); + + /* ensure the target minipage entry was stored in fetch descriptor */ + Assert(blkdir->cached_mpentry_num != InvalidEntryNum); + Assert(blkdir->minipages == &blkdir->minipages[0]); + + /* fetch the target tuple */ + if(!appendonly_fetch(scan->aofetch, &aotid, slot)) + return false; + + /* OK to return this tuple */ + pgstat_count_heap_fetch(scan->aos_rd); + + return true; +} + +/* + * Given a specific target row number 'targrow' (in the space of all row numbers + * physically present in the table, i.e. across all segfiles), scan and return + * the corresponding tuple in 'slot'. + * + * If the tuple is visible, return true. Otherwise, return false. 
+ */ +bool +appendonly_get_target_tuple(AppendOnlyScanDesc aoscan, int64 targrow, TupleTableSlot *slot) +{ + AppendOnlyExecutorReadBlock *varblock = &aoscan->executorReadBlock; + bool visible; + int64 rowsprocessed, rownum; + int segno; + AOTupleId aotid; + + if (aoscan->blkdirscan != NULL) + return appendonly_blkdirscan_get_target_tuple(aoscan, targrow, slot); + + segno = appendonly_getsegment(aoscan, targrow); + if (segno < 0) + return false; + + rowsprocessed = aoscan->segfirstrow + aoscan->segrowsprocessed; + + appendonly_getblock(aoscan, targrow, &rowsprocessed); + + aoscan->segrowsprocessed = rowsprocessed - aoscan->segfirstrow; + + Assert(rowsprocessed + varblock->rowCount - 1 >= targrow); + rownum = varblock->blockFirstRowNum + (targrow - rowsprocessed); + + elog(DEBUG2, "appendonly_getblock() returns: [segno: %d, rownum: %ld]", segno, rownum); + + /* form the target tuple TID */ + AOTupleIdInit(&aotid, segno, rownum); + + visible = (aoscan->snapshot == SnapshotAny || + AppendOnlyVisimap_IsVisible(&aoscan->visibilityMap, &aotid)); + + if (visible && AppendOnlyExecutorReadBlock_FetchTuple(varblock, rownum, 0, NULL, slot)) + { + /* OK to return this tuple */ + pgstat_count_heap_fetch(aoscan->aos_rd); + } + else + { + if (slot != NULL) + ExecClearTuple(slot); + + return false; + } + + return true; +} + /* ---------------- * appendonlygettup - fetch next appendonly tuple * @@ -1138,6 +1437,8 @@ appendonlygettup(AppendOnlyScanDesc scan, TupleTableSlot *slot) { Assert(ScanDirectionIsForward(dir)); + /* should not be in ANALYZE - we use a different API */ + Assert((scan->rs_base.rs_flags & SO_TYPE_ANALYZE) == 0); Assert(scan->usableBlockSize > 0); bool isSnapshotAny = (scan->snapshot == SnapshotAny); @@ -1178,12 +1479,7 @@ appendonlygettup(AppendOnlyScanDesc scan, if (!isSnapshotAny && !AppendOnlyVisimap_IsVisible(&scan->visibilityMap, aoTupleId)) { - /* - * The tuple is invisible. 
- * In `analyze`, we can simply return false - */ - if ((scan->rs_base.rs_flags & SO_TYPE_ANALYZE) != 0) - return false; + /* The tuple is invisible */ } else { @@ -1310,6 +1606,34 @@ finishWriteBlock(AppendOnlyInsertDesc aoInsertDesc) Assert(!AppendOnlyStorageWrite_IsBufferAllocated(&aoInsertDesc->storageWrite)); } +static void +appendonly_blkdirscan_init(AppendOnlyScanDesc scan) +{ + if (scan->aofetch == NULL) + scan->aofetch = appendonly_fetch_init(scan->aos_rd, + scan->snapshot, + scan->appendOnlyMetaDataSnapshot); + + scan->blkdirscan = palloc0(sizeof(AOBlkDirScanData)); + AOBlkDirScan_Init(scan->blkdirscan, &scan->aofetch->blockDirectory); +} + +static void +appendonly_blkdirscan_finish(AppendOnlyScanDesc scan) +{ + AOBlkDirScan_Finish(scan->blkdirscan); + pfree(scan->blkdirscan); + scan->blkdirscan = NULL; + + if (scan->aofetch != NULL) + { + appendonly_fetch_finish(scan->aofetch); + pfree(scan->aofetch); + scan->aofetch = NULL; + } +} + + /* ---------------------------------------------------------------- * append-only access method interface * ---------------------------------------------------------------- @@ -1431,19 +1755,33 @@ appendonly_beginrangescan_internal(Relation relation, scan->blockDirectory = NULL; + if ((flags & SO_TYPE_ANALYZE) != 0) + { + scan->segrowsprocessed = 0; + scan->segfirstrow = 0; + scan->targrow = 0; + } + if (segfile_count > 0) { Oid visimaprelid; Oid visimapidxid; + Oid blkdirrelid; GetAppendOnlyEntryAuxOids(relation, - NULL, NULL, NULL, &visimaprelid, &visimapidxid); + NULL, &blkdirrelid, NULL, &visimaprelid, &visimapidxid); AppendOnlyVisimap_Init(&scan->visibilityMap, visimaprelid, visimapidxid, AccessShareLock, appendOnlyMetaDataSnapshot); + + if ((flags & SO_TYPE_ANALYZE) != 0) + { + if (OidIsValid(blkdirrelid)) + appendonly_blkdirscan_init(scan); + } } scan->totalBytesRead = 0; @@ -1519,7 +1857,7 @@ appendonly_beginscan(Relation relation, */ seginfo = GetAllFileSegInfo(relation, appendOnlyMetaDataSnapshot, &segfile_count, NULL); - + aoscan = appendonly_beginrangescan_internal(relation, snapshot, appendOnlyMetaDataSnapshot, @@ -1614,6 +1952,9 @@ appendonly_endscan(TableScanDesc scan) if (aoscan->aos_total_segfiles > 0) AppendOnlyVisimap_Finish(&aoscan->visibilityMap, AccessShareLock); + if (aoscan->blkdirscan != NULL) + appendonly_blkdirscan_finish(aoscan); + if (aoscan->aofetch) { appendonly_fetch_finish(aoscan->aofetch); diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index 0136d4a850f..218ade630d4 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -46,6 +46,7 @@ #include "utils/lsyscache.h" #include "utils/pg_rusage.h" #include "utils/guc.h" +#include "utils/sampling.h" #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) @@ -1492,10 +1493,13 @@ static bool appendonly_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) { - AppendOnlyScanDesc aoscan = (AppendOnlyScanDesc) scan; - aoscan->targetTupleId = blockno; - - return true; + /* + * For append-optimized relations, we use a separate sampling + * method. See table_relation_acquire_sample_rows(). 
+ */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("API not supported for appendoptimized relations"))); } static bool @@ -1503,30 +1507,91 @@ appendonly_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) { + /* + * For append-optimized relations, we use a separate sampling + * method. See table_relation_acquire_sample_rows(). + */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("API not supported for appendoptimized relations"))); +} + +/* + * Implementation of relation_acquire_sample_rows(). + * + * As opposed to upstream's method of 2-stage sampling, here we can simply use + * Knuth's S algorithm (TAOCP Part 2 Section 3.4.2) as we clearly know N - the + * population size up front (i.e the total number of rows in the relation) + * + * Although an estimate is demanded for the total live rows and total dead rows + * in the table, we can actually return their exact values from aux table metadata. + * + * We intrinsically return rows in physical order, since the rows sampled by + * Algorithm S are in physical order. + */ +static int +appendonly_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, + int targrows, double *totalrows, double *totaldeadrows) +{ + int numrows = 0; /* # number of rows sampled */ + double liverows = 0; /* # live rows seen */ + double deadrows = 0; /* # dead rows seen */ + + Assert(targrows > 0); + + TableScanDesc scan = table_beginscan_analyze(onerel); + TupleTableSlot *slot = table_slot_create(onerel, NULL); AppendOnlyScanDesc aoscan = (AppendOnlyScanDesc) scan; - bool ret = false; - /* skip several tuples if they are not sampling target */ - while (!aoscan->aos_done_all_segfiles - && aoscan->targetTupleId > aoscan->nextTupleId) - { - appendonly_getnextslot(scan, ForwardScanDirection, slot); - aoscan->nextTupleId++; - } + int64 totaltupcount = AppendOnlyScanDesc_TotalTupCount(aoscan); + int64 totaldeadtupcount = 0; + if (aoscan->aos_total_segfiles > 0 ) + totaldeadtupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&aoscan->visibilityMap); + /* + * The conversion from int64 to double (53 significant bits) is safe as the + * AOTupleId is 48bits, the max value of totalrows is never greater than + * AOTupleId_MaxSegmentFileNum * AOTupleId_MaxRowNum (< 48 significant bits). 
+ */ + *totalrows = (double) (totaltupcount - totaldeadtupcount); + *totaldeadrows = (double) totaldeadtupcount; - if (!aoscan->aos_done_all_segfiles - && aoscan->targetTupleId == aoscan->nextTupleId) + /* Prepare for sampling row numbers */ + RowSamplerData rs; + RowSampler_Init(&rs, *totalrows, targrows, random()); + + while (RowSampler_HasMore(&rs)) { - ret = appendonly_getnextslot(scan, ForwardScanDirection, slot); - aoscan->nextTupleId++; + aoscan->targrow = RowSampler_Next(&rs); + + vacuum_delay_point(); - if (ret) - *liverows += 1; + if (appendonly_get_target_tuple(aoscan, aoscan->targrow, slot)) + { + rows[numrows++] = ExecCopySlotHeapTuple(slot); + liverows++; + } else - *deadrows += 1; /* if return an invisible tuple */ + deadrows++; + + ExecClearTuple(slot); } - return ret; + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + + /* + * Emit some interesting relation info + */ + ereport(elevel, + (errmsg("\"%s\": scanned " INT64_FORMAT " rows, " + "containing %.0f live rows and %.0f dead rows; " + "%d rows in sample, %.0f total live rows, " + "%.f total dead rows", + RelationGetRelationName(onerel), + rs.m, liverows, deadrows, numrows, + *totalrows, *totaldeadrows))); + + return numrows; } static double @@ -2278,6 +2343,7 @@ static const TableAmRoutine ao_row_methods = { .relation_vacuum = appendonly_vacuum_rel, .scan_analyze_next_block = appendonly_scan_analyze_next_block, .scan_analyze_next_tuple = appendonly_scan_analyze_next_tuple, + .relation_acquire_sample_rows = appendonly_acquire_sample_rows, .index_build_range_scan = appendonly_index_build_range_scan, .index_validate_scan = appendonly_index_validate_scan, @@ -2293,7 +2359,6 @@ static const TableAmRoutine ao_row_methods = { .scan_bitmap_next_tuple = appendonly_scan_bitmap_next_tuple, .scan_sample_next_block = appendonly_scan_sample_next_block, .scan_sample_next_tuple = appendonly_scan_sample_next_tuple, - .acquire_sample_rows = acquire_sample_rows, .amoptions = ao_amoptions, .swap_relation_files = appendonly_swap_relation_files, diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c b/src/backend/access/appendonly/appendonlyblockdirectory.c index b6a9b17991d..2f9a9f4522a 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -154,6 +154,8 @@ init_internal(AppendOnlyBlockDirectory *blockDirectory) ItemPointerSetInvalid(&minipageInfo->tupleTid); } + blockDirectory->cached_mpentry_num = InvalidEntryNum; + MemoryContextSwitchTo(oldcxt); } @@ -515,6 +517,32 @@ set_directoryentry_range( return true; } +/* + * AppendOnlyBlockDirectory_GetCachedEntry + * + * Return cached minipage entry for avoidance + * of double scans on block directory. 
+ */ +static inline bool +AppendOnlyBlockDirectory_GetCachedEntry( + AppendOnlyBlockDirectory *blockDirectory, + int segmentFileNum, + int columnGroupNo, + AppendOnlyBlockDirectoryEntry *directoryEntry) +{ + MinipagePerColumnGroup *minipageInfo PG_USED_FOR_ASSERTS_ONLY = &blockDirectory->minipages[columnGroupNo]; + + Assert(blockDirectory->cached_mpentry_num != InvalidEntryNum); + Assert(segmentFileNum == blockDirectory->currentSegmentFileNum); + Assert(blockDirectory->currentSegmentFileInfo != NULL); + Assert(minipageInfo->numMinipageEntries > 0); + + return set_directoryentry_range(blockDirectory, + columnGroupNo, + blockDirectory->cached_mpentry_num, + directoryEntry); +} + /* * AppendOnlyBlockDirectory_GetEntry * @@ -547,7 +575,7 @@ AppendOnlyBlockDirectory_GetEntry( HeapTuple tuple = NULL; MinipagePerColumnGroup *minipageInfo = &blockDirectory->minipages[columnGroupNo]; - int entry_no = -1; + int entry_no = InvalidEntryNum; int tmpGroupNo; if (blkdirRel == NULL || blkdirIdx == NULL) @@ -567,6 +595,21 @@ AppendOnlyBlockDirectory_GetEntry( "(%d, %d, " INT64_FORMAT ")", columnGroupNo, segmentFileNum, rowNum))); + /* + * We enable caching minipage entry only for ao_row. + * + * Because ao_column requires all column values, + * but the entry returned here caches for only one + * column. It is unavoidable to scan blkdir again in + * aocs_fetch() to extract all other column entries + * for constructing the whole tuple. + */ + if (!blockDirectory->isAOCol && blockDirectory->cached_mpentry_num != InvalidEntryNum) + return AppendOnlyBlockDirectory_GetCachedEntry(blockDirectory, + segmentFileNum, + columnGroupNo, + directoryEntry); + /* * If the segment file number is the same as * blockDirectory->currentSegmentFileNum, the in-memory minipage may @@ -591,7 +634,8 @@ AppendOnlyBlockDirectory_GetEntry( entry_no = find_minipage_entry(minipageInfo->minipage, minipageInfo->numMinipageEntries, rowNum); - if (entry_no != -1) + + if (entry_no != InvalidEntryNum) { return set_directoryentry_range(blockDirectory, columnGroupNo, @@ -883,7 +927,7 @@ blkdir_entry_exists(AppendOnlyBlockDirectory *blockDirectory, entry_no = find_minipage_entry(minipageInfo->minipage, minipageInfo->numMinipageEntries, rowNum); - if (entry_no != -1) + if (entry_no != InvalidEntryNum) { found = true; break; @@ -1282,7 +1326,7 @@ find_minipage_entry(Minipage *minipage, if (start_no <= end_no) return entry_no; else - return -1; + return InvalidEntryNum; } /* @@ -1620,3 +1664,126 @@ AppendOnlyBlockDirectory_End_forUniqueChecks(AppendOnlyBlockDirectory *blockDire MemoryContextDelete(blockDirectory->memoryContext); } + +/* + * AOBlkDirScan_GetRowNum + * + * Given a target logical row number, + * returns the corresponding physical rowNum, + * or -1 if not found. 
+ * + * targrow: 0-based target logical row number + * startrow: pointer to the starting position for stepping to targrow + * targsegno: the segfile number in which targrow locates + * colgroupno: current column group number, always 0 for ao_row + */ +int64 +AOBlkDirScan_GetRowNum(AOBlkDirScan blkdirscan, + int targsegno, + int colgroupno, + int64 targrow, + int64 *startrow) +{ + HeapTuple tuple; + TupleDesc tupdesc; + AppendOnlyBlockDirectory *blkdir = blkdirscan->blkdir; + int mpentryi; + MinipagePerColumnGroup *mpinfo; + int64 rownum = -1; + + Assert(targsegno >= 0); + Assert(blkdir != NULL); + + if (blkdirscan->segno != targsegno || blkdirscan->colgroupno != colgroupno) + { + if (blkdirscan->sysscan != NULL) + systable_endscan_ordered(blkdirscan->sysscan); + + ScanKeyData scankeys[2]; + + ScanKeyInit(&scankeys[0], + Anum_pg_aoblkdir_segno, + BTEqualStrategyNumber, + F_INT4EQ, + Int32GetDatum(targsegno)); + + ScanKeyInit(&scankeys[1], + Anum_pg_aoblkdir_columngroupno, + BTEqualStrategyNumber, + F_INT4EQ, + Int32GetDatum(colgroupno)); + + blkdirscan->sysscan = systable_beginscan_ordered(blkdir->blkdirRel, + blkdir->blkdirIdx, + blkdir->appendOnlyMetaDataSnapshot, + 2, /* nkeys */ + scankeys); + blkdirscan->segno = targsegno; + blkdirscan->colgroupno = colgroupno; + /* reset to InvalidEntryNum for new Minipage entry array */ + blkdir->cached_mpentry_num = InvalidEntryNum; + } + + mpentryi = blkdir->cached_mpentry_num; + mpinfo = &blkdir->minipages[colgroupno]; + + while (true) + { + if (mpentryi == InvalidEntryNum) + { + tuple = systable_getnext_ordered(blkdirscan->sysscan, ForwardScanDirection); + if (HeapTupleIsValid(tuple)) + { + tupdesc = RelationGetDescr(blkdir->blkdirRel); + extract_minipage(blkdir, tuple, tupdesc, colgroupno); + /* new minipage */ + mpentryi = 0; + mpinfo = &blkdir->minipages[colgroupno]; + } + else + { + /* done this < segno, colgroupno > */ + systable_endscan_ordered(blkdirscan->sysscan); + blkdirscan->sysscan = NULL; + blkdirscan->segno = -1; + blkdirscan->colgroupno = 0; + /* always set both vars in pairs, for safety */ + mpentryi = InvalidEntryNum; + mpinfo = NULL; + + goto out; + } + } + + Assert(mpentryi != InvalidEntryNum); + Assert(mpinfo != NULL); + + for (int i = mpentryi; i < mpinfo->numMinipageEntries; i++) + { + Minipage *mp = mpinfo->minipage; + MinipageEntry *entry = &(mp->entry[i]); + + Assert(entry->firstRowNum > 0); + Assert(entry->rowCount > 0); + + if (*startrow + entry->rowCount - 1 >= targrow) + { + rownum = entry->firstRowNum + (targrow - *startrow); + mpentryi = i; + goto out; + } + + *startrow += entry->rowCount; + } + + /* done this minipage */ + mpentryi = InvalidEntryNum; + mpinfo = NULL; + } + +out: + /* set the result of minipage entry lookup */ + blkdir->cached_mpentry_num = mpentryi; + + return rownum; +} diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 22314d15b07..4535f2fdaba 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2686,7 +2686,6 @@ static const TableAmRoutine heapam_methods = { .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple, .scan_sample_next_block = heapam_scan_sample_next_block, .scan_sample_next_tuple = heapam_scan_sample_next_tuple, - .acquire_sample_rows = acquire_sample_rows, .amoptions = heapam_amoptions, diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 7a6428627f6..5a066e211b1 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -183,13 +183,22
@@ static void compute_index_stats(Relation onerel, double totalrows, MemoryContext col_context); static VacAttrStats *examine_attribute(Relation onerel, int attnum, Node *index_expr, int elevel); +static int acquire_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); static int acquire_sample_rows_dispatcher(Relation onerel, bool inh, int elevel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows); +static int gp_acquire_sample_rows_func(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); static BlockNumber acquire_index_number_of_blocks(Relation indexrel, Relation tablerel); static void gp_acquire_correlations_dispatcher(Oid relOid, bool inh, float4 *correlations, bool *correlationsIsNull); static int compare_rows(const void *a, const void *b); +static int acquire_inherited_sample_rows(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows); static void update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats); static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); @@ -351,14 +360,14 @@ analyze_rel_internal(Oid relid, RangeVar *relation, { /* Regular table, so we'll use the regular row acquisition function */ if (onerel->rd_tableam) - acquirefunc = onerel->rd_tableam->acquire_sample_rows; + acquirefunc = onerel->rd_tableam->relation_acquire_sample_rows; /* - * If the TableAmRoutine's acquire_sample_rows if NULL, we use - * acquire_sample_rows as default. + * If the TableAmRoutine's relation_acquire_sample_rows is NULL, we use + * gp_acquire_sample_rows_func as default. */ if (acquirefunc == NULL) - acquirefunc = acquire_sample_rows; + acquirefunc = gp_acquire_sample_rows_func; /* Also get regular table's size */ relpages = AcquireNumberOfBlocks(onerel); @@ -1608,6 +1617,32 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr, int elevel) return stats; } +/* + * GPDB: If we are the dispatcher, then issue analyze on the segments and + * collect the statistics from them. + */ +int +gp_acquire_sample_rows_func(Relation onerel, int elevel, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows) +{ + if (Gp_role == GP_ROLE_DISPATCH && + onerel->rd_cdbpolicy && !GpPolicyIsEntry(onerel->rd_cdbpolicy)) + { + /* Fetch sample from the segments. */ + return acquire_sample_rows_dispatcher(onerel, false, elevel, + rows, targrows, + totalrows, totaldeadrows); + } + + if (RelationIsAppendOptimized(onerel)) + return table_relation_acquire_sample_rows(onerel, elevel, rows, + targrows, totalrows, totaldeadrows); + + return acquire_sample_rows(onerel, elevel, rows, + targrows, totalrows, totaldeadrows); +} + /* * acquire_sample_rows -- acquire a random sample of rows from the table * @@ -1640,14 +1675,8 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr, int elevel) * unbiased estimates of the average numbers of live and dead rows per * block. The previous sampling method put too much credence in the row * density near the start of the table. - * - * The returned list of tuples is in order by physical position in the table. - * (We will rely on this later to derive correlation estimates.) - * - * GPDB: If we are the dispatcher, then issue analyze on the segments and - * collect the statistics from them.
*/ -int +static int acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows) @@ -1673,42 +1702,22 @@ acquire_sample_rows(Relation onerel, int elevel, Assert(targrows > 0); - if (Gp_role == GP_ROLE_DISPATCH && - onerel->rd_cdbpolicy && !GpPolicyIsEntry(onerel->rd_cdbpolicy)) - { - /* Fetch sample from the segments. */ - return acquire_sample_rows_dispatcher(onerel, false, elevel, - rows, targrows, - totalrows, totaldeadrows); - } - /* - * GPDB: Analyze does make a lot of assumptions regarding the file layout of a - * relation. These assumptions are heap specific and do not hold for AO/AOCO - * relations. In the case of AO/AOCO, what is actually needed and used instead - * of number of blocks, is number of tuples. - * - * GPDB_12_MERGE_FIXME: BlockNumber is uint32 and Number of tuples is uint64. - * That means that after row number UINT_MAX we will never analyze the table. + * GPDB: Legacy analyze does make a lot of assumptions regarding the file + * layout of a relation. These assumptions are heap specific and do not hold + * for AO/AOCO relations. In the case of AO/AOCO, what is actually needed and + * used instead of number of blocks, is number of tuples. Moreover, BlockNumber + * is uint32 and Number of tuples is uint64. That means that after row number + * UINT_MAX we will never analyze the table. + * + * We introduced a tuple-based sampling approach for AO/CO tables to address + * the above problems; all corresponding logic was moved out of here into + * table_relation_acquire_sample_rows(). So we leave an assertion here to + * ensure the relation is not an AO/CO table. */ - if (RelationIsNonblockRelation(onerel)) - { - /* AO/CO/PAX use non-fixed block layout */ - BlockNumber pages; - double tuples; - double allvisfrac; - int32 attr_widths; + Assert(!RelationIsAppendOptimized(onerel)); - table_relation_estimate_size(onerel, &attr_widths, &pages, - &tuples, &allvisfrac); - - if (tuples > UINT_MAX) - tuples = UINT_MAX; - - totalblocks = (BlockNumber)tuples; - } - else - totalblocks = RelationGetNumberOfBlocks(onerel); + totalblocks = RelationGetNumberOfBlocks(onerel); /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ OldestXmin = GetOldestNonRemovableTransactionId(onerel); @@ -1897,13 +1906,13 @@ acquire_sample_rows(Relation onerel, int elevel, * Emit some interesting relation info */ ereport(elevel, - (errmsg("\"%s\": scanned %d of %u pages, " - "containing %.0f live rows and %.0f dead rows; " - "%d rows in sample, %.0f estimated total rows", - RelationGetRelationName(onerel), - bs.m, totalblocks, - liverows, deadrows, - numrows, *totalrows))); + (errmsg("\"%s\": scanned %d of %u pages, " + "containing %.0f live rows and %.0f dead rows; " + "%d rows in sample, %.0f estimated total rows", + RelationGetRelationName(onerel), + bs.m, totalblocks, + liverows, deadrows, + numrows, *totalrows))); return numrows; } @@ -1918,17 +1927,12 @@ compare_rows(const void *a, const void *b) HeapTuple hb = *(const HeapTuple *) b; /* - * GPDB_12_MERGE_FIXME: For AO/AOCO tables, blocknumber does not have a - * meaning and is not set. The current implementation of analyze makes - * assumptions about the file layout which do not hold for these two cases. - * The compare function should maintain the row order as consrtucted, hence - * return 0; - * + * For AO/AOCO tables, blocknumber does not have a meaning and is not set. + * We leave this workaround here so that legacy AO/CO analyze keeps working. * There should be no apparent and measurable perfomance hit from calling * this function. - * - * One possible proper fix is to refactor analyze to use the tableam api and - * this sorting should move to the specific implementation. + * + * The AO/CO Fast Analyze won't reach here anymore. */ if (!BlockNumberIsValid(ItemPointerGetBlockNumberNoCheck(&ha->t_self))) return 0; @@ -2053,14 +2057,14 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, { /* Regular table, so use the regular row acquisition function */ if (childrel->rd_tableam) - acquirefunc = childrel->rd_tableam->acquire_sample_rows; + acquirefunc = childrel->rd_tableam->relation_acquire_sample_rows; /* - * If the TableAmRoutine's acquire_sample_rows if NULL, we use - * acquire_sample_rows as default. + * If the TableAmRoutine's relation_acquire_sample_rows is NULL, we use + * gp_acquire_sample_rows_func as default. */ if (acquirefunc == NULL) - acquirefunc = acquire_sample_rows; + acquirefunc = gp_acquire_sample_rows_func; relpages = AcquireNumberOfBlocks(childrel); } diff --git a/src/backend/commands/analyzefuncs.c b/src/backend/commands/analyzefuncs.c index 8c9fd56199d..b97bbee048c 100644 --- a/src/backend/commands/analyzefuncs.c +++ b/src/backend/commands/analyzefuncs.c @@ -27,6 +27,7 @@ #include "miscadmin.h" #include "funcapi.h" #include "utils/syscache.h" +#include "utils/faultinjector.h" /** * Statistics related parameters. */ @@ -303,6 +304,8 @@ gp_acquire_sample_rows(PG_FUNCTION_ARGS) ctx->index++; + SIMPLE_FAULT_INJECTOR("returned_sample_row"); + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(res)); } else if (!ctx->summary_sent) diff --git a/src/backend/utils/datumstream/datumstream.c b/src/backend/utils/datumstream/datumstream.c index 42f374d1498..82c2b5a7cb3 100644 --- a/src/backend/utils/datumstream/datumstream.c +++ b/src/backend/utils/datumstream/datumstream.c @@ -1129,7 +1129,7 @@ datumstreamwrite_lob(DatumStreamWrite * acc, return varLen; } -static bool +bool datumstreamread_block_info(DatumStreamRead * acc) { bool readOK = false; diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c index 40e47487c21..58361eb2bff 100644 --- a/src/backend/utils/misc/sampling.c +++ b/src/backend/utils/misc/sampling.c @@ -116,7 +116,11 @@ BlockSampler_Next(BlockSampler bs) } /* - * This is a 64 bit version of BlockSampler. + * This is used for sampling AO/CO row numbers, in the flattened + * row number space, across all segfile tuple counts. 64 bits is + * used for simplicity and is sufficient to hold a maximum tuple + * count of (AOTupleId_MaxSegmentFileNum * MAX_AOREL_CONCURRENCY), + * which can be represented with 48 bits.
* * The code is same as BlockSampler except replacing * int type of variables with int64, which is intended @@ -127,7 +131,7 @@ BlockSampler_Next(BlockSampler bs) */ void RowSampler_Init(RowSampler rs, int64 nobjects, int64 samplesize, - long randseed) + long randseed) { rs->N = nobjects; /* measured table size */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 61948090e22..4a877e857b1 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -751,6 +751,13 @@ typedef struct TableAmRoutine double *liverows, double *deadrows, TupleTableSlot *slot); + + int (*relation_acquire_sample_rows) (Relation onerel, + int elevel, + HeapTuple *rows, + int targrows, + double *totalrows, + double *totaldeadrows); /* see table_index_build_range_scan for reference about parameters */ double (*index_build_range_scan) (Relation table_rel, @@ -948,15 +955,6 @@ typedef struct TableAmRoutine struct SampleScanState *scanstate, TupleTableSlot *slot); - /* - * Like scan_sample_next_block and scan_sample_next_tuple, this callback - * is also used to sample tuple rows. As different storage maybe need to - * use different acquire sample rows process, we extend an interface to - * achieve this requirement. - */ - int (*acquire_sample_rows) (Relation onerel, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); /* * This callback is used to parse reloptions for relation/matview/toast. */ @@ -1903,6 +1901,38 @@ table_relation_vacuum(Relation rel, struct VacuumParams *params, rel->rd_tableam->relation_vacuum(rel, params, bstrategy); } +/* + * GPDB: Interface to acquire sample rows from a given relation (currently + * AO/CO). + * + * Selected rows are returned in the caller-allocated array rows[], which + * must have space to hold at least targrows entries. + * + * The actual number of rows selected is returned as the function result. + * We also estimate the total numbers of live and dead rows in the table, + * and return them into *totalrows and *totaldeadrows, respectively. + * + * The returned list of tuples is in order by physical position in the table. + * (We will rely on this later to derive correlation estimates.) + * + * Note: this interface is not used by upstream code. + * The upstream interface (implemented by heap) uses a 2-stage sampling method + * using table_scan_analyze_next_block() and table_scan_analyze_next_tuple(). + * See acquire_sample_rows(). Since this upstream method does not suit AO/CO + * tables, we have created the relation_acquire_sample_rows() interface. + * + * Note for future merges: + * We have to keep this interface consistent with acquire_sample_rows(). + */ +static inline int +table_relation_acquire_sample_rows(Relation rel, int elevel, HeapTuple *rows, + int targrows, double *totalrows, double *totaldeadrows) +{ + return rel->rd_tableam->relation_acquire_sample_rows(rel, elevel, rows, + targrows, totalrows, + totaldeadrows); +} + /* * Prepare to analyze block `blockno` of `scan`. The scan needs to have been * started with table_beginscan_analyze(). Note that this routine might diff --git a/src/include/cdb/cdbaocsam.h b/src/include/cdb/cdbaocsam.h index b4a4653bfd0..ab990e2bf01 100644 --- a/src/include/cdb/cdbaocsam.h +++ b/src/include/cdb/cdbaocsam.h @@ -105,6 +105,57 @@ enum AOCSScanDescIdentifier AOCSBITMAPSCANDATA /* am private */ }; +/* + * Used for fetching individual tuples, specified by TID, from append-only + * relations using the AO Block Directory.
+ */ +typedef struct AOCSFetchDescData +{ + Relation relation; + Snapshot appendOnlyMetaDataSnapshot; + + /* + * Snapshot to use for non-metadata operations. + * Usually snapshot = appendOnlyMetaDataSnapshot, but they + * differ e.g. if gp_select_invisible is set. + */ + Snapshot snapshot; + + MemoryContext initContext; + + int totalSegfiles; + struct AOCSFileSegInfo **segmentFileInfo; + + /* + * Array containing the maximum row number in each aoseg (to be consulted + * during fetch). This is a sparse array as not all segments are involved + * in a scan. Sparse entries are marked with InvalidAORowNum. + * + * Note: + * If we have no updates and deletes, the total_tupcount is equal to the + * maximum row number. But after some updates and deletes, the maximum row + * number is always much bigger than total_tupcount, so this carries the + * last sequence from gp_fastsequence. + */ + int64 lastSequence[AOTupleId_MultiplierSegmentFileNum]; + + char *segmentFileName; + int segmentFileNameMaxLen; + char *basepath; + + AppendOnlyBlockDirectory blockDirectory; + + DatumStreamFetchDesc *datumStreamFetchDesc; + + int64 skipBlockCount; + + AppendOnlyVisimap visibilityMap; + + Oid segrelid; +} AOCSFetchDescData; + +typedef AOCSFetchDescData *AOCSFetchDesc; + /* * Used for scan of appendoptimized column oriented relations, should be used in * the tableam api related code and under it. @@ -118,19 +169,41 @@ typedef struct AOCSScanDescData /* synthetic system attributes */ ItemPointerData cdb_fake_ctid; - int64 total_row; - int64 segrowsprocessed; /* - * Only used by `analyze` + * Used by `analyze` */ - int64 nextTupleId; - int64 targetTupleId; /* - * Only used by `sample scan` - */ + * targrow: the output of the row-based sampler (Algorithm S); denotes a + * row number (starting from 0) in the flattened row number space that is + * the target of a sample. + * For example, if seg0 has rownums [1, 100] and seg1 has rownums [1, 200], + * then targrow = 150 refers to seg1's rownum = 51. + */ + int64 targrow; + + /* + * segfirstrow: points to the next starting row, used to check the + * distance to `targrow`. + */ + int64 segfirstrow; + + /* + * segrowsprocessed: tracks the rows processed within the current segfile. + * It must be kept up to date whenever "segfirstrow" is updated. + */ + int64 segrowsprocessed; + + AOBlkDirScan blkdirscan; + AOCSFetchDesc aocsfetch; + bool *proj; + + /* + * Only used by `sample scan` + */ int64 fetchTupleId; + int64 nextTupleId; int64 totalTuples; /* * Part of the struct to be used only inside aocsam.c @@ -212,57 +285,6 @@ typedef struct AOCSScanDescData typedef AOCSScanDescData *AOCSScanDesc; -/* - * Used for fetch individual tuples from specified by TID of append only relations - * using the AO Block Directory. - */ -typedef struct AOCSFetchDescData -{ - Relation relation; - Snapshot appendOnlyMetaDataSnapshot; - - /* - * Snapshot to use for non-metadata operations. - * Usually snapshot = appendOnlyMetaDataSnapshot, but they - * differ e.g. if gp_select_invisible is set. - */ - Snapshot snapshot; - - MemoryContext initContext; - - int totalSegfiles; - struct AOCSFileSegInfo **segmentFileInfo; - - /* - * Array containing the maximum row number in each aoseg (to be consulted - * during fetch). This is a sparse array as not all segments are involved - * in a scan. Sparse entries are marked with InvalidAORowNum. - * - * Note: - * If we have no updates and deletes, the total_tupcount is equal to the - * maximum row number.
But after some updates and deletes, the maximum row - * number is always much bigger than total_tupcount, so this carries the - * last sequence from gp_fastsequence. - */ - int64 lastSequence[AOTupleId_MultiplierSegmentFileNum]; - - char *segmentFileName; - int segmentFileNameMaxLen; - char *basepath; - - AppendOnlyBlockDirectory blockDirectory; - - DatumStreamFetchDesc *datumStreamFetchDesc; - - int64 skipBlockCount; - - AppendOnlyVisimap visibilityMap; - - Oid segrelid; -} AOCSFetchDescData; - -typedef AOCSFetchDescData *AOCSFetchDesc; - /* * AOCSDeleteDescData is used for delete data from AOCS relations. * It serves an equivalent purpose as AppendOnlyScanDescData @@ -397,6 +419,8 @@ extern void aocs_addcol_emptyvpe( extern void aocs_addcol_setfirstrownum(AOCSAddColumnDesc desc, int64 firstRowNum); +extern bool aocs_get_target_tuple(AOCSScanDesc aoscan, int64 targrow, TupleTableSlot *slot); + extern void aoco_dml_init(Relation relation, CmdType operation); extern void aoco_dml_finish(Relation relation, CmdType operation); @@ -423,5 +447,21 @@ AOCSScanDesc_UpdateTotalBytesRead(AOCSScanDesc scan, AttrNumber attno) scan->totalBytesRead += scan->columnScanInfo.ds[attno]->ao_read.current.uncompressedLen; } +static inline int64 +AOCSScanDesc_TotalTupCount(AOCSScanDesc scan) +{ + Assert(scan != NULL); + + int64 totalrows = 0; + AOCSFileSegInfo **seginfo = scan->seginfo; + + for (int i = 0; i < scan->total_seg; i++) + { + if (seginfo[i]->state != AOSEG_STATE_AWAITING_DROP) + totalrows += seginfo[i]->total_tupcount; + } + + return totalrows; +} #endif /* AOCSAM_H */ diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h index 125c1de4d23..58fc77d8f5c 100644 --- a/src/include/cdb/cdbappendonlyam.h +++ b/src/include/cdb/cdbappendonlyam.h @@ -141,6 +141,7 @@ typedef struct AppendOnlyExecutorReadBlock int segmentFileNum; int64 totalRowsScanned; + int64 blockRowsProcessed; int64 blockFirstRowNum; int64 headerOffsetInFile; @@ -236,10 +237,31 @@ typedef struct AppendOnlyScanDescData AppendOnlyVisimap visibilityMap; /* - * Only used by `analyze` + * Used by `analyze` */ - int64 nextTupleId; - int64 targetTupleId; + + /* + * targrow: the output of the row-based sampler (Algorithm S); denotes a + * row number (starting from 0) in the flattened row number space that is + * the target of a sample. + * For example, if seg0 has rownums [1, 100] and seg1 has rownums [1, 200], + * then targrow = 150 refers to seg1's rownum = 51. + */ + int64 targrow; + + /* + * segfirstrow: points to the next starting row, used to check the + * distance to `targrow`. + */ + int64 segfirstrow; + + /* + * segrowsprocessed: tracks the rows processed within the current segfile. + * It must be kept up to date whenever "segfirstrow" is updated.
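 + * + * (Illustrative example, assuming the semantics described above: with + * segfirstrow = 40 and targrow = 150, there are 150 - 40 = 110 rows left + * before the target, a distance the scan can cover by skipping varblocks + * instead of uncompressing their content.)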
+ */ + int64 segrowsprocessed; + + AOBlkDirScan blkdirscan; /* scan current state */ BlockNumber rs_nblocks; /* current block */ @@ -454,6 +476,9 @@ extern bool appendonly_getnextslot(TableScanDesc scan, ScanDirection direction, TupleTableSlot *slot); extern uint32 appendonly_scan_flags(Relation relation); +extern bool appendonly_get_target_tuple(AppendOnlyScanDesc aoscan, + int64 targrow, + TupleTableSlot *slot); extern AppendOnlyFetchDesc appendonly_fetch_init( Relation relation, Snapshot snapshot, @@ -514,4 +539,21 @@ AppendOnlyScanDesc_UpdateTotalBytesRead(AppendOnlyScanDesc scan) scan->totalBytesRead += scan->storageRead.current.uncompressedLen; } +static inline int64 +AppendOnlyScanDesc_TotalTupCount(AppendOnlyScanDesc scan) +{ + Assert(scan != NULL); + + int64 totalrows = 0; + FileSegInfo **seginfo = scan->aos_segfile_arr; + + for (int i = 0; i < scan->aos_total_segfiles; i++) + { + if (seginfo[i]->state != AOSEG_STATE_AWAITING_DROP) + totalrows += seginfo[i]->total_tupcount; + } + + return totalrows; +} + #endif /* CDBAPPENDONLYAM_H */ diff --git a/src/include/cdb/cdbappendonlyblockdirectory.h b/src/include/cdb/cdbappendonlyblockdirectory.h index 425cb9f7312..f90f7849fe0 100644 --- a/src/include/cdb/cdbappendonlyblockdirectory.h +++ b/src/include/cdb/cdbappendonlyblockdirectory.h @@ -95,6 +95,8 @@ typedef struct MinipagePerColumnGroup #define IsMinipageFull(minipagePerColumnGroup) \ ((minipagePerColumnGroup)->numMinipageEntries == (uint32) gp_blockdirectory_minipage_size) +#define InvalidEntryNum (-1) + /* * Define a structure for the append-only relation block directory. */ @@ -135,6 +137,14 @@ typedef struct AppendOnlyBlockDirectory ScanKey scanKeys; StrategyNumber *strategyNumbers; + /* + * Minipage entry number, for caching purposes. + * + * XXX: other call sites of AppendOnlyBlockDirectory_GetEntry() + * may want to consider using this cache. + */ + int cached_mpentry_num; + } AppendOnlyBlockDirectory; @@ -169,10 +179,16 @@ typedef struct AOFetchSegmentFile int64 logicalEof; } AOFetchSegmentFile; -typedef struct AppendOnlyBlockDirectorySeqScan { - AppendOnlyBlockDirectory blkdir; - SysScanDesc sysScan; -} AppendOnlyBlockDirectorySeqScan; +/* + * Tracks block directory scan state for block-directory based ANALYZE.
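 + * + * (Descriptive note: blkdir/sysscan track the underlying block directory + * relation scan, while segno and colgroupno appear to remember the current + * scan position between AOBlkDirScan_GetRowNum() calls; AOBlkDirScan_Init() + * and AOBlkDirScan_Finish() below reset them to their initial values.)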
+ */ +typedef struct AOBlkDirScanData +{ + AppendOnlyBlockDirectory *blkdir; + SysScanDesc sysscan; + int segno; + int colgroupno; +} AOBlkDirScanData, *AOBlkDirScan; extern void AppendOnlyBlockDirectoryEntry_GetBeginRange( AppendOnlyBlockDirectoryEntry *directoryEntry, @@ -190,6 +206,12 @@ extern bool AppendOnlyBlockDirectory_GetEntry( AOTupleId *aoTupleId, int columnGroupNo, AppendOnlyBlockDirectoryEntry *directoryEntry); +extern int64 AOBlkDirScan_GetRowNum( + AOBlkDirScan blkdirscan, + int targsegno, + int colgroupno, + int64 targrow, + int64 *startrow); extern bool AppendOnlyBlockDirectory_CoversTuple( AppendOnlyBlockDirectory *blockDirectory, AOTupleId *aoTupleId); @@ -243,6 +265,8 @@ extern void AppendOnlyBlockDirectory_DeleteSegmentFile( int columnGroupNo); extern void AppendOnlyBlockDirectory_End_forUniqueChecks( AppendOnlyBlockDirectory *blockDirectory); +extern void AppendOnlyBlockDirectory_End_forSearch_InSequence( + AOBlkDirScan seqscan); extern void AppendOnlyBlockDirectory_InsertPlaceholder(AppendOnlyBlockDirectory *blockDirectory, int64 firstRowNum, @@ -322,4 +346,35 @@ copy_out_minipage(MinipagePerColumnGroup *minipageInfo, minipageInfo->numMinipageEntries = minipageInfo->minipage->nEntry; } +static inline void +AOBlkDirScan_Init(AOBlkDirScan blkdirscan, + AppendOnlyBlockDirectory *blkdir) +{ + blkdirscan->blkdir = blkdir; + blkdirscan->sysscan = NULL; + blkdirscan->segno = -1; + blkdirscan->colgroupno = 0; +} + +/* should be called before fetch_finish() */ +static inline void +AOBlkDirScan_Finish(AOBlkDirScan blkdirscan) +{ + /* + * Make sure blkdir hasn't been destroyed by fetch_finish(); + * otherwise systable_endscan_ordered() would crash, because + * sysscan holds the blkdir relation, which would already have + * been freed. + */ + Assert(blkdirscan->blkdir != NULL); + + if (blkdirscan->sysscan != NULL) + { + systable_endscan_ordered(blkdirscan->sysscan); + blkdirscan->sysscan = NULL; + } + blkdirscan->segno = -1; + blkdirscan->colgroupno = 0; + blkdirscan->blkdir = NULL; +} + #endif diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 87f19d4e087..8e99c8fd70c 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -434,13 +434,6 @@ extern double anl_random_fract(void); extern double anl_init_selection_state(int n); extern double anl_get_next_S(double t, int n, double *stateptr); -extern int acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); -extern int acquire_inherited_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); - /* in commands/analyzefuncs.c */ extern Datum gp_acquire_sample_rows(PG_FUNCTION_ARGS); extern Datum gp_acquire_correlations(PG_FUNCTION_ARGS); diff --git a/src/include/utils/datumstream.h b/src/include/utils/datumstream.h index 5210159d8b6..278579fb60b 100644 --- a/src/include/utils/datumstream.h +++ b/src/include/utils/datumstream.h @@ -93,6 +93,7 @@ typedef struct DatumStreamRead int64 blockFirstRowNum; int64 blockFileOffset; int blockRowCount; + int blockRowsProcessed; AppendOnlyStorageRead ao_read; @@ -322,8 +323,9 @@ extern bool datumstreamread_find_block(DatumStreamRead * datumStream, extern void *datumstreamread_get_upgrade_space(DatumStreamRead *datumStream, size_t len); +extern bool datumstreamread_block_info(DatumStreamRead * acc); /* - * MPP-17061: make sure datumstream_read_block_info was called first for the CO block + * MPP-17061: make sure datumstreamread_block_info was
called first for the CO block * before calling datumstreamread_block_content. */ extern void datumstreamread_block_content(DatumStreamRead * acc); diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h index 9536b07432c..fa3c9aa2ff8 100644 --- a/src/include/utils/sampling.h +++ b/src/include/utils/sampling.h @@ -42,7 +42,7 @@ extern BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, extern bool BlockSampler_HasMore(BlockSampler bs); extern BlockNumber BlockSampler_Next(BlockSampler bs); -/* 64 bit version of BlockSampler */ +/* 64 bit version of BlockSampler (used for sampling AO/CO table rows) */ typedef struct { int64 N; /* number of objects, known in advance */ diff --git a/src/test/isolation2/expected/ao_blkdir.out b/src/test/isolation2/expected/ao_blkdir.out index f202d8ec4da..0a1eb468592 100644 --- a/src/test/isolation2/expected/ao_blkdir.out +++ b/src/test/isolation2/expected/ao_blkdir.out @@ -319,6 +319,155 @@ COMMIT DROP TABLE ao_blkdir_test; DROP +-- Test `tupcount` in pg_aoseg == sum of number of `row_count` across all +-- aoblkdir entries for each segno. Test with commits, aborts and deletes. + +-- Case1: without VACUUM ANALYZE +CREATE TABLE ao_blkdir_test_rowcount(i int, j int) USING ao_row DISTRIBUTED BY (j); +CREATE +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN +4: BEGIN; +BEGIN +1: INSERT INTO ao_blkdir_test_rowcount SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +2: INSERT INTO ao_blkdir_test_rowcount SELECT i, 3 FROM generate_series(1, 20) i; +INSERT 20 +3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 30) i; +INSERT 30 +3: ABORT; +ABORT +3: BEGIN; +BEGIN +3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 40) i; +INSERT 40 +4: INSERT INTO ao_blkdir_test_rowcount SELECT i, 7 FROM generate_series(1, 50) i; +INSERT 50 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +3: COMMIT; +COMMIT +4: COMMIT; +COMMIT +DELETE FROM ao_blkdir_test_rowcount WHERE j = 7; +DELETE 50 + +CREATE INDEX ao_blkdir_test_rowcount_idx ON ao_blkdir_test_rowcount(i); +CREATE + +SELECT segno, sum(row_count) AS totalrows FROM (SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno; + segno | totalrows +-------+----------- + 1 | 10 + 2 | 20 + 3 | 40 + 4 | 50 +(4 rows) +SELECT segno, sum(tupcount) AS totalrows FROM gp_toolkit.__gp_aoseg('ao_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno; + segno | totalrows +-------+----------- + 1 | 10 + 2 | 20 + 3 | 40 + 4 | 50 +(4 rows) + +-- Case2: with VACUUM ANALYZE +DROP TABLE ao_blkdir_test_rowcount; +DROP +CREATE TABLE ao_blkdir_test_rowcount(i int, j int) USING ao_row DISTRIBUTED BY (j); +CREATE +CREATE INDEX ao_blkdir_test_rowcount_idx ON ao_blkdir_test_rowcount(i); +CREATE +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN +4: BEGIN; +BEGIN +1: INSERT INTO ao_blkdir_test_rowcount SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +1: INSERT INTO ao_blkdir_test_rowcount SELECT i, 2 FROM ao_blkdir_test_rowcount; +INSERT 10 +1: INSERT INTO ao_blkdir_test_rowcount SELECT i, 2 FROM ao_blkdir_test_rowcount; +INSERT 20 +2: INSERT INTO ao_blkdir_test_rowcount SELECT i, 3 FROM generate_series(1, 20) i; +INSERT 20 +2: INSERT INTO ao_blkdir_test_rowcount SELECT i, 3 FROM ao_blkdir_test_rowcount; +INSERT 20 +2: INSERT INTO ao_blkdir_test_rowcount SELECT i, 3 FROM ao_blkdir_test_rowcount; +INSERT 40 +3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM 
generate_series(1, 30) i; +INSERT 30 +3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM ao_blkdir_test_rowcount; +INSERT 30 +3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM ao_blkdir_test_rowcount; +INSERT 60 +4: INSERT INTO ao_blkdir_test_rowcount SELECT i, 7 FROM generate_series(1, 50) i; +INSERT 50 +4: INSERT INTO ao_blkdir_test_rowcount SELECT i, 7 FROM ao_blkdir_test_rowcount; +INSERT 50 +4: INSERT INTO ao_blkdir_test_rowcount SELECT i, 7 FROM ao_blkdir_test_rowcount; +INSERT 100 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +3: ABORT; +ABORT +4: COMMIT; +COMMIT + +DELETE FROM ao_blkdir_test_rowcount WHERE j = 7; +DELETE 200 +VACUUM ANALYZE ao_blkdir_test_rowcount; +VACUUM + +SELECT segno, sum(row_count) AS totalrows FROM (SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno; + segno | totalrows +-------+----------- + 1 | 40 + 2 | 80 +(2 rows) +SELECT segno, sum(tupcount) AS totalrows FROM gp_toolkit.__gp_aoseg('ao_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno; + segno | totalrows +-------+----------- + 1 | 40 + 2 | 80 + 3 | 0 + 4 | 0 +(4 rows) + +UPDATE ao_blkdir_test_rowcount SET i = i + 1; +UPDATE 120 +VACUUM ANALYZE ao_blkdir_test_rowcount; +VACUUM + +SELECT segno, sum(row_count) AS totalrows FROM (SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno; + segno | totalrows +-------+----------- + 3 | 120 +(1 row) +SELECT segno, sum(tupcount) AS totalrows FROM gp_toolkit.__gp_aoseg('ao_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno; + segno | totalrows +-------+----------- + 1 | 0 + 2 | 0 + 3 | 120 + 4 | 0 +(4 rows) + +DROP TABLE ao_blkdir_test_rowcount; +DROP + -------------------------------------------------------------------------------- -- AOCO tables -------------------------------------------------------------------------------- @@ -825,3 +974,172 @@ DETAIL: Key (i)=(2) already exists. DROP TABLE aoco_blkdir_test; DROP + +-- Test `tupcount` in pg_ao(cs)seg == sum of number of `row_count` across all +-- aoblkdir entries for each <segno, columngroup_no>. Test with commits, aborts +-- and deletes. 
+ +-- Case1: without VACUUM ANALYZE +CREATE TABLE aoco_blkdir_test_rowcount(i int, j int) USING ao_column DISTRIBUTED BY (j); +CREATE +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN +4: BEGIN; +BEGIN +1: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +2: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 3 FROM generate_series(1, 20) i; +INSERT 20 +3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 30) i; +INSERT 30 +3: ABORT; +ABORT +3: BEGIN; +BEGIN +3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 40) i; +INSERT 40 +4: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 7 FROM generate_series(1, 50) i; +INSERT 50 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +3: COMMIT; +COMMIT +4: COMMIT; +COMMIT +DELETE FROM aoco_blkdir_test_rowcount WHERE j = 7; +DELETE 50 + +CREATE INDEX aoco_blkdir_test_rowcount_idx ON aoco_blkdir_test_rowcount(i); +CREATE + +SELECT segno, columngroup_no, sum(row_count) AS totalrows FROM (SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno, columngroup_no; + segno | columngroup_no | totalrows +-------+----------------+----------- + 1 | 0 | 10 + 1 | 1 | 10 + 2 | 0 | 20 + 2 | 1 | 20 + 3 | 0 | 40 + 3 | 1 | 40 + 4 | 0 | 50 + 4 | 1 | 50 +(8 rows) +SELECT segno, column_num, sum(tupcount) AS totalrows FROM gp_toolkit.__gp_aocsseg('aoco_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno, column_num; + segno | column_num | totalrows +-------+------------+----------- + 1 | 0 | 10 + 1 | 1 | 10 + 2 | 0 | 20 + 2 | 1 | 20 + 3 | 0 | 40 + 3 | 1 | 40 + 4 | 0 | 50 + 4 | 1 | 50 +(8 rows) + +-- Case2: with VACUUM ANALYZE +DROP TABLE aoco_blkdir_test_rowcount; +DROP +CREATE TABLE aoco_blkdir_test_rowcount(i int, j int) USING ao_column DISTRIBUTED BY (j); +CREATE +CREATE INDEX aoco_blkdir_test_rowcount_idx ON aoco_blkdir_test_rowcount(i); +CREATE +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN +4: BEGIN; +BEGIN +1: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +1: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 2 FROM aoco_blkdir_test_rowcount; +INSERT 10 +1: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 2 FROM aoco_blkdir_test_rowcount; +INSERT 20 +2: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 3 FROM generate_series(1, 20) i; +INSERT 20 +2: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 3 FROM aoco_blkdir_test_rowcount; +INSERT 20 +2: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 3 FROM aoco_blkdir_test_rowcount; +INSERT 40 +3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 30) i; +INSERT 30 +3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM aoco_blkdir_test_rowcount; +INSERT 30 +3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM aoco_blkdir_test_rowcount; +INSERT 60 +4: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 7 FROM generate_series(1, 50) i; +INSERT 50 +4: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 7 FROM aoco_blkdir_test_rowcount; +INSERT 50 +4: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 7 FROM aoco_blkdir_test_rowcount; +INSERT 100 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +3: ABORT; +ABORT +4: COMMIT; +COMMIT + +DELETE FROM aoco_blkdir_test_rowcount WHERE j = 7; +DELETE 200 +VACUUM ANALYZE aoco_blkdir_test_rowcount; +VACUUM + +SELECT segno, columngroup_no, sum(row_count) AS totalrows FROM (SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test_rowcount')).* 
FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno, columngroup_no; + segno | columngroup_no | totalrows +-------+----------------+----------- + 1 | 0 | 40 + 1 | 1 | 40 + 2 | 0 | 80 + 2 | 1 | 80 +(4 rows) +SELECT segno, column_num, sum(tupcount) AS totalrows FROM gp_toolkit.__gp_aocsseg('aoco_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno, column_num; + segno | column_num | totalrows +-------+------------+----------- + 1 | 0 | 40 + 1 | 1 | 40 + 2 | 0 | 80 + 2 | 1 | 80 + 3 | 0 | 0 + 3 | 1 | 0 + 4 | 0 | 0 + 4 | 1 | 0 +(8 rows) + +UPDATE aoco_blkdir_test_rowcount SET i = i + 1; +UPDATE 120 +VACUUM ANALYZE aoco_blkdir_test_rowcount; +VACUUM + +SELECT segno, columngroup_no, sum(row_count) AS totalrows FROM (SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno, columngroup_no; + segno | columngroup_no | totalrows +-------+----------------+----------- + 3 | 0 | 120 + 3 | 1 | 120 +(2 rows) +SELECT segno, column_num, sum(tupcount) AS totalrows FROM gp_toolkit.__gp_aocsseg('aoco_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno, column_num; + segno | column_num | totalrows +-------+------------+----------- + 1 | 0 | 0 + 1 | 1 | 0 + 2 | 0 | 0 + 2 | 1 | 0 + 3 | 0 | 120 + 3 | 1 | 120 + 4 | 0 | 0 + 4 | 1 | 0 +(8 rows) + +DROP TABLE aoco_blkdir_test_rowcount; +DROP diff --git a/src/test/isolation2/input/uao/fast_analyze.source b/src/test/isolation2/input/uao/fast_analyze.source new file mode 100644 index 00000000000..962ba600f75 --- /dev/null +++ b/src/test/isolation2/input/uao/fast_analyze.source @@ -0,0 +1,349 @@ +-- +-- Test AO/CO sampling method. +-- +-- These tests ensure that we achieve our ANALYZE targets for AO/CO tables. +-- +CREATE TABLE fast_analyze_@amname@_1(i int, j int) USING @amname@ DISTRIBUTED BY (j); + +-- Stats target info shows that we will sample 300 * (100) rows. +SHOW default_statistics_target; +SELECT attstattarget FROM pg_attribute + WHERE attrelid = 'fast_analyze_@amname@_1'::regclass AND attname IN ('i', 'j'); + +-------------------------------------------------------------------------------- +-- Scenario 1: +-- We have MORE than 300 * default_statistics_target = 30k rows for a 2 int table, +-- spread across 3 segments, with no aborted rows [2 subcases -> blkdir and +-- non-blkdir]. +-- Expectation: We have collected 30k live rows. +-------------------------------------------------------------------------------- + +-- (a) Without blkdir subcase + +-- Insert 10.5k rows in each QE. +1: BEGIN; +2: BEGIN; +3: BEGIN; +1: INSERT INTO fast_analyze_@amname@_1 SELECT i, 2 FROM generate_series(1, 10500) i; +2: INSERT INTO fast_analyze_@amname@_1 SELECT i, 1 FROM generate_series(1, 10500) i; +3: INSERT INTO fast_analyze_@amname@_1 SELECT i, 5 FROM generate_series(1, 10500) i; +1: COMMIT; +2: COMMIT; +3: COMMIT; + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +ANALYZE fast_analyze_@amname@_1; + +-- We have sampled 10k live rows. 
+SELECT gp_inject_fault('returned_sample_row', 'status', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +-- (b) With blkdir subcase + +CREATE INDEX ON fast_analyze_@amname@_1(i); + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +ANALYZE fast_analyze_@amname@_1; + +-- We have sampled 10k live rows from each QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +-------------------------------------------------------------------------------- +-- Scenario 2: +-- We have LESS than 300 * default_statistics_target = 30k rows for a 2 int table, +-- spread across 3 segments, with no aborted rows [2 subcases -> blkdir and +-- non-blkdir]. +-- Expectation: We have collected number of live rows = total tupcount of table. +-------------------------------------------------------------------------------- + +CREATE TABLE fast_analyze_@amname@_2(i int, j int) USING @amname@ DISTRIBUTED BY (j); + +-- (a) Without blkdir subcase + +-- Insert 10 rows in each QE. +1: BEGIN; +2: BEGIN; +3: BEGIN; +1: INSERT INTO fast_analyze_@amname@_2 SELECT i, 2 FROM generate_series(1, 10) i; +2: INSERT INTO fast_analyze_@amname@_2 SELECT i, 1 FROM generate_series(1, 10) i; +3: INSERT INTO fast_analyze_@amname@_2 SELECT i, 5 FROM generate_series(1, 10) i; +1: COMMIT; +2: COMMIT; +3: COMMIT; + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +ANALYZE fast_analyze_@amname@_2; + +-- We have sampled 10 live rows from each QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +-- (b) With blkdir subcase + +CREATE INDEX ON fast_analyze_@amname@_2(i); + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +ANALYZE fast_analyze_@amname@_2; + +-- We have sampled 10 live rows from each QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +-------------------------------------------------------------------------------- +-- Scenario 3: +-- We have ALL aborted rows [2 subcases -> blkdir and non-blkdir]. +-- Expectation: We have not sampled any live rows. +-------------------------------------------------------------------------------- + +CREATE TABLE fast_analyze_@amname@_3(i int, j int) USING @amname@ DISTRIBUTED BY (j); + +-- (a) Without blkdir subcase + +-- Insert 10 rows in each QE. 
+1: BEGIN; +2: BEGIN; +3: BEGIN; +1: INSERT INTO fast_analyze_@amname@_3 SELECT i, 2 FROM generate_series(1, 10) i; +2: INSERT INTO fast_analyze_@amname@_3 SELECT i, 1 FROM generate_series(1, 10) i; +3: INSERT INTO fast_analyze_@amname@_3 SELECT i, 5 FROM generate_series(1, 10) i; +1: ABORT; +2: ABORT; +3: ABORT; + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +ANALYZE fast_analyze_@amname@_3; + +-- We have not sampled any live rows on any QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +-- (b) With blkdir subcase + +CREATE INDEX ON fast_analyze_@amname@_3(i); + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +ANALYZE fast_analyze_@amname@_3; + +-- We have not sampled any live rows on any QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +-------------------------------------------------------------------------------- +-- Scenario 4: +-- We have ALL deleted rows [2 subcases -> blkdir and non-blkdir]. +-- Expectation: We have not collected any live rows. +-------------------------------------------------------------------------------- + +CREATE TABLE fast_analyze_@amname@_4(i int, j int) USING @amname@ DISTRIBUTED BY (j); + +-- (a) Without blkdir subcase + +-- Insert 10 rows in each QE. +1: BEGIN; +2: BEGIN; +3: BEGIN; +1: INSERT INTO fast_analyze_@amname@_4 SELECT i, 2 FROM generate_series(1, 10) i; +2: INSERT INTO fast_analyze_@amname@_4 SELECT i, 1 FROM generate_series(1, 10) i; +3: INSERT INTO fast_analyze_@amname@_4 SELECT i, 5 FROM generate_series(1, 10) i; +1: COMMIT; +2: COMMIT; +3: COMMIT; +-- Delete all rows. +DELETE FROM fast_analyze_@amname@_4; +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +ANALYZE fast_analyze_@amname@_4; + +-- We have not sampled any live rows on any QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +-- (b) With blkdir subcase + +CREATE INDEX ON fast_analyze_@amname@_4(i); + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +ANALYZE fast_analyze_@amname@_4; + +-- We have not sampled any live rows on any QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) + FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + +DROP TABLE fast_analyze_@amname@_1; +DROP TABLE fast_analyze_@amname@_2; +DROP TABLE fast_analyze_@amname@_3; +DROP TABLE fast_analyze_@amname@_4; + +-- +-- The following tests ensure fast analyze function to work +-- with multi-segfiles tables under concurrent inserts. 
+-- + +create table analyze_@amname@ (id int, a int, b inet, c inet) using @amname@ with (compresstype=zlib, compresslevel=3); + +insert into analyze_@amname@ select 2, i, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || + (i%255)::text))::inet, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || + (i%255)::text))::inet from generate_series(1,30000)i; + +insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; +insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +-- test ANALYZE after concurrent inserts commit + +1: begin; +1: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +2: begin; +2: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +3: begin; +3: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +4: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +5: analyze analyze_@amname@; + +1: commit; +2: commit; +3: abort; + +1: analyze analyze_@amname@; + +-- test aoblkdir based ANALYZE + +create index on analyze_@amname@(id); + +1: begin; +1: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +2: begin; +2: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +3: begin; +3: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +4: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000; + +5: analyze analyze_@amname@; + +1: commit; +2: commit; +3: abort; + +1: analyze analyze_@amname@; + +drop table analyze_@amname@; + +-- test more data and stability, note, it could take a little long time + +create table analyze_@amname@_2 (id int, a int, b inet, c inet) using @amname@ with (compresstype=zlib, compresslevel=3); +insert into analyze_@amname@_2 select 2, i, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || + (i%255)::text))::inet, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' 
|| + (i%255)::text))::inet from generate_series(1,1000)i; + +insert into analyze_@amname@_2 select * from analyze_@amname@_2; +insert into analyze_@amname@_2 select * from analyze_@amname@_2; +insert into analyze_@amname@_2 select * from analyze_@amname@_2; +insert into analyze_@amname@_2 select * from analyze_@amname@_2; +insert into analyze_@amname@_2 select * from analyze_@amname@_2; +insert into analyze_@amname@_2 select * from analyze_@amname@_2; +insert into analyze_@amname@_2 select * from analyze_@amname@_2; +insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +1: begin; +1: insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +2: insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +1: commit; + +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; + +1: begin; +1: insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +2: insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +1: abort; + +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; + +-- test with aoblkdir + +create index on analyze_@amname@_2(a); + +1: begin; +1: insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +2: insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +1: commit; + +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; + +1: begin; +1: insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +2: insert into analyze_@amname@_2 select * from analyze_@amname@_2; + +1: abort; + +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; +1: analyze analyze_@amname@_2; + +drop table analyze_@amname@_2; diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 8931e46a5fc..aec0005e8d8 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -148,6 +148,7 @@ test: uao/limit_indexscan_inits_row test: uao/test_pg_appendonly_version_row # Refer to the case comment for why it is commented out. # test: uao/bad_buffer_on_temp_ao_row +test: uao/fast_analyze_row test: uao/create_index_allows_readonly_row test: reorganize_after_ao_vacuum_skip_drop truncate_after_ao_vacuum_skip_drop mark_all_aoseg_await_drop @@ -209,6 +210,7 @@ test: uao/test_pg_appendonly_version_column # Refer to the case comment for why it is commented out. # test: uao/bad_buffer_on_temp_ao_column test: uao/create_index_allows_readonly_column +test: uao/fast_analyze_column # this case contains fault injection, must be put in a separate test group test: terminate_in_gang_creation diff --git a/src/test/isolation2/output/uao/fast_analyze.source b/src/test/isolation2/output/uao/fast_analyze.source new file mode 100644 index 00000000000..7f6a133e18b --- /dev/null +++ b/src/test/isolation2/output/uao/fast_analyze.source @@ -0,0 +1,632 @@ +-- +-- Test AO/CO sampling method. +-- +-- These tests ensure that we achieve our ANALYZE targets for AO/CO tables. +-- +CREATE TABLE fast_analyze_@amname@_1(i int, j int) USING @amname@ DISTRIBUTED BY (j); +CREATE + +-- Stats target info shows that we will sample 300 * (100) rows. 
+SHOW default_statistics_target; + default_statistics_target +--------------------------- + 100 +(1 row) +SELECT attstattarget FROM pg_attribute WHERE attrelid = 'fast_analyze_@amname@_1'::regclass AND attname IN ('i', 'j'); + attstattarget +--------------- + -1 + -1 +(2 rows) + +-------------------------------------------------------------------------------- +-- Scenario 1: +-- We have MORE than 300 * default_statistics_target = 30k rows for a 2 int table, +-- spread across 3 segments, with no aborted rows [2 subcases -> blkdir and +-- non-blkdir]. +-- Expectation: We have collected 30k live rows. +-------------------------------------------------------------------------------- + +-- (a) Without blkdir subcase + +-- Insert 10.5k rows in each QE. +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN +1: INSERT INTO fast_analyze_@amname@_1 SELECT i, 2 FROM generate_series(1, 10500) i; +INSERT 10500 +2: INSERT INTO fast_analyze_@amname@_1 SELECT i, 1 FROM generate_series(1, 10500) i; +INSERT 10500 +3: INSERT INTO fast_analyze_@amname@_1 SELECT i, 5 FROM generate_series(1, 10500) i; +INSERT 10500 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +3: COMMIT; +COMMIT + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: +(3 rows) + +ANALYZE fast_analyze_@amname@_1; +ANALYZE + +-- We have sampled 10k live rows. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10000' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10000' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10000' + +(3 rows) + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +-- (b) With blkdir subcase + +CREATE INDEX ON fast_analyze_@amname@_1(i); +CREATE + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: +(3 rows) + +ANALYZE fast_analyze_@amname@_1; +ANALYZE + +-- We have sampled 10k live rows from each QE. 
+SELECT gp_inject_fault('returned_sample_row', 'status', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10000' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10000' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10000' + +(3 rows) + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +-------------------------------------------------------------------------------- +-- Scenario 2: +-- We have LESS than 300 * default_statistics_target = 30k rows for a 2 int table, +-- spread across 3 segments, with no aborted rows [2 subcases -> blkdir and +-- non-blkdir]. +-- Expectation: We have collected number of live rows = total tupcount of table. +-------------------------------------------------------------------------------- + +CREATE TABLE fast_analyze_@amname@_2(i int, j int) USING @amname@ DISTRIBUTED BY (j); +CREATE + +-- (a) Without blkdir subcase + +-- Insert 10 rows in each QE. +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN +1: INSERT INTO fast_analyze_@amname@_2 SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +2: INSERT INTO fast_analyze_@amname@_2 SELECT i, 1 FROM generate_series(1, 10) i; +INSERT 10 +3: INSERT INTO fast_analyze_@amname@_2 SELECT i, 5 FROM generate_series(1, 10) i; +INSERT 10 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +3: COMMIT; +COMMIT + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: +(3 rows) + +ANALYZE fast_analyze_@amname@_2; +ANALYZE + +-- We have sampled 10 live rows from each QE. 
+SELECT gp_inject_fault('returned_sample_row', 'status', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10' + +(3 rows) + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +-- (b) With blkdir subcase + +CREATE INDEX ON fast_analyze_@amname@_2(i); +CREATE + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: +(3 rows) + +ANALYZE fast_analyze_@amname@_2; +ANALYZE + +-- We have sampled 10 live rows from each QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'10' + +(3 rows) + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +-------------------------------------------------------------------------------- +-- Scenario 3: +-- We have ALL aborted rows [2 subcases -> blkdir and non-blkdir]. +-- Expectation: We have not sampled any live rows. +-------------------------------------------------------------------------------- + +CREATE TABLE fast_analyze_@amname@_3(i int, j int) USING @amname@ DISTRIBUTED BY (j); +CREATE + +-- (a) Without blkdir subcase + +-- Insert 10 rows in each QE. 
+1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN +1: INSERT INTO fast_analyze_@amname@_3 SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +2: INSERT INTO fast_analyze_@amname@_3 SELECT i, 1 FROM generate_series(1, 10) i; +INSERT 10 +3: INSERT INTO fast_analyze_@amname@_3 SELECT i, 5 FROM generate_series(1, 10) i; +INSERT 10 +1: ABORT; +ABORT +2: ABORT; +ABORT +3: ABORT; +ABORT + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: +(3 rows) + +ANALYZE fast_analyze_@amname@_3; +ANALYZE + +-- We have not sampled any live rows on any QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + +(3 rows) + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +-- (b) With blkdir subcase + +CREATE INDEX ON fast_analyze_@amname@_3(i); +CREATE + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: +(3 rows) + +ANALYZE fast_analyze_@amname@_3; +ANALYZE + +-- We have not sampled any live rows on any QE. 
+SELECT gp_inject_fault('returned_sample_row', 'status', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + +(3 rows) + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +-------------------------------------------------------------------------------- +-- Scenario 4: +-- We have ALL deleted rows [2 subcases -> blkdir and non-blkdir]. +-- Expectation: We have not collected any live rows. +-------------------------------------------------------------------------------- + +CREATE TABLE fast_analyze_@amname@_4(i int, j int) USING @amname@ DISTRIBUTED BY (j); +CREATE + +-- (a) Without blkdir subcase + +-- Insert 10 rows in each QE. +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN +1: INSERT INTO fast_analyze_@amname@_4 SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +2: INSERT INTO fast_analyze_@amname@_4 SELECT i, 1 FROM generate_series(1, 10) i; +INSERT 10 +3: INSERT INTO fast_analyze_@amname@_4 SELECT i, 5 FROM generate_series(1, 10) i; +INSERT 10 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +3: COMMIT; +COMMIT +-- Delete all rows. +DELETE FROM fast_analyze_@amname@_4; +DELETE 30 +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: +(3 rows) + +ANALYZE fast_analyze_@amname@_4; +ANALYZE + +-- We have not sampled any live rows on any QE. 
+SELECT gp_inject_fault('returned_sample_row', 'status', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + +(3 rows) + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +-- (b) With blkdir subcase + +CREATE INDEX ON fast_analyze_@amname@_4(i); +CREATE + +SELECT gp_inject_fault_infinite('returned_sample_row', 'skip', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: +(3 rows) + +ANALYZE fast_analyze_@amname@_4; +ANALYZE + +-- We have not sampled any live rows on any QE. +SELECT gp_inject_fault('returned_sample_row', 'status', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + + Success: fault name:'returned_sample_row' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'set' num times hit:'0' + +(3 rows) + +SELECT gp_inject_fault('returned_sample_row', 'reset', dbid) FROM gp_segment_configuration WHERE content != -1 AND role = 'p'; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +DROP TABLE fast_analyze_@amname@_1; +DROP +DROP TABLE fast_analyze_@amname@_2; +DROP +DROP TABLE fast_analyze_@amname@_3; +DROP +DROP TABLE fast_analyze_@amname@_4; +DROP + +-- +-- The following tests ensure fast analyze function to work +-- with multi-segfiles tables under concurrent inserts. +-- + +create table analyze_@amname@ (id int, a int, b inet, c inet) using @amname@ with (compresstype=zlib, compresslevel=3); +CREATE + +insert into analyze_@amname@ select 2, i, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text))::inet, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' 
+
+insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+-- test ANALYZE after concurrent inserts commit
+
+1: begin;
+BEGIN
+1: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+2: begin;
+BEGIN
+2: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+3: begin;
+BEGIN
+3: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+4: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+5: analyze analyze_@amname@;
+ANALYZE
+
+1: commit;
+COMMIT
+2: commit;
+COMMIT
+3: abort;
+ABORT
+
+1: analyze analyze_@amname@;
+ANALYZE
+
+-- test aoblkdir based ANALYZE
+
+create index on analyze_@amname@(id);
+CREATE
+
+1: begin;
+BEGIN
+1: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+2: begin;
+BEGIN
+2: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+3: begin;
+BEGIN
+3: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+4: insert into analyze_@amname@ select * from analyze_@amname@ limit 1000;
+INSERT 1000
+
+5: analyze analyze_@amname@;
+ANALYZE
+
+1: commit;
+COMMIT
+2: commit;
+COMMIT
+3: abort;
+ABORT
+
+1: analyze analyze_@amname@;
+ANALYZE
+
+drop table analyze_@amname@;
+DROP
+
+-- test with more data for stability; note, this could take a while
+
+create table analyze_@amname@_2 (id int, a int, b inet, c inet) using @amname@ with (compresstype=zlib, compresslevel=3);
+CREATE
+insert into analyze_@amname@_2 select 2, i, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text))::inet, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text))::inet from generate_series(1,1000)i;
+INSERT 1000
+
+insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 1000
+insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 2000
+insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 4000
+insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 8000
+insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 16000
+insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 32000
+insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 64000
+insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 128000
+
+1: begin;
+BEGIN
+1: insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 256000
+
+2: insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 256000
+
+1: commit;
+COMMIT
+
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+
+1: begin;
+BEGIN
+1: insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 768000
+
+2: insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 768000
+
+1: abort;
+ABORT
+
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+
+-- test with aoblkdir
+
+create index on analyze_@amname@_2(a);
+CREATE
+
+1: begin;
+BEGIN
+1: insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 1536000
+
+2: insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 1536000
+
+1: commit;
+COMMIT
+
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+
+1: begin;
+BEGIN
+1: insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 4608000
+
+2: insert into analyze_@amname@_2 select * from analyze_@amname@_2;
+INSERT 4608000
+
+1: abort;
+ABORT
+
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+1: analyze analyze_@amname@_2;
+ANALYZE
+
+drop table analyze_@amname@_2;
+DROP
diff --git a/src/test/isolation2/sql/ao_blkdir.sql b/src/test/isolation2/sql/ao_blkdir.sql
index cd8f1b662d0..cff529876c1 100644
--- a/src/test/isolation2/sql/ao_blkdir.sql
+++ b/src/test/isolation2/sql/ao_blkdir.sql
@@ -96,6 +96,81 @@ WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5;
 DROP TABLE ao_blkdir_test;
+-- Test `tupcount` in pg_aoseg == sum of `row_count` across all
+-- aoblkdir entries for each segno. Test with commits, aborts and deletes.
+
+-- Case1: without VACUUM ANALYZE
+CREATE TABLE ao_blkdir_test_rowcount(i int, j int) USING ao_row DISTRIBUTED BY (j);
+1: BEGIN;
+2: BEGIN;
+3: BEGIN;
+4: BEGIN;
+1: INSERT INTO ao_blkdir_test_rowcount SELECT i, 2 FROM generate_series(1, 10) i;
+2: INSERT INTO ao_blkdir_test_rowcount SELECT i, 3 FROM generate_series(1, 20) i;
+3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 30) i;
+3: ABORT;
+3: BEGIN;
+3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 40) i;
+4: INSERT INTO ao_blkdir_test_rowcount SELECT i, 7 FROM generate_series(1, 50) i;
+1: COMMIT;
+2: COMMIT;
+3: COMMIT;
+4: COMMIT;
+DELETE FROM ao_blkdir_test_rowcount WHERE j = 7;
+
+CREATE INDEX ao_blkdir_test_rowcount_idx ON ao_blkdir_test_rowcount(i);
+
+SELECT segno, sum(row_count) AS totalrows FROM
+  (SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id')
+   WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno;
+SELECT segno, sum(tupcount) AS totalrows FROM
+  gp_toolkit.__gp_aoseg('ao_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno;
+
+-- Case2: with VACUUM ANALYZE
+DROP TABLE ao_blkdir_test_rowcount;
+CREATE TABLE ao_blkdir_test_rowcount(i int, j int) USING ao_row DISTRIBUTED BY (j);
+CREATE INDEX ao_blkdir_test_rowcount_idx ON ao_blkdir_test_rowcount(i);
+1: BEGIN;
+2: BEGIN;
+3: BEGIN;
+4: BEGIN;
+1: INSERT INTO ao_blkdir_test_rowcount SELECT i, 2 FROM generate_series(1, 10) i;
+1: INSERT INTO ao_blkdir_test_rowcount SELECT i, 2 FROM ao_blkdir_test_rowcount;
+1: INSERT INTO ao_blkdir_test_rowcount SELECT i, 2 FROM ao_blkdir_test_rowcount;
+2: INSERT INTO ao_blkdir_test_rowcount SELECT i, 3 FROM generate_series(1, 20) i;
+2: INSERT INTO ao_blkdir_test_rowcount SELECT i, 3 FROM ao_blkdir_test_rowcount;
+2: INSERT INTO ao_blkdir_test_rowcount SELECT i, 3 FROM ao_blkdir_test_rowcount;
+3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 30) i;
+3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM ao_blkdir_test_rowcount;
+3: INSERT INTO ao_blkdir_test_rowcount SELECT i, 4 FROM ao_blkdir_test_rowcount;
+4: INSERT INTO ao_blkdir_test_rowcount SELECT i, 7 FROM generate_series(1, 50) i;
+4: INSERT INTO ao_blkdir_test_rowcount SELECT i, 7 FROM ao_blkdir_test_rowcount;
+4: INSERT INTO ao_blkdir_test_rowcount SELECT i, 7 FROM ao_blkdir_test_rowcount;
+1: COMMIT;
+2: COMMIT;
+3: ABORT;
+4: COMMIT;
+
+DELETE FROM ao_blkdir_test_rowcount WHERE j = 7;
+VACUUM ANALYZE ao_blkdir_test_rowcount;
+
+SELECT segno, sum(row_count) AS totalrows FROM
+  (SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id')
+   WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno;
+SELECT segno, sum(tupcount) AS totalrows FROM
+  gp_toolkit.__gp_aoseg('ao_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno;
+
+UPDATE ao_blkdir_test_rowcount SET i = i + 1;
+VACUUM ANALYZE ao_blkdir_test_rowcount;
+
+SELECT segno, sum(row_count) AS totalrows FROM
+  (SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id')
+   WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno;
+SELECT segno, sum(tupcount) AS totalrows FROM
+  gp_toolkit.__gp_aoseg('ao_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno;
+
+DROP TABLE ao_blkdir_test_rowcount;
+
 --------------------------------------------------------------------------------
 -- AOCO tables
 --------------------------------------------------------------------------------
@@ -203,3 +278,79 @@ FROM gp_segment_configuration WHERE role = 'p' AND content = 0;
 4: INSERT INTO aoco_blkdir_test VALUES (2, 2);
 DROP TABLE aoco_blkdir_test;
+
+-- Test `tupcount` in pg_ao(cs)seg == sum of `row_count` across all
+-- aoblkdir entries for each <segno, columngroup_no>. Test with commits, aborts
+-- and deletes.
+
+-- Case1: without VACUUM ANALYZE
+CREATE TABLE aoco_blkdir_test_rowcount(i int, j int) USING ao_column DISTRIBUTED BY (j);
+1: BEGIN;
+2: BEGIN;
+3: BEGIN;
+4: BEGIN;
+1: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 2 FROM generate_series(1, 10) i;
+2: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 3 FROM generate_series(1, 20) i;
+3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 30) i;
+3: ABORT;
+3: BEGIN;
+3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 40) i;
+4: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 7 FROM generate_series(1, 50) i;
+1: COMMIT;
+2: COMMIT;
+3: COMMIT;
+4: COMMIT;
+DELETE FROM aoco_blkdir_test_rowcount WHERE j = 7;
+
+CREATE INDEX aoco_blkdir_test_rowcount_idx ON aoco_blkdir_test_rowcount(i);
+
+SELECT segno, columngroup_no, sum(row_count) AS totalrows FROM
+  (SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id')
+   WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno, columngroup_no;
+SELECT segno, column_num, sum(tupcount) AS totalrows FROM
+  gp_toolkit.__gp_aocsseg('aoco_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno, column_num;
+
+-- Case2: with VACUUM ANALYZE
+DROP TABLE aoco_blkdir_test_rowcount;
+CREATE TABLE aoco_blkdir_test_rowcount(i int, j int) USING ao_column DISTRIBUTED BY (j);
+CREATE INDEX aoco_blkdir_test_rowcount_idx ON aoco_blkdir_test_rowcount(i);
+1: BEGIN;
+2: BEGIN;
+3: BEGIN;
+4: BEGIN;
+1: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 2 FROM generate_series(1, 10) i;
+1: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 2 FROM aoco_blkdir_test_rowcount;
+1: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 2 FROM aoco_blkdir_test_rowcount;
+2: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 3 FROM generate_series(1, 20) i;
+2: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 3 FROM aoco_blkdir_test_rowcount;
+2: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 3 FROM aoco_blkdir_test_rowcount;
+3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM generate_series(1, 30) i;
+3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM aoco_blkdir_test_rowcount;
+3: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 4 FROM aoco_blkdir_test_rowcount;
+4: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 7 FROM generate_series(1, 50) i;
+4: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 7 FROM aoco_blkdir_test_rowcount;
+4: INSERT INTO aoco_blkdir_test_rowcount SELECT i, 7 FROM aoco_blkdir_test_rowcount;
+1: COMMIT;
+2: COMMIT;
+3: ABORT;
+4: COMMIT;
+
+DELETE FROM aoco_blkdir_test_rowcount WHERE j = 7;
+VACUUM ANALYZE aoco_blkdir_test_rowcount;
+
+SELECT segno, columngroup_no, sum(row_count) AS totalrows FROM
+  (SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id')
+   WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno, columngroup_no;
+SELECT segno, column_num, sum(tupcount) AS totalrows FROM
+  gp_toolkit.__gp_aocsseg('aoco_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno, column_num;
+
+UPDATE aoco_blkdir_test_rowcount SET i = i + 1;
+VACUUM ANALYZE aoco_blkdir_test_rowcount;
+
+SELECT segno, columngroup_no, sum(row_count) AS totalrows FROM
+  (SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test_rowcount')).* FROM gp_dist_random('gp_id')
+   WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno, columngroup_no;
+SELECT segno, column_num, sum(tupcount) AS totalrows FROM
+  gp_toolkit.__gp_aocsseg('aoco_blkdir_test_rowcount') WHERE segment_id = 0 GROUP BY segno, column_num;
+
+DROP TABLE aoco_blkdir_test_rowcount;
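
As a usage note for reviewers: the invariant asserted by the new rowcount tests can also be checked by hand. The sketch below is a minimal standalone version, assuming a hypothetical row-oriented AO table named my_ao_table that already has at least one index (and therefore an aoblkdir); it reuses the same gp_toolkit calls the tests use.

  -- my_ao_table is a placeholder; substitute any ao_row table with an index.
  -- Rows per segfile according to the block directory, on segment 0.
  SELECT segno, sum(row_count) AS blkdir_rows FROM
    (SELECT (gp_toolkit.__gp_aoblkdir('my_ao_table')).* FROM gp_dist_random('gp_id')
     WHERE gp_segment_id = 0)s GROUP BY segno, columngroup_no ORDER BY segno;
  -- Rows per segfile according to pg_aoseg, on segment 0.
  SELECT segno, sum(tupcount) AS aoseg_rows FROM
    gp_toolkit.__gp_aoseg('my_ao_table') WHERE segment_id = 0 GROUP BY segno;

If the two per-segno totals disagree, the block directory and pg_aoseg have fallen out of sync, which is exactly the condition these tests guard against.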