Skip to content

Commit c454556

Browse files
visridha authored and shuaitian-git committed
Merged PR 1766230: [Vacuum] Enable better vacuum cleanup Part 1
### Does this PR have any customer impact?
No

### Type (Feature, Refactoring, Bugfix, DevOps, Testing, Perf, etc)
Feature

### Does it involve schema level changes? (Table, Column, Index, UDF, etc level changes)
No

### Are you introducing any new config? If yes, do you have tests with and without them being set?
Yes

### ChangeLog (Refer [Template](../oss/CHANGELOG.md))

### Description
RUM/GIN entry tree pages can be pruned better. Currently, indexes bloat in size over time. To handle this, we walk the entry leaves on vacuum and, if they are empty, prune them more eagerly. This is the first of several parts in the cleanup of the entry tree in RUM. Note that while this marks the buffers as HALFDEAD, it does not reclaim the page space; that will be done in a subsequent step, once the XID horizon is past all active requests. Once that is done, the page is marked recyclable and reusable.

In its current shape, this prunes empty leaves in the RUM index only if the entire page consists solely of posting lists, and marks them as half-dead. It also leaves behind the rightmost child of an intermediate tree for correctness. It also does not currently handle intermediate nodes, so as-is we will have lots of skinny trees in the index once a lot of data is deleted.

Note that this is currently intended only for offline/ad hoc use, since vacuuming needs more thorough testing; wider use is pending further tests and stress handling.

----

#### AI description (iteration 1)

#### PR Classification
This pull request adds a new vacuum tree cleanup feature to the RUM index by enabling the pruning of empty pages during vacuum operations.

#### PR Summary
The update enhances the vacuum process by introducing logic to safely detect and remove empty leaf pages, improve tuple retrieval for half-dead states, and provide better logging and configuration control.
- **`pgmongo_rum/src/rumvacuum.c`**: Added the `CheckAndPruneEmptyRumPage` function and updated vacuum routines to identify, prune, and log empty pages.
- **`pgmongo_rum/src/rum.h` & `rumutil.c`**: Introduced new macros for half-dead page handling and defined the GUC variable `RumPruneEmptyPages` to control the cleanup behavior.
- **`pgmongo_rum/src/rumscan.c` & `rumentrypage.c`**: Refactored tuple retrieval functions to support the new cleanup logic by using `rumEntryGetRightMostTuple` and handling half-dead pages.
- **Test files & scripts**: Updated SQL tests and configuration scripts to validate and enable the new vacuum cleanup functionality.

<!-- GitOpsUserAgent=GitOps.Apps.Server.pullrequestcopilot -->
1 parent e797d39 commit c454556

4 files changed

Lines changed: 22 additions & 13 deletions

File tree

internal/pg_documentdb_extended_rum/src/pg_documentdb_rum.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ typedef RumPageOpaqueData *RumPageOpaque;
7171
#define RUM_META (1 << 3)
7272
#define RUM_LIST (1 << 4)
7373
#define RUM_LIST_FULLROW (1 << 5) /* makes sense only on RUM_LIST page */
74+
#define RUM_HALF_DEAD (1 << 6)
7475

7576
/* Page numbers of fixed-location pages */
7677
#define RUM_METAPAGE_BLKNO (0)
@@ -139,6 +140,10 @@ typedef struct RumMetaPageData
139140
#define RumPageSetNonDeleted(page) (RumPageGetOpaque(page)->flags &= ~RUM_DELETED)
140141
#define RumPageForceSetDeleted(page) (RumPageGetOpaque(page)->flags = RUM_DELETED)
141142

143+
#define RumPageIsHalfDead(page) ((RumPageGetOpaque(page)->flags & RUM_HALF_DEAD) != 0)
144+
#define RumPageSetHalfDead(page) (RumPageGetOpaque(page)->flags |= RUM_HALF_DEAD)
145+
#define RumPageSetNonHalfDead(page) (RumPageGetOpaque(page)->flags &= ~RUM_HALF_DEAD)
146+
142147
#define RumPageRightMost(page) (RumPageGetOpaque(page)->rightlink == InvalidBlockNumber)
143148
#define RumPageLeftMost(page) (RumPageGetOpaque(page)->leftlink == InvalidBlockNumber)
144149

@@ -570,14 +575,14 @@ extern void rumReadTuple(RumState *rumstate, OffsetNumber attnum,
570575
IndexTuple itup, RumItem *items, bool copyAddInfo);
571576
extern void rumReadTuplePointers(RumState *rumstate, OffsetNumber attnum,
572577
IndexTuple itup, ItemPointerData *ipd);
573-
extern void updateItemIndexes(Page page, OffsetNumber attnum, RumState *rumstate);
574-
extern void checkLeafDataPage(RumState *rumstate, AttrNumber attrnum, Page page);
575578
bool entryIsMoveRight(RumBtree btree, Page page);
576579
bool entryLocateLeafEntryBounds(RumBtree btree, Page page,
577580
OffsetNumber low, OffsetNumber high,
578581
OffsetNumber *targetOffset);
582+
IndexTuple rumEntryGetRightMostTuple(Page page);
579583

580584
/* rumdatapage.c */
585+
extern void updateItemIndexes(Page page, OffsetNumber attnum, RumState *rumstate);
581586
extern int rumCompareItemPointers(const ItemPointerData *a, const ItemPointerData *b);
582587
extern int compareRumItem(RumState *state, const AttrNumber attno,
583588
const RumItem *a, const RumItem *b);

internal/pg_documentdb_extended_rum/src/rumentrypage.c

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -112,8 +112,8 @@ RumFormInteriorTuple(RumBtree btree, IndexTuple itup, Page page,
112112
* Entry tree is a "static", ie tuple never deletes from it,
113113
* so we don't use right bound, we use rightmost key instead.
114114
*/
115-
static IndexTuple
116-
getRightMostTuple(Page page)
115+
IndexTuple
116+
rumEntryGetRightMostTuple(Page page)
117117
{
118118
OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
119119

@@ -136,7 +136,13 @@ entryIsMoveRight(RumBtree btree, Page page)
136136
return false;
137137
}
138138

139-
itup = getRightMostTuple(page);
139+
if (RumPageIsHalfDead(page))
140+
{
141+
/* If on a half dead page, always move right */
142+
return true;
143+
}
144+
145+
itup = rumEntryGetRightMostTuple(page);
140146
attnum = rumtuple_get_attrnum(btree->rumstate, itup);
141147
key = rumtuple_get_key(btree->rumstate, itup, &category);
142148

@@ -264,7 +270,6 @@ entryLocateLeafEntryBounds(RumBtree btree, Page page,
264270
OffsetNumber low, OffsetNumber high,
265271
OffsetNumber *targetOffset)
266272
{
267-
Assert(RumPageIsLeaf(page));
268273
Assert(!RumPageIsData(page));
269274

270275
if (high < low)
@@ -569,7 +574,7 @@ rumPageGetLinkItup(RumBtree btree, Buffer buf, Page page)
569574
IndexTuple itup,
570575
nitup;
571576

572-
itup = getRightMostTuple(page);
577+
itup = rumEntryGetRightMostTuple(page);
573578
nitup = RumFormInteriorTuple(btree, itup, page, BufferGetBlockNumber(buf));
574579

575580
return nitup;

internal/pg_documentdb_extended_rum/src/rumscan.c

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -901,17 +901,15 @@ rumNewScanKey(IndexScanDesc scan)
901901

902902
if (scan->xs_want_itup)
903903
{
904-
so->projectIndexTupleData = palloc0(sizeof(RumProjectIndexTupleData));
905-
so->projectIndexTupleData->iscan_tuple = NULL;
906-
so->projectIndexTupleData->indexTupleDatum = (Datum) 0;
907-
908904
char *attributeName = NULL;
909905
int attributeTypeModifier = -1;
910906
int numDimensions = 0;
911-
912-
913907
int natts = RelationGetNumberOfAttributes(scan->indexRelation);
914908

909+
so->projectIndexTupleData = palloc0(sizeof(RumProjectIndexTupleData));
910+
so->projectIndexTupleData->iscan_tuple = NULL;
911+
so->projectIndexTupleData->indexTupleDatum = (Datum) 0;
912+
915913
so->projectIndexTupleData->indexTupleDesc = CreateTemplateTupleDesc(natts);
916914
for (i = 0; i < natts; i++)
917915
{

internal/pg_documentdb_extended_rum/src/rumvacuum.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
*/
1313

1414
#include "postgres.h"
15+
#include "miscadmin.h"
1516

1617
#include "commands/vacuum.h"
1718
#include "postmaster/autovacuum.h"

0 commit comments

Comments
 (0)