Skip to content

Commit b40bee7

Browse files
Alexandre Costa authored and MsftBrettShirley committed
Space leak report improvements
- Improves concurrency handling.
- Fixes cases where the report might fail due to locked tables. The main scenario is a race with table deletion.
[Substrate:756e67c89c7312d1302bad38360ba6cb5e127418]
1 parent 3aa8700 commit b40bee7

15 files changed

Lines changed: 287 additions & 159 deletions

File tree

dev/ese/src/_res/jetmsg.mc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2620,6 +2620,8 @@ Space used by root space tree split buffers: %34 page(s) (%35 bytes, %36%%).%n
26202620
Number of cached primary objects: %37%n
26212621
Number of uncached primary objects: %38%n
26222622
Correction applied to space owned by primary objects: %46 page(s) (%47 bytes, %48%%).%n
2623+
Enumeration conflicts resolved successfully: %49%n
2624+
Enumeration conflicts not resolved successfully: %50%n
26232625
Performance: %39 page(s) read, %40 page(s) preread, %41 page(s) referenced, %42 page(s) dirtied, %43 page(s) re-dirtied.%n
26242626
Duration: %44 minute(s) and %45 second(s).%n
26252627
.

dev/ese/src/ese/_log/logredo.cxx

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -980,12 +980,12 @@ LOCAL ERR ErrLGRICreateFucb(
980980
const BOOL fSpace,
981981
FUCB **ppfucb )
982982
{
983-
ERR err = JET_errSuccess;
984-
FUCB *pfucb = pfucbNil;
985-
FCB *pfcb = pfcbNil;
986-
BOOL fState;
987-
ULONG cRetries = 0;
988-
BOOL fCreatedNewFCB = fFalse;
983+
ERR err = JET_errSuccess;
984+
FUCB *pfucb = pfucbNil;
985+
FCB *pfcb = pfcbNil;
986+
FCBStateFlags fcbsf;
987+
ULONG cRetries = 0;
988+
BOOL fCreatedNewFCB = fFalse;
989989

990990
// create fucb
991991
//
@@ -1005,8 +1005,8 @@ LOCAL ERR ErrLGRICreateFucb(
10051005

10061006
// get fcb for table, if one exists
10071007
//
1008-
pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fState, fTrue /* FIncrementRefCount */, fTrue /* fInitForRecovery */);
1009-
Assert( pfcbNil == pfcb || fFCBStateInitialized == fState );
1008+
pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue /* FIncrementRefCount */, fTrue /* fInitForRecovery */);
1009+
Assert( pfcbNil == pfcb || ( fcbsf & fcbsfInitialized ) );
10101010
if ( pfcbNil == pfcb )
10111011
{
10121012
// there exists no fcb for FDP
@@ -1173,8 +1173,7 @@ LOCAL ERR ErrLGRIPurgeFcbs( const IFMP ifmp, const PGNO pgnoFDP, FDPTYPE fFDPTyp
11731173

11741174
Assert( NULL != pctablehash );
11751175

1176-
BOOL fState;
1177-
FCB * pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fState );
1176+
FCB * pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP );
11781177

11791178
if ( pfcbNil == pfcb )
11801179
{
@@ -1385,7 +1384,7 @@ LOCAL ERR ErrLGRIPurgeFcbs( const IFMP ifmp, const PGNO pgnoFDP, FDPTYPE fFDPTyp
13851384
pfcb->Purge();
13861385
}
13871386

1388-
Assert( FCB::PfcbFCBGet( ifmp, pgnoFDP, &fState ) == pfcbNil );
1387+
Assert( FCB::PfcbFCBGet( ifmp, pgnoFDP ) == pfcbNil );
13891388

13901389
return JET_errSuccess;
13911390
}
@@ -10957,10 +10956,9 @@ ERR LOG::ErrLGRIRedoRootPageMove( PIB* const ppib, const DBTIME dbtime )
1095710956
OnDebug( rm.AssertValid( fFalse /* fBeforeMove */, fTrue /* fRedo */ ) );
1095810957

1095910958
// We must not have reloaded these.
10960-
BOOL fUnused = fFalse;
10961-
Assert( FCB::PfcbFCBGet( ifmp, rm.pgnoFDP, &fUnused ) == pfcbNil );
10962-
Assert( FCB::PfcbFCBGet( ifmp, pgnoFDPMSO, &fUnused ) == pfcbNil );
10963-
Assert( FCB::PfcbFCBGet( ifmp, pgnoFDPMSOShadow, &fUnused ) == pfcbNil );
10959+
Assert( FCB::PfcbFCBGet( ifmp, rm.pgnoFDP ) == pfcbNil );
10960+
Assert( FCB::PfcbFCBGet( ifmp, pgnoFDPMSO ) == pfcbNil );
10961+
Assert( FCB::PfcbFCBGet( ifmp, pgnoFDPMSOShadow ) == pfcbNil );
1096410962

1096510963
// We will log the root page move to the RBS even if PerformRootMove is skipped for the page, as it is possible the page was patched and it was skipped.
1096610964
// Note: It is important that we capture this into the snapshot after we capture the preimage of the pages being moved.

dev/ese/src/ese/bt.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7778,7 +7778,7 @@ ERR ErrBTIOpen(
77787778
{
77797779
ERR err;
77807780
FCB *pfcb;
7781-
INT fState;
7781+
FCBStateFlags fcbsf;
77827782
ULONG cRetries = 0;
77837783
PIBTraceContextScope tcScope = ppib->InitTraceContextScope( );
77847784
tcScope->iorReason.SetIors( iorsBTOpen );
@@ -7788,13 +7788,13 @@ ERR ErrBTIOpen(
77887788

77897789
// get the FCB for the given ifmp/pgnoFDP
77907790

7791-
pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fState, fTrue, !fWillInitFCB );
7791+
pfcb = FCB::PfcbFCBGet( ifmp, pgnoFDP, &fcbsf, fTrue, !fWillInitFCB );
77927792
if ( pfcb == pfcbNil )
77937793
{
77947794

77957795
// the FCB does not exist
77967796

7797-
Assert( fFCBStateNull == fState );
7797+
Assert( fcbsfNone == fcbsf );
77987798

77997799
// try to create a new B-tree which will cause the creation of the new FCB
78007800

@@ -7821,7 +7821,7 @@ ERR ErrBTIOpen(
78217821
{
78227822
tcScope->nParentObjectClass = pfcb->TCE();
78237823

7824-
if ( fFCBStateInitialized == fState )
7824+
if ( fcbsf & fcbsfInitialized )
78257825
{
78267826
Assert( pfcb->WRefCount() >= 1);
78277827
err = ErrBTOpen( ppib, pfcb, ppfucb );

dev/ese/src/ese/cat.cxx

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10179,7 +10179,7 @@ ERR ErrCATRenameTable(
1017910179
// ================================================================
1018010180
{
1018110181
ERR err;
10182-
INT fState = fFCBStateNull;
10182+
FCBStateFlags fcbsf = fcbsfNone;
1018310183
FCB * pfcbTable = pfcbNil;
1018410184
OBJID objidTable;
1018510185
PGNO pgnoFDPTable;
@@ -10192,10 +10192,10 @@ ERR ErrCATRenameTable(
1019210192
// check to see if the FCB is present and initialized
1019310193
// if its not present we can just update the catalog
1019410194

10195-
pfcbTable = FCB::PfcbFCBGet( ifmp, pgnoFDPTable, &fState );
10195+
pfcbTable = FCB::PfcbFCBGet( ifmp, pgnoFDPTable, &fcbsf );
1019610196
if( pfcbNil != pfcbTable )
1019710197
{
10198-
if( fFCBStateInitialized != fState )
10198+
if( !( fcbsf & fcbsfInitialized ) )
1019910199
{
1020010200

1020110201
// this should only happen if this is called in a multi-threaded scenario
@@ -15241,6 +15241,7 @@ ERR ErrCATGetNextRootObject(
1524115241
_In_ const BOOL fSortedByObjId,
1524215242
_Inout_ FUCB** const ppfucbCatalog,
1524315243
_Out_ OBJID* const pobjid,
15244+
_Out_ PGNO* const ppgnoFDP,
1524415245
_Out_writes_opt_z_( JET_cbNameMost + 1 ) CHAR* const szObjectName )
1524515246
// ================================================================
1524615247
{
@@ -15251,6 +15252,7 @@ ERR ErrCATGetNextRootObject(
1525115252

1525215253
ERR err = JET_errSuccess;
1525315254
OBJID objid = objidNil;
15255+
PGNO pgnoFDP = pgnoNull;
1525415256
FUCB* pfucbCatalog = *ppfucbCatalog;
1525515257
BOOL fInitilializedFucb = fFalse;
1525615258
BOOL fCatalogLatched = fFalse;
@@ -15387,6 +15389,15 @@ ERR ErrCATGetNextRootObject(
1538715389
objid = *( (UnalignedLittleEndian<OBJID>*)dataField.Pv() );
1538815390
Assert( objid != objidNil );
1538915391

15392+
if ( ppgnoFDP != NULL )
15393+
{
15394+
// Retrieve the pgnoFDP.
15395+
Call( ErrRECIRetrieveFixedColumn( pfcbNil, pfucbCatalog->u.pfcb->Ptdb(), fidMSO_PgnoFDP, pfucbCatalog->kdfCurr.data, &dataField ) );
15396+
Assert( dataField.Cb() == sizeof( pgnoFDP ) );
15397+
pgnoFDP = *( ( UnalignedLittleEndian<PGNO>* )dataField.Pv() );
15398+
Assert( pgnoFDP != pgnoNull );
15399+
}
15400+
1539015401
if ( szObjectName != NULL )
1539115402
{
1539215403
// Retrieve the object name.
@@ -15406,6 +15417,10 @@ ERR ErrCATGetNextRootObject(
1540615417
{
1540715418
OSStrCbCopyA( szObjectName, sizeof( szObjectNameT ), szObjectNameT );
1540815419
}
15420+
if ( ppgnoFDP != NULL )
15421+
{
15422+
*ppgnoFDP = pgnoFDP;
15423+
}
1540915424
}
1541015425
else
1541115426
{
@@ -15414,6 +15429,10 @@ ERR ErrCATGetNextRootObject(
1541415429
{
1541515430
*szObjectName = '\0';
1541615431
}
15432+
if ( ppgnoFDP != NULL )
15433+
{
15434+
*ppgnoFDP = pgnoNull;
15435+
}
1541715436
}
1541815437

1541915438
if ( fCatalogLatched )

dev/ese/src/ese/dbutil.cxx

Lines changed: 109 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5153,6 +5153,8 @@ LOCAL VOID DBUTLIReportSpaceLeakEstimationSucceeded(
51535153
const CPG cpgSplitBuffers,
51545154
const ULONG cCachedPrimary,
51555155
const ULONG cUncachedPrimary,
5156+
const ULONG cEnumerationConflictsSucceeded,
5157+
const ULONG cEnumerationConflictsFailed,
51565158
const JET_THREADSTATS& jts,
51575159
const ULONG ulMinElapsed,
51585160
const double dblSecElapsed )
@@ -5189,7 +5191,9 @@ LOCAL VOID DBUTLIReportSpaceLeakEstimationSucceeded(
51895191
OSFormatW( L"%u", cUncachedPrimary ),
51905192
OSFormatW( L"%u", jts.cPageRead ), OSFormatW( L"%u", jts.cPagePreread ), OSFormatW( L"%u", jts.cPageReferenced ), OSFormatW( L"%u", jts.cPageDirtied ), OSFormatW( L"%u", jts.cPageRedirtied ),
51915193
OSFormatW( L"%u", ulMinElapsed ), OSFormatW( L"%.3f", dblSecElapsed ),
5192-
OSFormatW( L"%d", cpgOwnedPrimaryCorrection ), OSFormatW( L"%I64d", pfmp->CbOfCpgSigned( cpgOwnedPrimaryCorrection ) ), ( ( cpgOwnedPrimaryOriginal != 0 ) ? OSFormatW( L"%.3f", ( 100.0 * (double)cpgOwnedPrimaryCorrection ) / (double)cpgOwnedPrimaryOriginal ) : L"-" )
5194+
OSFormatW( L"%d", cpgOwnedPrimaryCorrection ), OSFormatW( L"%I64d", pfmp->CbOfCpgSigned( cpgOwnedPrimaryCorrection ) ), ( ( cpgOwnedPrimaryOriginal != 0 ) ? OSFormatW( L"%.3f", ( 100.0 * (double)cpgOwnedPrimaryCorrection ) / (double)cpgOwnedPrimaryOriginal ) : L"-" ),
5195+
OSFormatW( L"%u", cEnumerationConflictsSucceeded ),
5196+
OSFormatW( L"%u", cEnumerationConflictsFailed )
51935197
};
51945198
UtilReportEvent(
51955199
eventInformation,
@@ -5245,6 +5249,8 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp )
52455249
BOOL fRunning = fFalse;
52465250
JET_THREADSTATS jtsStart = { 0 }, jtsEnd = { 0 };
52475251
OBJID objidLast = objidNil;
5252+
PGNO pgnoFDPLast = pgnoNull;
5253+
ULONG cEnumerationConflictsFailed = 0, cEnumerationConflictsSucceeded = 0;
52485254
CPG cpgOwnedPrimary = 0, cpgOwnedPrimaryCorrection = 0;
52495255
ULONG cCachedPrimary = 0, cUncachedPrimary = 0;
52505256
CPG cpgUsedRoot = 0, cpgUsedOe = 0, cpgUsedAe = 0;
@@ -5277,9 +5283,9 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp )
52775283
ppib->SetFSessionLeakReport();
52785284

52795285
CHAR szObjectName[ JET_cbNameMost + 1 ];
5280-
for ( err = ErrCATGetNextRootObject( ppib, ifmp, fTrue, &pfucbCatalog, &objidLast, szObjectName );
5286+
for ( err = ErrCATGetNextRootObject( ppib, ifmp, fTrue, &pfucbCatalog, &objidLast, &pgnoFDPLast, szObjectName );
52815287
( err >= JET_errSuccess ) && ( objidLast != objidNil );
5282-
err = ErrCATGetNextRootObject( ppib, ifmp, fTrue, &pfucbCatalog, &objidLast, szObjectName ) )
5288+
err = ErrCATGetNextRootObject( ppib, ifmp, fTrue, &pfucbCatalog, &objidLast, &pgnoFDPLast, szObjectName ) )
52835289
{
52845290
#ifdef DEBUG
52855291
Assert( objidLast != objidSystemRoot ); // Root object is not supposed to be returned here.
@@ -5307,37 +5313,111 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp )
53075313
// Test injection.
53085314
OnDebug( while ( objidLast >= (OBJID)UlConfigOverrideInjection( 48550, objidFDPOverMax ) ) );
53095315

5310-
err = ErrFILEOpenTable( ppib, ifmp, &pfucbTable, szObjectName, JET_bitTableReadOnly | JET_bitTableTryPurgeOnClose );
5311-
if ( err == JET_errObjectNotFound )
5316+
BOOL fRetried = fFalse, fRetry = fFalse;
5317+
const BOOL fInfiniteRetries = OnDebugOrRetail( fTrue, fFalse );
5318+
ERR errRetry = JET_errSuccess;
5319+
const CHAR* wszRetryReason = "";
5320+
do
53125321
{
5313-
err = JET_errSuccess;
5314-
}
5315-
else
5316-
{
5317-
Call( err );
5322+
fRetried = fRetry;
5323+
if ( fRetry )
5324+
{
5325+
UtilSleep( 10 );
5326+
fRetry = fFalse;
5327+
}
53185328

5319-
cUncachedPrimary++;
5320-
5321-
// Test injection.
5322-
OnDebug( while ( objidLast >= (OBJID)UlConfigOverrideInjection( 57894, objidFDPOverMax ) ) );
5329+
err = ErrFILEOpenTable( ppib, ifmp, &pfucbTable, szObjectName, JET_bitTableReadOnly | JET_bitTableTryPurgeOnClose );
5330+
if ( ( err == JET_errObjectNotFound ) || ( err == JET_errTableLocked ) )
5331+
{
5332+
// We are probably racing with table deletion.
5333+
FCBStateFlags fcbsf = fcbsfNone;
5334+
const BOOL fFoundFcb = ( FCB::PfcbFCBGet( ifmp, pgnoFDPLast, &fcbsf, fFalse /* fIncrementRefCount */, fTrue /* fInitForRecovery */ ) != pfcbNil );
5335+
const BOOL fDeletePending = fFoundFcb && ( fcbsf & fcbsfDeletePending );
53235336

5324-
Call( ErrSPGetInfo(
5325-
ppib,
5326-
ifmp,
5327-
pfucbTable,
5328-
(BYTE*)&cpgPrimaryObject,
5329-
sizeof( cpgPrimaryObject ),
5330-
fSPOwnedExtent,
5331-
gci::Allow ) );
5337+
if ( fFoundFcb && !fDeletePending )
5338+
{
5339+
// This is unexpected if we know the table is actually getting deleted.
5340+
Assert( err != JET_errObjectNotFound );
5341+
fRetry = fTrue;
5342+
wszRetryReason = "DelNotPending";
5343+
}
5344+
else if ( fFoundFcb && fDeletePending )
5345+
{
5346+
// Table deletion is still pending.
5347+
fRetry = fTrue;
5348+
wszRetryReason = "DelPending";
53325349

5333-
cpgOwnedPrimary += cpgPrimaryObject;
5350+
// Perform cleanup.
5351+
(void)PverFromPpib( ppib )->ErrVERRCEClean( ifmp );
5352+
}
5353+
else
5354+
{
5355+
Assert( !fFoundFcb );
5356+
if ( err == JET_errTableLocked )
5357+
{
5358+
// Either the version store entry for the table deletion has cleared,
5359+
// or an exclusive user released the table and the FCB got purged.
5360+
fRetry = fTrue;
5361+
wszRetryReason = "FcbNotFound";
5362+
}
5363+
else
5364+
{
5365+
// Version store entry for the table deletion has cleared.
5366+
// No need to retry.
5367+
Assert( err == JET_errObjectNotFound );
5368+
}
5369+
}
53345370

5335-
Call( ErrFILECloseTable( ppib, pfucbTable ) );
5336-
pfucbTable = pfucbNil;
5337-
}
5371+
errRetry = err;
5372+
err = JET_errSuccess;
5373+
}
5374+
else
5375+
{
5376+
Assert( !fRetry );
5377+
Call( err );
5378+
5379+
cUncachedPrimary++;
53385380

5339-
Assert( pfucbTable == pfucbNil );
5381+
// Test injection.
5382+
OnDebug( while ( objidLast >= (OBJID)UlConfigOverrideInjection( 57894, objidFDPOverMax ) ) );
5383+
5384+
Call( ErrSPGetInfo(
5385+
ppib,
5386+
ifmp,
5387+
pfucbTable,
5388+
(BYTE*)&cpgPrimaryObject,
5389+
sizeof( cpgPrimaryObject ),
5390+
fSPOwnedExtent,
5391+
gci::Allow ) );
5392+
5393+
cpgOwnedPrimary += cpgPrimaryObject;
5394+
5395+
Call( ErrFILECloseTable( ppib, pfucbTable ) );
5396+
pfucbTable = pfucbNil;
5397+
}
5398+
5399+
if ( fRetried )
5400+
{
5401+
if ( fRetry )
5402+
{
5403+
cEnumerationConflictsFailed++;
5404+
if ( !fInfiniteRetries )
5405+
{
5406+
FireWall( OSFormat( "LeakReportConflict:%s:%d", wszRetryReason, errRetry ) );
5407+
}
5408+
}
5409+
else
5410+
{
5411+
cEnumerationConflictsSucceeded++;
5412+
}
5413+
}
5414+
5415+
Assert( pfucbTable == pfucbNil );
5416+
Assert( err >= JET_errSuccess );
5417+
}
5418+
while ( fRetry && ( !fRetried || fInfiniteRetries ) );
53405419
}
5420+
53415421
pfmp->SetOjidLeakEstimation( objidLast );
53425422

53435423
#ifdef DEBUG
@@ -5506,6 +5586,8 @@ LOCAL ERR ErrDBUTLIEstimateRootSpaceLeak( PIB* const ppib, const IFMP ifmp )
55065586
rgcpgRootSpaceInfo[ 2 ], // cpgSplitBuffers
55075587
cCachedPrimary,
55085588
cUncachedPrimary,
5589+
cEnumerationConflictsSucceeded,
5590+
cEnumerationConflictsFailed,
55095591
jts,
55105592
ulMinElapsed,
55115593
dblSecElapsed );

0 commit comments

Comments
 (0)