-
Notifications
You must be signed in to change notification settings - Fork 63
Expand file tree
/
Copy path_bf.hxx
More file actions
1740 lines (1339 loc) · 64.6 KB
/
_bf.hxx
File metadata and controls
1740 lines (1339 loc) · 64.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#ifndef _BF_HXX_INCLUDED
#define _BF_HXX_INCLUDED
#include "resmgr.hxx"
#include "_bfconst.hxx"
#include "bfftl.hxx"
typedef LONG_PTR IPG;
const IPG ipgNil = IPG( -1 );
//  IFMPPGNO -- Page Key
//
//  Composite key ( database instance id + page number ) identifying one
//  cached page; used as the key type of the IFMP/PGNO hash table and of the
//  LRUK resource utility manager.
struct IFMPPGNO
{
    IFMPPGNO() {}

    IFMPPGNO( IFMP ifmpIn, PGNO pgnoIn )
        :   ifmp( ifmpIn ),
            pgno( pgnoIn )
    {
    }

    BOOL operator==( const IFMPPGNO& ifmppgno ) const
    {
        return ifmp == ifmppgno.ifmp && pgno == ifmppgno.pgno;
    }

    BOOL operator<( const IFMPPGNO& ifmppgno ) const
    {
        return ( Cmp( this, &ifmppgno ) < 0 );
    }

    BOOL operator>( const IFMPPGNO& ifmppgno ) const
    {
        return ( Cmp( this, &ifmppgno ) > 0 );
    }

    const IFMPPGNO& operator=( const IFMPPGNO& ifmppgno )
    {
        ifmp = ifmppgno.ifmp;
        pgno = ifmppgno.pgno;
        return *this;
    }

    //  Three-way comparison:  orders first by ifmp, then by pgno.  Returns a
    //  negative value, zero, or a positive value (callers only consume the
    //  sign -- see operator< / operator> above).
    //
    //  NOTE: the previous implementation returned (INT)( a - b ); when the
    //  operands are wider than INT (IFMP is pointer-sized on 64-bit) the
    //  truncation can flip the sign or collapse a nonzero difference to 0.
    //  Explicit relational comparisons preserve the intended ordering for
    //  all values.
    static INT Cmp( const IFMPPGNO * pifmppgno1, const IFMPPGNO * pifmppgno2 )
    {
        if ( pifmppgno1->ifmp != pifmppgno2->ifmp )
        {
            return ( pifmppgno1->ifmp < pifmppgno2->ifmp ? -1 : 1 );
        }
        if ( pifmppgno1->pgno != pifmppgno2->pgno )
        {
            return ( pifmppgno1->pgno < pifmppgno2->pgno ? -1 : 1 );
        }
        return 0;
    }

    //  hash of this key; defined below, shared with the hash table
    ULONG_PTR Hash() const;

    IFMP ifmp;
    PGNO pgno;
};
// CAtomicBfBitField class
//
// A DWORD-sized collection of BF flags that can be read and written without
// holding any latch:  readers snapshot the whole DWORD with AtomicRead, and
// writers loop on AtomicCompareExchange until their single-flag update lands
// without clobbering a concurrent update to a sibling flag.
class CAtomicBfBitField
{
private:
// Union overlaying the raw DWORD with the individual flag bits so the whole
// field can be exchanged atomically as one LONG.
typedef union
{
// DWORD that contains the bits.
FLAG32 bits;
// Actual bits.
struct
{
FLAG32 FDependentPurged:1; // BF we were dependent on has been purged.
FLAG32 FImpedingCheckpoint:1; // BF where someone dependant upon this page
// is impeding the checkpoint.
FLAG32 FRangeLocked:1; // BF is range-locked.
FLAG32 rgbitReserved:29; // Free space.
};
} BfBitField;
BfBitField m_bfbf;
// Declaration macro.
// BitFieldDecl( _Type, _Name ) expands to:
//   _Name()      -- lockless getter: atomically snapshots the DWORD and
//                   extracts the flag.
//   Set_Name( f ) -- lockless setter: compare-exchange retry loop; repeats
//                   until the flag update is applied on top of the latest
//                   value of the other flags.
#define BitFieldDecl( _Type, _Name ) \
inline _Type _Name() const \
{ \
BfBitField bfbf; \
bfbf.bits = (FLAG32)AtomicRead( (LONG*)&m_bfbf.bits ); \
return (_Type)( bfbf._Name ); \
} \
\
inline void Set##_Name( const _Type _Name ) \
{ \
OSSYNC_FOREVER \
{ \
BfBitField bfbfInitial, bfbfFinal; \
bfbfInitial.bits = bfbfFinal.bits = (FLAG32)AtomicRead( (LONG*)&m_bfbf.bits ); \
bfbfFinal._Name = _Name; \
if ( AtomicCompareExchange( \
(LONG*)&m_bfbf.bits, \
(LONG)bfbfInitial.bits, \
(LONG)bfbfFinal.bits ) == (LONG)bfbfInitial.bits ) \
{ \
break; \
} \
} \
}
public:
// Initialize all bits with zero.
CAtomicBfBitField()
{
m_bfbf.bits = 0;
}
// Getter/setter declaration.
BitFieldDecl( BOOL, FDependentPurged );
BitFieldDecl( BOOL, FImpedingCheckpoint );
BitFieldDecl( BOOL, FRangeLocked );
};
C_ASSERT( sizeof( CAtomicBfBitField ) == sizeof( FLAG32 ) );
// BF struct
struct BF;
typedef BF* PBF;
const PBF pbfNil = PBF( 0 );
typedef LONG_PTR IBF;
const IBF ibfNil = IBF( -1 );
typedef IBF CBF;
//  BF -- per-buffer state for one cached IFMP/PGNO page image.
//
//  The two layouts below (#ifdef _WIN64 / #else) are hand-packed against the
//  byte-offset banner comments and the sizeof C_ASSERTs that follow the
//  struct (192 bytes on 64-bit, 176 bytes on 32-bit).  Field order is
//  therefore significant:  do not reorder members without revisiting those
//  asserts and banners.  Several invasive-container contexts share storage
//  (ob0ic doubles as the overflow-list element; lrukic doubles as the avail
//  pool / quiesced list context) -- the in-struct C_ASSERTs verify the
//  aliased contexts fit inside the storage they overlay.
struct BF // BF -- IFMP/PGNO buffer state
{
// Constructor: initializes a buffer to the quiesced, clean, unowned state.
BF()
: ifmp( ifmpNil ),
err( JET_errSuccess ),
fNewlyEvicted( fFalse ),
fQuiesced( fTrue ),
fAvailable( fFalse ),
fWARLatch( fFalse ),
bfdf( bfdfClean ),
fInOB0OL( fFalse ),
irangelock( 0 ),
fCurrentVersion( fFalse ),
fOlderVersion( fFalse ),
fFlushed( fFalse ),
bfls( bflsNormal ),
sxwl( CLockBasicInfo( CSyncBasicInfo( szBFLatch ), rankBFLatch, CLockDeadlockDetectionInfo::subrankNoDeadlock ) ),
lgposOldestBegin0( lgposMax ),
lgposModify( lgposMin ),
rbsposSnapshot( rbsposMin ),
prceUndoInfoNext( prceNil ),
pgno( pgnoNull ),
tce( tceNone ),
pbfTimeDepChainPrev( pbfNil ),
pbfTimeDepChainNext( pbfNil ),
pv( NULL ),
bfrs( bfrsNotCommitted ),
fLazyIO( fFalse ),
pWriteSignalComplete( NULL ), // and thus pbfNext( NULL )
icbPage( icbPageInvalid ),
icbBuffer( icbPageInvalid ),
fSuspiciouslySlowRead( fFalse ),
fSyncRead( fFalse ),
bfat( bfatNone ),
fAbandoned( fFalse ),
pvIOContext( NULL )
{
// Both tick fields start at "now" so the BF is immediately eligible for
// nomination and has a sane last-dirtied time.
const TICK tickNow = TickOSTimeCurrent();
tickEligibleForNomination = tickNow;
tickLastDirtied = tickNow;
}
~BF()
{
}
void Dump( CPRINTF* pcprintf, DWORD_PTR dwOffset = 0 ) const; // dumps BF state
// Offset accessors handed to the invasive-container templates.  APIC (avail
// pool) and QPIC (quiesced list) intentionally alias the lrukic storage;
// the C_ASSERTs below prove the aliased contexts fit.
static SIZE_T OffsetOfLRUKIC() { return OffsetOf( BF, lrukic ); }
static SIZE_T OffsetOfAPIC() { return OffsetOf( BF, lrukic ); }
C_ASSERT( sizeof( CPool< BF, BF::OffsetOfAPIC >::CInvasiveContext ) <= sizeof( CLRUKResourceUtilityManager< Kmax, BF, OffsetOfLRUKIC, IFMPPGNO >::CInvasiveContext ) );
static SIZE_T OffsetOfQPIC() { return OffsetOf( BF, lrukic ); }
C_ASSERT( sizeof( CInvasiveList< BF, BF::OffsetOfQPIC >::CElement ) <= sizeof( CLRUKResourceUtilityManager< Kmax, BF, OffsetOfLRUKIC, IFMPPGNO >::CInvasiveContext ) );
// OB0OLILE (overflow-list element) likewise aliases the ob0ic storage.
static SIZE_T OffsetOfOB0IC() { return OffsetOf( BF, ob0ic ); }
static SIZE_T OffsetOfOB0OLILE() { return OffsetOf( BF, ob0ic ); }
C_ASSERT( sizeof( CInvasiveList< BF, BF::OffsetOfOB0OLILE >::CElement ) <= sizeof( CApproximateIndex< LGPOS, BF, OffsetOfOB0IC >::CInvasiveContext ) );
#ifdef _WIN64
// 0 B /////////////////////////////////////////////////////////////////////////////////////
// UNION: This is a union, but hidden via the OffsetOfOB0IC() / OffsetOfOB0OLILE() functions.
CApproximateIndex< LGPOS, BF, OffsetOfOB0IC >::CInvasiveContext ob0ic;
// Invasive Context for the OldestBegin0 index or
// Invasive List Element for the Overflow List
LGPOS lgposOldestBegin0; // log position of Begin0 of the oldest
// transaction to dirty this IFMP/PGNO
LGPOS lgposModify; // log position of most recent log record
// to reference this IFMP/PGNO
// 32 B //////////////////////////////////////////////////////////////////////////////////////
CSXWLatch sxwl; // S/X/W Latch protecting this BF state and
// its associated cached page
// 64 B //////////////////////////////////////////////////////////////////////////////////////
IFMP ifmp; // IFMP of this cached page
PGNO pgno; // PGNO of this cached page
TICK tickLastDirtied; // time when the BF was last dirtied (unique pages modified)
SHORT err; // I/O error
BYTE fLazyIO:1; // BF is issued at non-immediate dispatch priority
BYTE fNewlyEvicted:1; // BF cache memory is newly evicted
BYTE fQuiesced:1; // BF is quiesced for shrinking the cache
BYTE fAvailable:1; // BF is in the avail pool
BYTE fReserved4:1; // Available for re-use (was fMemory)
BYTE fWARLatch:1; // BF is WAR Latched (valid only if exclusively latched)
BYTE bfdf:2; // BF dirty flags
BYTE fInOB0OL:1; // BF is in the Oldest Begin 0 index Overflow List
BYTE irangelock:1; // active rangelock for this attempted flush
BYTE fCurrentVersion:1; // BF contains the current version of this IFMP / PGNO
BYTE fOlderVersion:1; // BF contains an older version of this IFMP / PGNO
BYTE fFlushed:1; // BF has been successfully flushed at least once
BYTE bfls:3; // BF latch state
// Anonymous union: which member is meaningful depends on bfls / bfat, as
// noted on each line.
union
{
ULONG iHashedLatch; // bflsHashed: offset of hashed latch in PLS
TICK tickEligibleForNomination; // !bflsHashed: time BF is eligible for nomination
TICK tickViewLastRefreshed; // [bfat==bfatViewMapped]: tick of the last time all pages were definitely read (typically within an exception handler)
};
BFResidenceState bfrs; // BF residence state
void* pv; // Cached page image
// 96 B //////////////////////////////////////////////////////////////////////////////////////
union
{
volatile ULONG_PTR pWriteSignalComplete; // write IO completion signal/information
// volatile PBF pbfNext; // future, this will be used for evict immediate
};
// Important: Next 4 bytes protected from modify x-latch, can be read locklessly
BYTE icbPage:4; // Index into g_rgcbPageSize[] for the size of the page
BYTE icbBuffer:4; // Index into g_rgcbPageSize[] for the CURRENT size of the buffer (dehydrated)
BYTE fSuspiciouslySlowRead:1; // BF is for IO that has a suspiciously slow read
BYTE fSyncRead:1; // BF was read synchronously (otherwise BF was read async)
BYTE bfat:2; // Method used to allocate the pv buffer
BYTE fAbandoned:1; // Client abandoned this page, ok to purge / evict with dirty data immediately
BYTE grbitReserved:3;
BYTE rgbReserved2[ 1 ]; // Free space
TCE tce; // table class for perfmon tracking of pages
CAtomicBfBitField bfbitfield; // Bit field that can be read from and written to without any latches.
volatile PBF pbfTimeDepChainPrev; // prev BF in our time dependency chain
volatile PBF pbfTimeDepChainNext; // next BF in our time dependency chain
// 128 B /////////////////////////////////////////////////////////////////////////////////////
RCE* prceUndoInfoNext; // Undo Info chain
void* pvIOContext; // I/O context (in practice, an IOREQ)
// 144 B /////////////////////////////////////////////////////////////////////////////////////
CLRUKResourceUtilityManager< Kmax, BF, OffsetOfLRUKIC, IFMPPGNO >::CInvasiveContext lrukic;
// Invasive Context the LRUK Resource Utility Manager, Avail pool
// or Quiesced list.
// 184 B /////////////////////////////////////////////////////////////////////////////////////
RBS_POS rbsposSnapshot; // Position of the rollback snapshot containing preimage for this page
// 192 B (64-bit/cache aligned) ///////////////////////////////////////////////////////////////
#else // !_WIN64
// 0 B /////////////////////////////////////////////////////////////////////////////////////
CApproximateIndex< LGPOS, BF, OffsetOfOB0IC >::CInvasiveContext ob0ic;
// Invasive Context for the OldestBegin0 index or
// Invasive List Element for the Overflow List
LGPOS lgposOldestBegin0; // log position of Begin0 of the oldest
// transaction to dirty this IFMP/PGNO
LGPOS lgposModify; // log position of most recent log record
// to reference this IFMP/PGNO
volatile PBF pbfTimeDepChainPrev; // prev BF in our time dependency chain
volatile PBF pbfTimeDepChainNext; // next BF in our time dependency chain (aka "older")
// 32 B //////////////////////////////////////////////////////////////////////////////////////
CSXWLatch sxwl; // S/X/W Latch protecting this BF state and
// its associated cached page
void* pvIOContext; // I/O context (in practice, an IOREQ)
IFMP ifmp; // IFMP of this cached page
PGNO pgno; // PGNO of this cached page
// 64 B //////////////////////////////////////////////////////////////////////////////////////
void* pv; // Cached page image
SHORT err; // I/O error
BYTE fLazyIO:1; // BF is issued at non-immediate dispatch priority
BYTE fNewlyEvicted:1; // BF cache memory is newly evicted
BYTE fQuiesced:1; // BF is quiesced for shrinking the cache
BYTE fAvailable:1; // BF is in the avail pool
BYTE fReserved4:1; // Available for re-use (was fMemory)
BYTE fWARLatch:1; // BF is WAR Latched (valid only if exclusively latched)
BYTE bfdf:2; // BF dirty flags
BYTE fInOB0OL:1; // BF is in the Oldest Begin 0 index Overflow List
BYTE irangelock:1; // active rangelock for this attempted flush
BYTE fCurrentVersion:1; // BF contains the current version of this IFMP / PGNO
BYTE fOlderVersion:1; // BF contains an older version of this IFMP / PGNO
BYTE fFlushed:1; // BF has been successfully flushed at least once
BYTE bfls:3; // BF latch state
// Anonymous union: which member is meaningful depends on bfls / bfat, as
// noted on each line.
union
{
ULONG iHashedLatch; // bflsHashed: offset of hashed latch in PLS
TICK tickEligibleForNomination; // !bflsHashed: time BF is eligible for nomination
TICK tickViewLastRefreshed; // [bfat==bfatViewMapped]: tick of the last time all pages were definitely read (typically within an exception handler)
};
BFResidenceState bfrs; // BF residence state
// 80 B //////////////////////////////////////////////////////////////////////////////////////
RCE* prceUndoInfoNext; // Undo Info chain
CAtomicBfBitField bfbitfield; // Bit field that can be read from and written to without any latches.
// 88 B //////////////////////////////////////////////////////////////////////////////////////
CLRUKResourceUtilityManager< Kmax, BF, OffsetOfLRUKIC, IFMPPGNO >::CInvasiveContext lrukic;
// Invasive Context the LRUK Resource Utility Manager, Avail pool
// or Quiesced list.
// 120 B /////////////////////////////////////////////////////////////////////////////////////
union
{
volatile ULONG_PTR pWriteSignalComplete; // write IO completion signal/information
// volatile PBF pbfNext; // future, this will be used for evict immediate
};
// Important: Next 4 bytes protected from modify x-latch, can be read locklessly
BYTE icbPage:4; // Index into g_rgcbPageSize[] for the size of the page
BYTE icbBuffer:4; // Index into g_rgcbPageSize[] for the CURRENT size of the buffer (dehydrated)
BYTE fSuspiciouslySlowRead:1; // BF is for IO that has a suspiciously slow read
BYTE fSyncRead:1; // BF was read synchronously (otherwise BF was read async)
BYTE bfat:2; // Method used to allocate the pv buffer
BYTE fAbandoned:1; // Client abandoned this page, ok to purge / evict with dirty data immediately
BYTE grbitReserved:3;
BYTE rgbReserved1[ 1 ]; // Free space
TCE tce; // table class for perfmon tracking of pages
// 128 B (64-bit/cache aligned) //////////////////////////////////////////////////////////////
RBS_POS rbsposSnapshot; // Position of the rollback snapshot containing preimage for this page
TICK tickLastDirtied; // time when the BF was last dirtied (unique pages modified)
BYTE rgbReserved3[ 20 ]; // Free space
// 160 B (64-bit/cache aligned) ///////////////////////////////////////////////////////////////
// NOTE(review): the "160 B" banner above appears stale -- the C_ASSERT after
// this struct requires sizeof(BF) == 176 on 32-bit. Confirm the banner
// against the actual member sizes before relying on it.
#endif // _WIN64
};
// The CInvasiveList class will return NULL for the Prev()/Next() pointers that are returned IF it gets
// to the end AND IF (ONLY IF) the invasive context is at the beginning of (offset 0) the datastructure.
// BUT IF the invasive context is not at the beginning of the datastructure the experience of the Prev()
// or Next() pointers are return as this very odd / unpleasant negative offset.
C_ASSERT( OffsetOf( BF, ob0ic ) == 0 );
// Atomic bit fields must be DWORD-aligned so that we can use interlocked operations to read from and write to them
// consistently.
C_ASSERT( ( OffsetOf( BF, bfbitfield ) % sizeof( FLAG32 ) ) == 0 );
C_ASSERT( sizeof( BF::bfbitfield ) == sizeof( FLAG32 ) );
// Be conscious of the size if you're changing it ...
#ifdef _WIN64
C_ASSERT( sizeof(BF) == 192 );
#else // !_WIN64
C_ASSERT( sizeof(BF) == 176 );
#endif // _WIN64
// Buffer Manager Global Flags
extern BOOL g_fBFInitialized;
extern BYTE* g_rgbBFTemp;
extern TICK g_tickBFPreparingCrashDump;
extern size_t cBFInspectedForInclusionInCrashDump;
extern size_t cBFMismatchedVMPageIncludedInCrashDump;
extern size_t cBFDirtiedPageIncludedInCrashDump;
extern size_t cBFCachedPageIncludedInCrashDump;
extern size_t cBFLatchedPageIncludedInCrashDump;
extern size_t cBFReferencedPageIncludedInCrashDump;
extern size_t cBFRecentlyTouchedPageIncludedInCrashDump;
extern size_t cBFErrorIncludedInCrashDump;
extern size_t cBFIOIncludedInCrashDump;
extern size_t cBFUnverifiedIncludedInCrashDump;
extern size_t cBFMayBeRemovedFromCrashDump;
extern size_t cBFVMPagesIncludedInCrashDump;
extern size_t cBFVMPagesRemovedFromCrashDump;
extern TICK g_tickBFCrashDumpPrepared;
extern ERR g_errBFCrashDumpResult;
extern BOOL g_fBFErrorBuildingReferencedPageListForCrashDump;
// Buffer Manager Global Statistics
extern ULONG cBFOpportuneWriteIssued;
// Buffer Manager Global Constants
extern double g_dblBFSpeedSizeTradeoff;
// IFMP/PGNO Hash Table
//  PGNOPBF -- entry stored in the IFMP/PGNO hash table.
//
//  Pairs a page number with the BF that caches it.  The pgno is duplicated
//  here (it is also reachable through the BF) so hash-table probes can reject
//  mismatches without dereferencing the BF.
struct PGNOPBF
{
    PGNOPBF() {}

    PGNOPBF( PGNO pgnoIn, PBF pbfIn )
        :   pgno( pgnoIn ),
            pbf( pbfIn )
    {
    }

    //  the pbf alone uniquely identifies an entry, so equality ignores pgno
    BOOL operator==( const PGNOPBF& pgnopbf ) const
    {
        return ( pgnopbf.pbf == pbf );
    }

    const PGNOPBF& operator=( const PGNOPBF& pgnopbf )
    {
        pbf = pgnopbf.pbf;
        pgno = pgnopbf.pgno;
        return *this;
    }

    PGNO pgno;
    PBF pbf;
};
//  BFHash -- dynamic hash table mapping IFMPPGNO -> PGNOPBF.
typedef CDynamicHashTable< IFMPPGNO, PGNOPBF > BFHash;

//  Hash function shared by the hash table and IFMPPGNO::Hash().  The mixing
//  expression is kept verbatim so every existing hash value is preserved.
inline BFHash::NativeCounter HashIfmpPgno( const IFMP ifmp, const PGNO pgno )
{
    // CONSIDER: revise this hash function
    const BFHash::NativeCounter iHash = BFHash::NativeCounter( pgno + ( ifmp << 13 ) + ( pgno >> 17 ) );
    return iHash;
}
//  Hashes an IFMPPGNO key for the hash table (delegates to the shared hash).
inline BFHash::NativeCounter BFHash::CKeyEntry::Hash( const IFMPPGNO& ifmppgno )
{
    const IFMP ifmpKey = ifmppgno.ifmp;
    const PGNO pgnoKey = ifmppgno.pgno;
    return HashIfmpPgno( ifmpKey, pgnoKey );
}
//  Hashes a stored entry:  the pgno is cached in the entry itself, while the
//  ifmp is read through the entry's BF.
inline BFHash::NativeCounter BFHash::CKeyEntry::Hash() const
{
    const PBF pbfEntry = m_entry.pbf;
    return HashIfmpPgno( pbfEntry->ifmp, m_entry.pgno );
}
//  Returns fTrue iff this entry is for the given IFMP/PGNO key.  The cheap
//  pgno comparison runs first so a mismatch avoids dereferencing the BF.
inline BOOL BFHash::CKeyEntry::FEntryMatchesKey( const IFMPPGNO& ifmppgno ) const
{
    if ( m_entry.pgno != ifmppgno.pgno )
    {
        return fFalse;
    }
    return ( m_entry.pbf->ifmp == ifmppgno.ifmp );
}
inline void BFHash::CKeyEntry::SetEntry( const PGNOPBF& pgnopbf )
{
m_entry = pgnopbf;
}
//  Copies this hash-table entry's PGNOPBF pair out to the caller.
inline void BFHash::CKeyEntry::GetEntry( PGNOPBF* const ppgnopbf ) const
{
    ppgnopbf->pgno = m_entry.pgno;
    ppgnopbf->pbf = m_entry.pbf;
}
//  IFMPPGNO hashes itself with the same function the hash table uses, so the
//  two always agree on bucket placement.
inline ULONG_PTR IFMPPGNO::Hash() const
{
    const ULONG_PTR iHash = (ULONG_PTR) HashIfmpPgno( ifmp, pgno );
    return iHash;
}
extern BFHash g_bfhash;
extern double g_dblBFHashLoadFactor;
extern double g_dblBFHashUniformity;
// Avail Pool
typedef CPool< BF, BF::OffsetOfAPIC > BFAvail;
extern BFAvail g_bfavail;
// Quiesced List
typedef CInvasiveList< BF, BF::OffsetOfQPIC > BFQuiesced;
extern BFQuiesced g_bfquiesced;
// lookaside cache for BFAlloc
//
// CSmallLookasideCache -- a fixed-capacity lookaside of uniformly sized
// buffers (buffer size chosen at Init()) used to satisfy small allocations
// without hitting the general allocator.  Copying is disabled (private,
// unimplemented copy members below).
class CSmallLookasideCache
{
public:
CSmallLookasideCache();
~CSmallLookasideCache();
// Sets the uniform size of the buffers this cache hands out.
void Init( const INT cbBufferSize );
void Term();
// Allocation/free through the lookaside; contracts defined in the .cxx.
void * PvAlloc();
void Free( void * const pb );
INT CbBufferSize() const;
#ifdef DEBUGGER_EXTENSION
// This should be concurrently safe (though probably would give you a stale value). But it
// was intended only for debugger use, as it won't be very efficient as the lookaside list
// is rather long.
__int64 CbCacheSize()
{
// Sum the sizes of all currently-held (non-NULL) slots.
__int64 cbTotal = 0;
for( LONG i = 0; i < CSmallLookasideCache::m_cLocalLookasideBuffers; i++ )
{
if ( m_rgpvLocalLookasideBuffers[i] )
{
cbTotal += m_cbBufferSize;
}
}
return cbTotal;
}
#endif
private:
INT m_cbBufferSize;
// Slot count sized for a worst case of 16 CPUs at 128-byte cache lines.
static const INT m_cLocalLookasideBuffers = ( 128 /* large CPU cache line */ / sizeof(void*) ) * 16 /* 16 CPUs worst case */;
void * m_rgpvLocalLookasideBuffers[m_cLocalLookasideBuffers];
// stats
#ifdef DEBUG
#define MEMORY_STATS_TRACKING
#endif
#ifdef MEMORY_STATS_TRACKING
QWORD m_cHits;
QWORD m_cAllocs;
QWORD m_cFrees; // essentially overallocs
QWORD m_cFailures;
#endif
private:
// not implemented: copying a lookaside would duplicate buffer ownership
CSmallLookasideCache( const CSmallLookasideCache& );
CSmallLookasideCache& operator=( const CSmallLookasideCache& );
};
#ifdef DEBUG
// Lookaside buffer for validating pages in the IO thread
extern void * g_pvIoThreadImageCheckCache;
#endif
// LRUK
DECLARE_LRUK_RESOURCE_UTILITY_MANAGER( Kmax, BF, BF::OffsetOfLRUKIC, IFMPPGNO, BFLRUK );
extern BFLRUK g_bflruk;
extern double g_csecBFLRUKUncertainty;
// BF FTL Tracing
ERR ErrBFIFTLInit();
void BFIFTLTerm();
// BF tracing
INLINE void BFITraceResMgrInit(
const INT K,
const double csecCorrelatedTouch,
const double csecTimeout,
const double csecUncertainty,
const double dblHashLoadFactor,
const double dblHashUniformity,
const double dblSpeedSizeTradeoff );
INLINE void BFITraceResMgrTerm();
INLINE void BFITraceCachePage(
const TICK tickCache,
const PBF pbf,
const BFLatchType bflt,
const ULONG pctPriority,
const BFLatchFlags bflf,
const BFRequestTraceFlags bfrtf,
const TraceContext& tc );
void BFITraceNewPageIdentity( const PBF pbf );
INLINE void BFITraceRequestPage(
const TICK tickTouch,
const PBF pbf,
const ULONG pctPriority,
const BFLatchType bflt,
const BFLatchFlags bflf,
const BFRequestTraceFlags bfrtf,
const TraceContext& tc );
INLINE void BFITraceMarkPageAsSuperCold(
const IFMP ifmp,
const PGNO pgno );
INLINE void BFITraceEvictPage(
const IFMP ifmp,
const PGNO pgno,
const BOOL fCurrentVersion,
const ERR errBF,
const ULONG bfef );
INLINE void BFITraceDirtyPage(
const PBF pbf,
const BFDirtyFlags bfdf,
const TraceContext& tc );
INLINE void BFITraceWritePage(
const PBF pbf,
const FullTraceContext& tc );
INLINE void BFITraceSetLgposModify(
const PBF pbf,
const LGPOS& lgposModify );
// Oldest Begin 0 Index and Overflow List
DECLARE_APPROXIMATE_INDEX( QWORD, BF, BF::OffsetOfOB0IC, BFOB0 );
typedef CInvasiveList< BF, BF::OffsetOfOB0OLILE > BFOB0OverflowList;
QWORD BFIOB0Offset( const IFMP ifmp, const LGPOS* const plgpos );
INLINE LGPOS BFIOB0Lgpos( const IFMP ifmp, LGPOS lgpos, const BOOL fNextBucket = fFalse );
//
// structs to maintain a histogram of something vs log generation
//
//  LogHistData -- raw storage for one histogram of counts per log generation.
//
//  Owns the m_rgc array (allocated by the histogram's maintenance code) and
//  releases it in the destructor.  Copying is disabled:  a shallow copy would
//  make two owners of m_rgc and double-delete it (same noncopyable pattern as
//  CSmallLookasideCache above).
struct LogHistData
{
    LONG m_lgenBase;    //  min log generation we hold for histogram
    LONG m_cgen;        //  # of log generations in histogram
    LONG* m_rgc;        //  histogram, m_rgc = new LONG[ m_cgen ];
    LONG m_cOverflow;   //  overflow bucket

    LogHistData( void )
        :   m_lgenBase( 0 ),
            m_cgen( 0 ),
            m_rgc( NULL ),
            m_cOverflow( 0 )
    {
    }

    ~LogHistData( void )
    {
        delete[] m_rgc;
        m_rgc = NULL;
    }

private:
    //  not implemented: prevents double delete[] of m_rgc via default copy
    LogHistData( const LogHistData& );
    LogHistData& operator=( const LogHistData& );
};
struct BFSTAT
{
LONG m_cBFMod; // modified BFs
LONG m_cBFPin; // pinned BFs
BFSTAT( LONG cBFMod, LONG cBFPin )
: m_cBFMod( cBFMod ),
m_cBFPin( cBFPin )
{
}
BFSTAT( BFSTAT& copy )
{
m_cBFMod = copy.m_cBFMod;
m_cBFPin = copy.m_cBFPin;
}
BFSTAT& operator=( BFSTAT& other )
{
m_cBFMod = other.m_cBFMod;
m_cBFPin = other.m_cBFPin;
return *this;
}
};
// BFLogHistogram -- histogram of BFs per log generation (see Update/ReBase).
//
// Holds two LogHistData slots (m_rgdata) plus a critical section and a
// metered section; presumably the metered section lets readers proceed
// against one slot while ReBase rebuilds the other -- NOTE(review): confirm
// the double-buffering protocol in the .cxx implementation.
struct BFLogHistogram
{
CCriticalSection m_crit;
CMeteredSection m_ms;
LogHistData m_rgdata[ 2 ];
BFLogHistogram( void )
: m_crit( CLockBasicInfo( CSyncBasicInfo( szBFLgposModifyHist ), rankBFLgposModifyHist, 0 ) )
{
}
~BFLogHistogram( void )
{
}
// Moves a BF's contribution from the lgposOld bucket to the lgposNew bucket.
VOID Update( const LGPOS lgposOld, const LGPOS lgposNew, IFMP ifmp );
// Re-anchors the histogram at a newer base log generation.
VOID ReBase( IFMP ifmp, LONG lgenLatest );
// Returns the aggregated modified/pinned counts.
static BFSTAT Read( void );
// minimum number of generations allocated on (re)base
enum { cgenNewMin = 64 };
};
// BFFMPContext -- per-FMP (attached database) buffer-manager state:  the
// OldestBegin0 approximate index plus its overflow list, checkpoint-depth
// maintenance bookkeeping, and the lgposModify histogram.
struct BFFMPContext
{
BFFMPContext()
: bfob0( rankBFOB0 ),
critbfob0ol( CLockBasicInfo( CSyncBasicInfo( szBFOB0 ), rankBFOB0, 0 ) )
{
memset( &ChkAdvData, 0, sizeof(ChkAdvData) );
tickMaintCheckpointDepthLast = TickOSTimeCurrent();
// start "next run" one delay in the past so maintenance is immediately due
tickMaintCheckpointDepthNext = tickMaintCheckpointDepthLast - dtickMaintCheckpointDepthDelay;
errLastCheckpointMaint = JET_errSuccess;
lgposFlusherBM = lgposMin;
lgposVersionerBM = lgposMin;
lgposLastLogTip = lgposMin;
lgposOldestBegin0Last = lgposMax;
lgposNewestModify = lgposMin;
fCurrentlyAttached = fFalse;
}
void Dump( CPRINTF* pcprintf, DWORD_PTR dwOffset = 0 ) const; // dumps BFFMPContext state
// approximate index of BFs by OldestBegin0 log position
BFOB0 bfob0;
CCriticalSection critbfob0ol;
// protected by critbfob0ol
BFOB0OverflowList bfob0ol;
//
// Checkpoint depth maint properties.
//
// protected implicitly by 1 checkpoint thread on this struct
ERR errLastCheckpointMaint;
TICK tickMaintCheckpointDepthLast;
LGPOS lgposFlusherBM;
LGPOS lgposVersionerBM;
LGPOS lgposLastLogTip;
// counters accumulated during one checkpoint-advancement pass
typedef struct
{
ULONG cEntriesVisited;
ULONG cCleanRemoved;
ULONG cFlushErrSuccess;
ULONG cFlushErrOther;
ULONG cFlushErrPageFlushed;
ULONG cFlushErrPageFlushPending;
ULONG cFlushErrRemainingDependencies;
ULONG cFlushErrDependentPurged;
ULONG cFlushErrLatchConflict;
ULONG cFlushErrPageTouchTooRecent;
} ChkAdvStats;
ChkAdvStats ChkAdvData;
// not protected, but OK.
TICK tickMaintCheckpointDepthNext; // The next soonest time this should run. (unless IO completes)
// maximum lgposModify for any cached BF that is found to be pinned by LLR
//
LGPOS lgposNewestModify;
// last value returned by BFGetOldestLgposBegin0 (lgposMax = not set)
//
LGPOS lgposOldestBegin0Last;
//
// histogram for BF/LogPosModify.lGeneration
// (how many BFs are modified by each log generation)
//
BFLogHistogram m_logHistModify;
BYTE fCurrentlyAttached:1; // BFFMPContext currently has an attached database
BYTE m_rgbReserved[ 7 ]; // padding / free space
};
// Deferred Undo Information
extern CRITPOOL< BF > g_critpoolBFDUI;
// Cache
extern CSmallLookasideCache* g_pBFAllocLookasideList;
extern CCriticalSection g_critCacheSizeSetTarget;
extern CCriticalSection g_critCacheSizeResize;
extern BOOL g_fBFCacheInitialized;
extern LONG_PTR g_cbfCacheUserOverride;
extern volatile LONG_PTR cbfCacheTarget;
extern volatile LONG_PTR g_cbfCacheTargetOptimal;
extern LONG g_rgcbfCachePages[icbPageMax];
extern volatile LONG_PTR cbfCacheAddressable;
extern volatile LONG_PTR cbfCacheSize;
extern LONG g_cbfCacheResident;
extern LONG g_cbfCacheClean;
extern ICBPage g_icbCacheMax;
extern DWORD g_cbfNewlyCommitted;
extern DWORD g_cbfNewlyEvictedUsed;
extern DWORD g_cpgReclaim;
extern DWORD g_cResidenceCalc;
extern ULONG_PTR g_cbCacheCommittedSize;
extern ULONG_PTR g_cbCacheReservedSize;
extern LONG_PTR g_cpgChunk;
extern void** g_rgpvChunk;
extern LONG_PTR cbfInit;
extern LONG_PTR g_cbfChunk;
extern BF** g_rgpbfChunk;
ERR ErrBFICacheInit( __in const LONG cbPageSizeMax );
void BFICacheTerm();
// eResidentCacheStatusChange -- classifies a change in the cache's resident
// status as tracked by BFCacheStatsChanges.
enum eResidentCacheStatusChange
{
eResidentCacheStatusNoChange = 0, // must remain 0: zero-init default (asserted in BFCacheStatsChanges's ctor)
eResidentCacheStatusDrop, // residency dropped -- TODO confirm exact trigger in the .cxx
eResidentCacheStatusRestore, // residency restored -- TODO confirm exact trigger in the .cxx
};
struct BFCacheStatsChanges : public CZeroInit // BFCacheStatsChanges derives from CZeroInit so we don't have to remember to
{ // initialize it with { 0 } in case of local variables (as in the unit tests).
__int64 ftResidentLastEvent; // filetime of the last residency event
eResidentCacheStatusChange eResidentLastEventType;
LONG cbfResidentLast;
LONG cbfCacheLast;
eResidentCacheStatusChange eResidentCurrentEventType;
__int64 csecLastEventDelta;
BFCacheStatsChanges() : CZeroInit( sizeof( BFCacheStatsChanges ) )
{
// zero-init only works as "no change" because the enum's NoChange value is 0
C_ASSERT( eResidentCacheStatusNoChange == 0 );
}
};
void BFICacheIResetTarget();
void BFICacheSetTarget( OnDebug( const LONG_PTR cbfCacheOverrideCheck ) );
ERR ErrBFICacheGrow();
void BFICacheIShrinkAddressable();
void BFICacheIFree();
void BFICacheINotifyCacheSizeChanges(
const LONG_PTR cbfCacheAddressableInitial,
const LONG_PTR cbfCacheSizeInitial,
const LONG_PTR cbfCacheAddressableFinal,
const LONG_PTR cbfCacheSizeFinal );
ERR ErrBFICacheUpdateStatistics();
INLINE BOOL FBFICacheValidPv( const void* const pv );
INLINE BOOL FBFICacheValidPbf( const PBF pbf );
INLINE PBF PbfBFICacheIbf( const IBF ibf );
INLINE void* PvBFICacheIpg( const IPG ipg );
IBF IbfBFICachePbf( const PBF pbf );
IPG IpgBFICachePv( const void* const pv );
ERR ErrBFICacheISetSize( const LONG_PTR cbfCacheNew );
// Cache Resource Allocation Manager
//
// CCacheRAM -- specializes the dynamic-buffer-allocation resource manager
// (CDBAResourceAllocationManager) for the page cache:  it reports memory /
// eviction statistics to the base class and converts the base class's
// resource-size decisions into a cache-size target.  Method contracts live
// with the implementations in the .cxx.
class CCacheRAM
: public CDBAResourceAllocationManager< cMaintCacheSamplesAvg >
{
public:
CCacheRAM();
virtual ~CCacheRAM();
void Reset();
// normalized page reclaim / evict rates from the last UpdateStatistics()
DWORD CpgReclaim() { return m_cpgReclaimNorm; }
DWORD CpgEvict() { return m_cpgEvictNorm; }
virtual void UpdateStatistics();
virtual void ConsumeResourceAdjustments( __out double * const pdcbTotalResource, __in const double cbResourceSize );
// forces the next adjustment to the given delta instead of the computed one
void OverrideResourceAdjustments( double const dcbRource );
virtual size_t TotalPhysicalMemory();
virtual size_t AvailablePhysicalMemory();
protected:
virtual size_t TotalPhysicalMemoryEvicted();
virtual QWORD TotalResources();
virtual QWORD TotalResourcesEvicted();
public:
QWORD GetOptimalResourcePoolSize();
void SetOptimalResourcePoolSize();
private:
DWORD m_cpgReclaimCurr;
DWORD m_cpgReclaimLast;
DWORD m_cpgReclaimNorm;
DWORD m_cpgEvictCurr;
DWORD m_cpgEvictLast;
DWORD m_cpgEvictNorm;
DWORD m_cpgPhysicalMemoryEvictedLast;
size_t m_cbTotalPhysicalMemoryEvicted;
DWORD m_cbTotalResourcesEvictedLast;
QWORD m_cbTotalResourcesEvicted;
LONG_PTR m_cbfCacheNewDiscrete;
QWORD m_cbOptimalResourcePoolSizeUsedLast;
double m_dcbAdjustmentOverride;
};
extern CCacheRAM g_cacheram;
bool FBFIFaultInBuffer( const PBF pbf, LONG * pcmmpgReclaimed = NULL );
// Issue List
class CBFIssueList
{
public:
CBFIssueList();
~CBFIssueList();
ERR ErrPrepareWrite( const IFMP ifmp );
ERR ErrPrepareLogWrite( const IFMP ifmp );
ERR ErrPrepareRBSWrite( const IFMP ifmp );
VOID NullifyDiskTiltFake( const IFMP ifmp );
ERR ErrIssue( const BOOL fSync = fFalse );
VOID AbandonLogOps();
BOOL FEmpty() const;
static ERR ErrSync();
private:
class CEntry
{
public:
enum eOper
{
operWrite,
operLogWrite,