forked from pmem/CacheLib
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCacheStats.h
More file actions
670 lines (512 loc) · 19.2 KB
/
CacheStats.h
File metadata and controls
670 lines (512 loc) · 19.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <algorithm>
#include <numeric>
#include "cachelib/allocator/Util.h"
#include "cachelib/allocator/memory/MemoryAllocator.h"
#include "cachelib/allocator/memory/MemoryAllocatorStats.h"
#include "cachelib/allocator/memory/Slab.h"
#include "cachelib/common/FastStats.h"
#include "cachelib/common/PercentileStats.h"
#include "cachelib/common/RollingStats.h"
#include "cachelib/common/Time.h"
namespace facebook {
namespace cachelib {
// Statistics describing one eviction queue.
struct EvictionStatPerType {
  // age, in seconds, of the oldest element in the queue
  uint64_t oldestElementAge{0};

  // how many elements the eviction queue currently holds
  uint64_t size{0};

  // estimated age once a slab worth of elements has been removed
  uint64_t projectedAge{0};
};
// Eviction-related stats for one MM container (a.k.a. one allocation class).
// Holds one EvictionStatPerType entry per eviction queue of the container.
struct EvictionAgeStat {
// stats of the warm queue
EvictionStatPerType warmQueueStat;
// stats of the hot queue
EvictionStatPerType hotQueueStat;
// stats of the cold queue
EvictionStatPerType coldQueueStat;
};
// stats related to evictions for a pool
struct PoolEvictionAgeStats {
// Map from allocation class id to the eviction age stats
std::unordered_map<ClassId, EvictionAgeStat> classEvictionAgeStats;
uint64_t getOldestElementAge(ClassId cid) const {
return classEvictionAgeStats.at(cid).warmQueueStat.oldestElementAge;
}
const EvictionStatPerType& getWarmEvictionStat(ClassId cid) const {
return classEvictionAgeStats.at(cid).warmQueueStat;
}
const EvictionStatPerType& getHotEvictionStat(ClassId cid) const {
return classEvictionAgeStats.at(cid).hotQueueStat;
}
const EvictionStatPerType& getColdEvictionStat(ClassId cid) const {
return classEvictionAgeStats.at(cid).coldQueueStat;
}
};
// Stats for MM container.
//
// All members are zero-initialized so that a default-constructed object never
// exposes indeterminate values. The struct remains an aggregate (C++14 and
// later), so positional brace-initialization keeps working.
struct MMContainerStat {
  // number of elements in the container.
  size_t size{0};

  // what is the unix timestamp in seconds of the oldest element existing in
  // the container.
  uint64_t oldestTimeSec{0};

  // refresh time for LRU
  uint64_t lruRefreshTime{0};

  // TODO: Make the MMContainerStat generic by moving the Lru/2Q specific
  // stats inside MMType and exporting them through a generic stats interface.
  // number of hits in each lru.
  uint64_t numHotAccesses{0};
  uint64_t numColdAccesses{0};
  uint64_t numWarmAccesses{0};
  uint64_t numTailAccesses{0};
};
// Basic per-allocation-class figures.
struct AllocationClassBaseStat {
  // allocation size of the class
  size_t allocSize = 0;

  // amount of memory assigned to the class
  size_t memorySize = 0;

  // approximate percentage of the class' memory that is free
  double approxFreePercent = 0.0;

  // rolling allocation latency, in nanoseconds
  util::RollingStats allocLatencyNs;
};
// cache related stats for a given allocation class.
//
// Every member has a default initializer so that a default-constructed
// CacheStat is fully zeroed. The struct remains an aggregate, so positional
// brace-initialization keeps working.
struct CacheStat {
  // allocation size for this container.
  uint32_t allocSize{0};

  // number of attempts to allocate memory
  uint64_t allocAttempts{0};

  // number of eviction attempts
  uint64_t evictionAttempts{0};

  // number of failed attempts
  uint64_t allocFailures{0};

  // total fragmented memory size in bytes
  uint64_t fragmentationSize{0};

  // number of hits for this container.
  uint64_t numHits{0};

  // number of evictions from this class id that was of a chained item
  uint64_t chainedItemEvictions{0};

  // number of regular items that were evicted from this classId
  uint64_t regularItemEvictions{0};

  // the stats from the mm container
  MMContainerStat containerStat{};

  // number of items; currently identical to numEvictableItems().
  uint64_t numItems() const noexcept { return numEvictableItems(); }

  // number of elements in this MMContainer
  size_t numEvictableItems() const noexcept { return containerStat.size; }

  // total number of evictions.
  uint64_t numEvictions() const noexcept {
    return chainedItemEvictions + regularItemEvictions;
  }

  // the age of the current oldest item in the container, in seconds.
  // Returns 0 when no oldest timestamp is recorded (oldestTimeSec == 0) and
  // also when the recorded timestamp is ahead of the current clock, so the
  // unsigned subtraction can never wrap around to a huge bogus age.
  uint64_t getEvictionAge() const noexcept {
    const uint64_t oldest = containerStat.oldestTimeSec;
    if (oldest == 0) {
      return 0;
    }
    const uint64_t now = util::getCurrentTimeSec();
    return now > oldest ? now - oldest : 0;
  }
};
// Stats for a pool.
//
// Scalar members carry default initializers so that a default-constructed
// PoolStats never exposes indeterminate values. The struct remains an
// aggregate, so positional brace-initialization keeps working.
struct PoolStats {
  // pool name given by users of this pool.
  std::string poolName;

  // true if the pool is a compact cache pool.
  bool isCompactCache{false};

  // total pool size assigned by users when adding pool.
  uint64_t poolSize{0};

  // total size of the pool that is actively usable, taking advising into
  // account
  uint64_t poolUsableSize{0};

  // total size of the pool that is set to be advised away.
  uint64_t poolAdvisedSize{0};

  // container stats that provide evictions etc.
  std::unordered_map<ClassId, CacheStat> cacheStats;

  // stats from the memory allocator perspective. this is a map of MPStat
  // for each allocation class that this pool has.
  MPStats mpStats;

  // number of get hits for this pool.
  uint64_t numPoolGetHits{0};

  // estimates for eviction age for items in this pool
  util::PercentileStats::Estimates evictionAgeSecs{};

  // the set of class ids of this pool, as recorded in mpStats
  const std::set<ClassId>& getClassIds() const noexcept {
    return mpStats.classIds;
  }

  // number of attempts to allocate
  uint64_t numAllocAttempts() const;

  // number of attempts to evict
  uint64_t numEvictionAttempts() const;

  // number of attempts that failed
  uint64_t numAllocFailures() const;

  // total memory fragmentation size of this pool.
  uint64_t totalFragmentation() const;

  // total number of free allocs for this pool
  uint64_t numFreeAllocs() const noexcept;

  // amount of cache memory that is not allocated.
  size_t freeMemoryBytes() const noexcept;

  // number of evictions for this pool
  uint64_t numEvictions() const noexcept;

  // number of all items in this pool
  uint64_t numItems() const noexcept;

  // number of evictable items
  uint64_t numEvictableItems() const noexcept;

  // total number of allocations currently in this pool
  uint64_t numActiveAllocs() const noexcept;

  // The per-class accessors below look the class up with at() and therefore
  // throw std::out_of_range when `cid` is not present.

  // number of hits for an alloc class in this pool
  uint64_t numHitsForClass(ClassId cid) const {
    return cacheStats.at(cid).numHits;
  }

  // number of slabs in this class id
  uint64_t numSlabsForClass(ClassId cid) const {
    return mpStats.acStats.at(cid).totalSlabs();
  }

  // alloc size corresponding to the class id
  uint32_t allocSizeForClass(ClassId cid) const {
    return cacheStats.at(cid).allocSize;
  }

  // mm container eviction age for the class
  uint64_t evictionAgeForClass(ClassId cid) const {
    return cacheStats.at(cid).getEvictionAge();
  }

  // total free allocs for the class
  uint64_t numFreeAllocsForClass(ClassId cid) const {
    return mpStats.acStats.at(cid).freeAllocs;
  }

  // This is the real eviction age of this pool as this number
  // guarantees the time any item inserted into this pool will live
  // ignores the classIds that are not used.
  uint64_t minEvictionAge() const;

  // computes the maximum eviction age across all class Ids
  uint64_t maxEvictionAge() const;

  // aggregate this pool stats with another that is compatible. To be
  // compatible, they need to have the same number of classIds
  //
  // throws when the operation is not compatible.
  PoolStats& operator+=(const PoolStats& other);
};
// Stats for slab release events.
//
// All counters are zero-initialized so that a default-constructed object
// never exposes indeterminate values. The struct remains an aggregate, so
// positional brace-initialization keeps working.
struct SlabReleaseStats {
  uint64_t numActiveSlabReleases{0};

  // slab release counters per reason (rebalance / resize / advise)
  uint64_t numSlabReleaseForRebalance{0};
  uint64_t numSlabReleaseForResize{0};
  uint64_t numSlabReleaseForAdvise{0};

  // slab release attempt counters per reason
  uint64_t numSlabReleaseForRebalanceAttempts{0};
  uint64_t numSlabReleaseForResizeAttempts{0};
  uint64_t numSlabReleaseForAdviseAttempts{0};

  // move / eviction attempts and successes
  // NOTE(review): presumably counted while releasing slabs — confirm.
  uint64_t numMoveAttempts{0};
  uint64_t numMoveSuccesses{0};
  uint64_t numEvictionAttempts{0};
  uint64_t numEvictionSuccesses{0};

  uint64_t numSlabReleaseStuck{0};
};
// Statistics reported by the reaper.
struct ReaperStats {
  // total number of items the reaper has visited
  uint64_t numVisitedItems = 0;

  // number of items that were reaped
  uint64_t numReapedItems = 0;

  uint64_t numVisitErrs = 0;

  // how many times the whole cache has been traversed
  uint64_t numTraversals = 0;

  // time in ms taken by the last iteration across the entire cache
  uint64_t lastTraversalTimeMs = 0;

  // minimum traversal time over all traversals, in ms
  uint64_t minTraversalTimeMs = 0;

  // maximum traversal time over all traversals, in ms
  uint64_t maxTraversalTimeMs = 0;

  // average traversal time over all traversals, in ms
  uint64_t avgTraversalTimeMs = 0;
};
// CacheMetadata type to export.
//
// Members are zero-initialized so that a default-constructed object never
// exposes indeterminate values. The struct remains an aggregate, so
// positional brace-initialization keeps working.
struct CacheMetadata {
  // allocator_version
  int allocatorVersion{0};

  // ram_format_version
  int ramFormatVersion{0};

  // nvm_format_version
  int nvmFormatVersion{0};

  // cache_total_size
  size_t cacheSize{0};
};
// Forward declaration of detail::Stats; only the name is introduced here, no
// definition is provided in this block.
namespace detail {
struct Stats;
} // namespace detail
// Stats that apply globally in cache and
// the ones that are aggregated over all pools.
//
// Every member has a default initializer so that a default-constructed
// GlobalCacheStats never exposes indeterminate values.
struct GlobalCacheStats {
  // number of calls to CacheAllocator::find
  uint64_t numCacheGets{0};

  // number of such calls being a miss in the cache.
  uint64_t numCacheGetMiss{0};

  // number of such calls being an expiry in the cache. This is also included
  // in the numCacheGetMiss stats above.
  uint64_t numCacheGetExpiries{0};

  // number of remove calls to CacheAllocator::remove that requires
  // a lookup first and then remove the item
  uint64_t numCacheRemoves{0};

  // number of remove calls that resulted in a ram hit
  uint64_t numCacheRemoveRamHits{0};

  // number of item destructor calls from ram
  uint64_t numRamDestructorCalls{0};

  // number of nvm gets
  uint64_t numNvmGets{0};

  // number of nvm misses
  uint64_t numNvmGetMiss{0};

  // number of nvm misses due to internal errors
  uint64_t numNvmGetMissErrs{0};

  // number of nvm misses due to inflight remove on the same key
  uint64_t numNvmGetMissDueToInflightRemove{0};

  // number of nvm misses that happened synchronously
  uint64_t numNvmGetMissFast{0};

  // number of nvm gets that are expired
  uint64_t numNvmGetMissExpired{0};

  // number of gets that joined a concurrent fill for same item
  uint64_t numNvmGetCoalesced{0};

  // number of deletes issued to nvm
  uint64_t numNvmDeletes{0};

  // number of deletes skipped and not issued to nvm
  uint64_t numNvmSkippedDeletes{0};

  // number of writes to nvm
  uint64_t numNvmPuts{0};

  // number of put errors;
  uint64_t numNvmPutErrs{0};

  // number of put failures due to encode call back
  uint64_t numNvmPutEncodeFailure{0};

  // number of puts that observed an inflight delete and aborted
  uint64_t numNvmAbortedPutOnTombstone{0};

  // number of items that are filtered by compaction
  uint64_t numNvmCompactionFiltered{0};

  // number of puts that observed an inflight get and aborted
  uint64_t numNvmAbortedPutOnInflightGet{0};

  // number of evictions from NvmCache
  uint64_t numNvmEvictions{0};

  // number of evictions where items leave both RAM and NvmCache entirely
  uint64_t numCacheEvictions{0};

  // number of evictions from nvm that found an inconsistent state in RAM
  uint64_t numNvmUncleanEvict{0};

  // number of evictions that were issued for an item that was in RAM in clean
  // state
  uint64_t numNvmCleanEvict{0};

  // number of evictions that were issued more than once on an unclean item.
  uint64_t numNvmCleanDoubleEvict{0};

  // number of evictions that were already expired
  uint64_t numNvmExpiredEvict{0};

  // number of item destructor calls from nvm
  uint64_t numNvmDestructorCalls{0};

  // number of RefcountOverflow happens causing item destructor
  // being skipped in nvm
  uint64_t numNvmDestructorRefcountOverflow{0};

  // number of puts to nvm of a clean item in RAM due to nvm eviction.
  uint64_t numNvmPutFromClean{0};

  // attempts made from nvm cache to allocate an item for promotion
  uint64_t numNvmAllocAttempts{0};

  // attempts made from nvm cache to allocate an item for its destructor
  uint64_t numNvmAllocForItemDestructor{0};

  // heap allocate errors for item destructor
  uint64_t numNvmItemDestructorAllocErrors{0};

  // size of itemRemoved_ hash set in nvm
  uint64_t numNvmItemRemovedSetSize{0};

  // number of attempts to allocate an item
  uint64_t allocAttempts{0};

  // number of eviction attempts
  uint64_t evictionAttempts{0};

  // number of failures to allocate an item due to internal error
  uint64_t allocFailures{0};

  // number of evictions across all the pools in the cache.
  uint64_t numEvictions{0};

  // number of allocation attempts with invalid input params.
  uint64_t invalidAllocs{0};

  // total number of items
  uint64_t numItems{0};

  // number of refcount overflows
  uint64_t numRefcountOverflow{0};

  // number of exception occurred inside item destructor
  uint64_t numDestructorExceptions{0};

  // number of allocated and CHAINED items that are children (i.e.,
  // allocated with a parent handle that it's chained to)
  uint64_t numChainedChildItems{0};

  // number of allocated and CHAINED items that are parents (i.e.,
  // consisting of at least one chained child)
  uint64_t numChainedParentItems{0};

  // number of eviction failures
  uint64_t numEvictionFailureFromAccessContainer{0};
  uint64_t numEvictionFailureFromConcurrentFill{0};
  uint64_t numEvictionFailureFromParentAccessContainer{0};
  uint64_t numEvictionFailureFromMoving{0};
  uint64_t numEvictionFailureFromParentMoving{0};

  // latency and percentile stats of various cachelib operations
  util::PercentileStats::Estimates allocateLatencyNs{};
  util::PercentileStats::Estimates moveChainedLatencyNs{};
  util::PercentileStats::Estimates moveRegularLatencyNs{};
  util::PercentileStats::Estimates nvmLookupLatencyNs{};
  util::PercentileStats::Estimates nvmInsertLatencyNs{};
  util::PercentileStats::Estimates nvmRemoveLatencyNs{};
  util::PercentileStats::Estimates ramEvictionAgeSecs{};
  util::PercentileStats::Estimates ramItemLifeTimeSecs{};
  util::PercentileStats::Estimates nvmSmallLifetimeSecs{};
  util::PercentileStats::Estimates nvmLargeLifetimeSecs{};
  util::PercentileStats::Estimates nvmEvictionSecondsPastExpiry{};
  util::PercentileStats::Estimates nvmEvictionSecondsToExpiry{};
  util::PercentileStats::Estimates nvmPutSize{};

  // time when CacheAllocator structure is created. Whenever a process restarts
  // and even if cache content is persisted, this will be reset. It's similar
  // to process uptime. (But alternatively if user explicitly shuts down and
  // re-attach cache, this will be reset as well)
  uint64_t cacheInstanceUpTime{0};

  // time since the ram cache was created in seconds
  uint64_t ramUpTime{0};

  // time since the nvm cache was created in seconds
  uint64_t nvmUpTime{0};

  // If true, it means ram cache is brand new, or it was not restored from a
  // previous cache instance
  bool isNewRamCache{false};

  // If true, it means nvm cache is brand new, or it was not restored from a
  // previous cache instance
  bool isNewNvmCache{false};

  // if nvmcache is currently active and serving gets
  bool nvmCacheEnabled{false};

  // stats related to the reaper
  ReaperStats reaperStats;

  uint64_t numNvmRejectsByExpiry{};
  uint64_t numNvmRejectsByClean{};
  uint64_t numNvmRejectsByAP{};

  // Decryption and Encryption errors
  uint64_t numNvmEncryptionErrors{0};
  uint64_t numNvmDecryptionErrors{0};

  // Number of times slab release was aborted due to shutdown
  uint64_t numAbortedSlabReleases{0};

  // Number of times slab was skipped when reaper runs
  uint64_t numReaperSkippedSlabs{0};

  // current active handles outstanding. This stat should
  // not go to negative. If it's negative, it means we have
  // leaked handles (or some sort of accounting bug internally)
  int64_t numActiveHandles{0};
};
// Memory usage breakdown of the cache. Data members keep their original
// declaration order; the helper methods are grouped at the end.
struct CacheMemoryStats {
  // memory currently used for the cache, in bytes, excluding the memory used
  // for headers. This can change as memory is advised and reclaimed.
  size_t cacheSize = 0;

  // memory size of the regular pools, in bytes
  size_t regularCacheSize = 0;

  // memory size of the compact cache pools, in bytes
  size_t compactCacheSize = 0;

  // memory currently advised away, in bytes
  size_t advisedSize = 0;

  // maximum advised percentage of the regular cache
  size_t maxAdvisedPct = 0;

  // memory not assigned to any pool, in bytes
  size_t unReservedSize = 0;

  // size of the nvm cache, in addition to the ram cache
  size_t nvmCacheSize = 0;

  // amount of memory available on the host
  size_t memAvailableSize = 0;

  // rss size of the process
  size_t memRssSize = 0;

  // the advised memory expressed in units of slabs
  size_t numAdvisedSlabs() const {
    return advisedSize / Slab::kSize;
  }

  // the usable portion of the cache size (cacheSize minus advisedSize)
  size_t usableCacheSize() const {
    return cacheSize - advisedSize;
  }
};
// Stats for compact cache.
//
// All counters are zero-initialized so that a default-constructed object can
// be used directly as an accumulator for operator+=. The struct remains an
// aggregate, so positional brace-initialization keeps working.
struct CCacheStats {
  // get counters
  uint64_t get{0};
  uint64_t getHit{0};
  uint64_t getMiss{0};
  uint64_t getErr{0};
  uint64_t tailHits{0};

  // set counters
  uint64_t set{0};
  uint64_t setHit{0};
  uint64_t setMiss{0};
  uint64_t setErr{0};
  uint64_t evictions{0};

  // delete counters
  uint64_t del{0};
  uint64_t delHit{0};
  uint64_t delMiss{0};
  uint64_t delErr{0};

  // purge counters
  uint64_t purgeSuccess{0};
  uint64_t purgeErr{0};

  // timeout counters
  uint64_t lockTimeout{0};
  uint64_t promoteTimeout{0};

  // hit ratio; declared here, defined outside this block.
  double hitRatio() const;

  // Adds every counter of `other` to the matching counter of this object and
  // returns a reference to this object.
  CCacheStats& operator+=(const CCacheStats& other) {
    get += other.get;
    getHit += other.getHit;
    getMiss += other.getMiss;
    getErr += other.getErr;
    tailHits += other.tailHits;

    set += other.set;
    setHit += other.setHit;
    setMiss += other.setMiss;
    setErr += other.setErr;
    evictions += other.evictions;

    del += other.del;
    delHit += other.delHit;
    delMiss += other.delMiss;
    delErr += other.delErr;

    purgeSuccess += other.purgeSuccess;
    purgeErr += other.purgeErr;

    lockTimeout += other.lockTimeout;
    promoteTimeout += other.promoteTimeout;
    return *this;
  }
};
// Kinds of background workers. MAX_POOL_WORKER is the enumerator that follows
// the last worker kind, so its value equals the number of worker kinds.
enum PoolWorkerType {
  POOL_REBALANCER = 0,
  POOL_RESIZER = 1,
  MEMORY_MONITOR = 2,
  MAX_POOL_WORKER = 3
};
/* Slab release event data */
// One record describing a single slab release event. Members carry no default
// initializers, so whoever creates a record has to set every field.
struct SlabReleaseData {
// Time when release occurred.
std::chrono::system_clock::time_point timeOfRelease;
// The class where the slab was released from.
ClassId from;
// The receiver of the released slab.
ClassId to;
// The sequence of this event, with respect to other release events logged by
// this process.
uint64_t sequenceNum;
// Time release took.
// NOTE(review): milliseconds, judging by the name — confirm.
uint64_t durationMs;
// PoolId of the pool where the rebalance occurred.
PoolId pid;
// Number of slabs in the victim class after rebalancing.
unsigned int numSlabsInVictim;
// Number of slabs in the receiver class after rebalancing.
unsigned int numSlabsInReceiver;
// Allocation size of the victim class.
uint32_t victimAllocSize;
// Allocation size of the receiver class.
uint32_t receiverAllocSize;
// Eviction age of the victim class.
uint64_t victimEvictionAge;
// Eviction age of the receiver class.
uint64_t receiverEvictionAge;
// Number of free allocs in the victim class
uint64_t numFreeAllocsInVictim;
};
// A list of slab release event records.
using SlabReleaseEvents = std::vector<SlabReleaseData>;
// Slab release events organized by their type
struct AllSlabReleaseEvents {
// NOTE(review): presumably the events produced by the pool rebalancer, the
// pool resizer and the memory monitor respectively (cf. PoolWorkerType) —
// confirm against the code that fills these vectors.
SlabReleaseEvents rebalancerEvents;
SlabReleaseEvents resizerEvents;
SlabReleaseEvents monitorEvents;
};
} // namespace cachelib
} // namespace facebook