Skip to content

Commit ce51b80

Browse files
committed
ITS: add memory stats
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent ba009cd commit ce51b80

2 files changed

Lines changed: 145 additions & 34 deletions

File tree

Detectors/ITSMFT/ITS/tracking/include/ITStracking/BoundedAllocator.h

Lines changed: 129 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,15 @@
2121
#include <atomic>
2222
#include <new>
2323
#include <vector>
24+
#define BOUNDED_MR_STATS
25+
#ifdef BOUNDED_MR_STATS
26+
#include <bit>
27+
#endif
2428

2529
#include "ITStracking/ExternalAllocator.h"
26-
27-
#include "GPUCommonLogger.h"
30+
#if !defined(GPUCA_GPUCODE_DEVICE)
31+
#include "Framework/Logger.h"
32+
#endif
2833

2934
namespace o2::its
3035
{
@@ -36,95 +41,185 @@ class BoundedMemoryResource final : public std::pmr::memory_resource
3641
{
3742
public:
3843
MemoryLimitExceeded(size_t attempted, size_t used, size_t max)
39-
: mAttempted(attempted), mUsed(used), mMax(max) {}
40-
const char* what() const noexcept final
4144
{
42-
static thread_local char msg[256];
43-
if (mAttempted != 0) {
44-
snprintf(msg, sizeof(msg),
45-
"Reached set memory limit (attempted: %zu, used: %zu, max: %zu)",
46-
mAttempted, mUsed, mMax);
45+
char buf[256];
46+
if (attempted != 0) {
47+
(void)snprintf(buf, sizeof(buf), "Reached set memory limit (attempted: %zu, used: %zu, max: %zu)", attempted, used, max);
4748
} else {
48-
snprintf(msg, sizeof(msg),
49-
"New set maximum below current used (newMax: %zu, used: %zu)",
50-
mMax, mUsed);
49+
(void)snprintf(buf, sizeof(buf), "New set maximum below current used (newMax: %zu, used: %zu)", max, used);
5150
}
52-
return msg;
51+
mMsg = buf;
5352
}
53+
const char* what() const noexcept final { return mMsg.c_str(); }
5454

5555
private:
56-
size_t mAttempted{0}, mUsed{0}, mMax{0};
56+
std::string mMsg;
5757
};
5858

59-
BoundedMemoryResource(size_t maxBytes = std::numeric_limits<size_t>::max(), std::pmr::memory_resource* upstream = std::pmr::get_default_resource())
59+
BoundedMemoryResource(size_t maxBytes = std::numeric_limits<size_t>::max(),
60+
std::pmr::memory_resource* upstream = std::pmr::get_default_resource())
6061
: mMaxMemory(maxBytes), mUpstream(upstream) {}
61-
BoundedMemoryResource(ExternalAllocator* alloc) : mAdaptor(std::make_unique<ExternalAllocatorAdaptor>(alloc)), mUpstream(mAdaptor.get()) {}
62+
63+
BoundedMemoryResource(ExternalAllocator* alloc,
64+
size_t maxBytes = std::numeric_limits<size_t>::max())
65+
: mMaxMemory(maxBytes),
66+
mAdaptor(std::make_unique<ExternalAllocatorAdaptor>(alloc)),
67+
mUpstream(mAdaptor.get()) {}
6268

6369
void* do_allocate(size_t bytes, size_t alignment) final
6470
{
65-
size_t new_used{0}, current_used{mUsedMemory.load(std::memory_order_relaxed)};
71+
size_t new_used{0};
72+
size_t current_used{mUsedMemory.load(std::memory_order_relaxed)};
6673
do {
6774
new_used = current_used + bytes;
68-
if (new_used > mMaxMemory) {
69-
++mCountThrow;
70-
throw MemoryLimitExceeded(new_used, current_used, mMaxMemory);
75+
if (new_used > mMaxMemory.load(std::memory_order_relaxed)) {
76+
mCountThrow.fetch_add(1, std::memory_order_relaxed);
77+
throw MemoryLimitExceeded(new_used, current_used,
78+
mMaxMemory.load(std::memory_order_relaxed));
7179
}
7280
} while (!mUsedMemory.compare_exchange_weak(current_used, new_used,
7381
std::memory_order_acq_rel,
7482
std::memory_order_relaxed));
83+
7584
void* p{nullptr};
7685
try {
7786
p = mUpstream->allocate(bytes, alignment);
7887
} catch (...) {
7988
mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed);
89+
#ifdef BOUNDED_MR_STATS
90+
mStats.upstreamFailures.fetch_add(1, std::memory_order_relaxed);
91+
#endif
8092
throw;
8193
}
94+
95+
#ifdef BOUNDED_MR_STATS
96+
size_t peak = mStats.peak.load(std::memory_order_relaxed);
97+
while (new_used > peak &&
98+
!mStats.peak.compare_exchange_weak(peak, new_used,
99+
std::memory_order_relaxed)) {
100+
}
101+
mStats.live.fetch_add(1, std::memory_order_relaxed);
102+
mStats.nAlloc.fetch_add(1, std::memory_order_relaxed);
103+
mStats.totalAlloc.fetch_add(bytes, std::memory_order_relaxed);
104+
105+
size_t ma = mStats.maxAlign.load(std::memory_order_relaxed);
106+
while (alignment > ma &&
107+
!mStats.maxAlign.compare_exchange_weak(ma, alignment,
108+
std::memory_order_relaxed)) {
109+
}
110+
111+
unsigned b = (bytes <= 1) ? 0u
112+
: 63u - static_cast<unsigned>(std::countl_zero(bytes - 1));
113+
if (b >= mStats.sizeBuckets.size()) {
114+
b = mStats.sizeBuckets.size() - 1;
115+
}
116+
mStats.sizeBuckets[b].fetch_add(1, std::memory_order_relaxed);
117+
#endif
82118
return p;
83119
}
84120

85121
void do_deallocate(void* p, size_t bytes, size_t alignment) final
86122
{
87123
mUpstream->deallocate(p, bytes, alignment);
88124
mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed);
125+
#ifdef BOUNDED_MR_STATS
126+
mStats.live.fetch_sub(1, std::memory_order_relaxed);
127+
mStats.nFree.fetch_add(1, std::memory_order_relaxed);
128+
mStats.totalFreed.fetch_add(bytes, std::memory_order_relaxed);
129+
#endif
89130
}
90131

91132
bool do_is_equal(const std::pmr::memory_resource& other) const noexcept final
92133
{
93134
return this == &other;
94135
}
95136

96-
size_t getUsedMemory() const noexcept { return mUsedMemory.load(); }
97-
size_t getMaxMemory() const noexcept { return mMaxMemory; }
137+
[[nodiscard]] size_t getUsedMemory() const noexcept
138+
{
139+
return mUsedMemory.load(std::memory_order_relaxed);
140+
}
141+
[[nodiscard]] size_t getMaxMemory() const noexcept
142+
{
143+
return mMaxMemory.load(std::memory_order_relaxed);
144+
}
145+
[[nodiscard]] size_t getThrowCount() const noexcept
146+
{
147+
return mCountThrow.load(std::memory_order_relaxed);
148+
}
149+
98150
void setMaxMemory(size_t max)
99151
{
100-
if (max == mMaxMemory) {
152+
size_t current = mMaxMemory.load(std::memory_order_relaxed);
153+
if (max == current) {
101154
return;
102155
}
103-
size_t used = mUsedMemory.load(std::memory_order_acquire);
104-
if (used > max) {
105-
++mCountThrow;
106-
throw MemoryLimitExceeded(0, used, max);
156+
for (;;) {
157+
size_t used = mUsedMemory.load(std::memory_order_acquire);
158+
if (used > max) {
159+
mCountThrow.fetch_add(1, std::memory_order_relaxed);
160+
throw MemoryLimitExceeded(0, used, max);
161+
}
162+
if (mMaxMemory.compare_exchange_weak(current, max,
163+
std::memory_order_release,
164+
std::memory_order_relaxed)) {
165+
return;
166+
}
167+
if (current == max) {
168+
return;
169+
}
107170
}
108-
mMaxMemory.store(max, std::memory_order_release);
109171
}
110172

173+
#if !defined(GPUCA_GPUCODE_DEVICE)
111174
void print() const
112175
{
113-
#if !defined(GPUCA_GPUCODE_DEVICE)
114-
constexpr double GB{1024 * 1024 * 1024};
115-
auto throw_ = mCountThrow.load(std::memory_order_relaxed);
116-
auto used = static_cast<double>(mUsedMemory.load(std::memory_order_relaxed));
117-
LOGP(info, "maxthrow={} maxmem={:.2f} GB used={:.2f} ({:.2f}%)",
118-
throw_, (double)mMaxMemory / GB, used / GB, 100. * used / (double)mMaxMemory);
176+
constexpr double GB{1024.0 * 1024.0 * 1024.0};
177+
const auto throw_ = mCountThrow.load(std::memory_order_relaxed);
178+
const auto used = static_cast<double>(mUsedMemory.load(std::memory_order_relaxed));
179+
const auto maxm = mMaxMemory.load(std::memory_order_relaxed);
180+
if (maxm == std::numeric_limits<size_t>::max()) {
181+
LOGP(info, "maxthrow={} maxmem=unbounded used={:.2f} GB",
182+
throw_, used / GB);
183+
} else {
184+
LOGP(info, "maxthrow={} maxmem={:.2f} GB used={:.2f} GB ({:.2f}%)",
185+
throw_, (double)maxm / GB, used / GB,
186+
100.0 * used / (double)maxm);
187+
}
188+
#ifdef BOUNDED_MR_STATS
189+
LOGP(info, " peak={:.2f} GB live={} nAlloc={} nFree={} totalAlloc={:.2f} GB totalFreed={:.2f} GB maxAlign={} upstreamFail={}",
190+
mStats.peak.load(std::memory_order_relaxed) / GB,
191+
mStats.live.load(std::memory_order_relaxed),
192+
mStats.nAlloc.load(std::memory_order_relaxed),
193+
mStats.nFree.load(std::memory_order_relaxed),
194+
mStats.totalAlloc.load(std::memory_order_relaxed) / GB,
195+
mStats.totalFreed.load(std::memory_order_relaxed) / GB,
196+
mStats.maxAlign.load(std::memory_order_relaxed),
197+
mStats.upstreamFailures.load(std::memory_order_relaxed));
119198
#endif
120199
}
200+
#endif
121201

122202
private:
123203
std::atomic<size_t> mMaxMemory{std::numeric_limits<size_t>::max()};
124204
std::atomic<size_t> mCountThrow{0};
125205
std::atomic<size_t> mUsedMemory{0};
126206
std::unique_ptr<ExternalAllocatorAdaptor> mAdaptor{nullptr};
127207
std::pmr::memory_resource* mUpstream{nullptr};
208+
209+
#ifdef BOUNDED_MR_STATS
/// Counters maintained by do_allocate / do_deallocate when BOUNDED_MR_STATS is
/// defined. Every field is an independent atomic.
struct Stats {
  /// Number of buckets in the request-size histogram.
  static constexpr size_t NSizeBuckets{32};

  std::atomic<size_t> peak{0};             ///< largest usage value recorded after a successful allocation
  std::atomic<size_t> live{0};             ///< allocations minus deallocations
  std::atomic<size_t> nAlloc{0};           ///< successful allocations
  std::atomic<size_t> nFree{0};            ///< deallocations
  std::atomic<size_t> totalAlloc{0};       ///< sum of bytes over all successful allocations
  std::atomic<size_t> totalFreed{0};       ///< sum of bytes over all deallocations
  std::atomic<size_t> maxAlign{0};         ///< largest alignment requested by a successful allocation
  std::atomic<size_t> upstreamFailures{0}; ///< allocations for which the upstream resource threw
  std::array<std::atomic<size_t>, NSizeBuckets> sizeBuckets{}; ///< histogram of request sizes, indexed by do_allocate
};
Stats mStats{};
#endif
128223
};
129224

130225
template <typename T>

Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,28 @@ void Tracker<NLayers>::clustersToTracks(const LogFunc& logger, const LogFunc& er
7979
total += evaluateTask(&Tracker::initialiseTimeFrame, StateNames[mCurState = TFInit], iteration, logger, iteration);
8080
do {
8181
timeTracklets += evaluateTask(&Tracker::computeTracklets, StateNames[mCurState = Trackleting], iteration, evalLog, iteration, iVertex);
82+
if (mTrkParams[iteration].PrintMemory) {
83+
LOGP(info, "{}:{}", iteration, StateNames[mCurState]);
84+
mMemoryPool->print();
85+
}
8286
nTracklets += mTraits->getTFNumberOfTracklets();
8387
timeCells += evaluateTask(&Tracker::computeCells, StateNames[mCurState = Celling], iteration, evalLog, iteration);
88+
if (mTrkParams[iteration].PrintMemory) {
89+
LOGP(info, "{}:{}", iteration, StateNames[mCurState]);
90+
mMemoryPool->print();
91+
}
8492
nCells += mTraits->getTFNumberOfCells();
8593
timeNeighbours += evaluateTask(&Tracker::findCellsNeighbours, StateNames[mCurState = Neighbouring], iteration, evalLog, iteration);
94+
if (mTrkParams[iteration].PrintMemory) {
95+
LOGP(info, "{}:{}", iteration, StateNames[mCurState]);
96+
mMemoryPool->print();
97+
}
8698
nNeighbours += mTimeFrame->getNumberOfNeighbours();
8799
timeRoads += evaluateTask(&Tracker::findRoads, StateNames[mCurState = Roading], iteration, evalLog, iteration);
100+
if (mTrkParams[iteration].PrintMemory) {
101+
LOGP(info, "{}:{}", iteration, StateNames[mCurState]);
102+
mMemoryPool->print();
103+
}
88104
} while (++iVertex < maxNvertices);
89105
logger(std::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets));
90106
logger(std::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells));

0 commit comments

Comments
 (0)