Skip to content

Commit 3e5c402

Browse files
authored
fix: Improve scrubbing annotated frames (#1197)
### Fix bug overwriting cached Paint FBOs between frames ### Summarize your change. There was a bug that was causing cached Paint FBOs to be released between frames. In the original code, the paint FBOs were explicitly ignored when cleaning garbage collecting from the cache. However, they were still marked as available, allowing them to be reused. This fix simply avoids that call for Paint FBOs so they remain cached when changing frames (ie when scrubbing or playing back in a loop). That is the core of the fix, but I also implemented some safety mechanisms that were not done in the original implementation to be on the safe side. Everything but the one `continue` statement to avoid marking the FBO as available is related to that. The first safety check is to set an age limit in the Paint FBOs. If we haven't seen them in 300 renders (chosen rather arbitrarily by me, but can be adjusted), it's not likely we'll see them again, so we can safely evict them. I also added a second safety check against unbound cache growth. If there are many large annotated frames all in a row, we can end up generating a huge cache. Although this scenario is unlikely as under normal circumstances the age limit will free the cached FBOs, we don't want to cause problems for the GPU by caching too much, so let's set a limit of 32 Paint FBOs in the cache. If we reach this limit, we'll sort by age and evict the oldest entries, effectively creating an LRU cache for the Paint FBOs. Other than that, I just did some cleanup: - The FBO deletion code was cut-pasted everywhere so I just made a helper function to do it - Found all the FBO deletion code and made it use the new method - Fixed an assert I encountered in src/lib/geometry/TwkPaint/Path.cpp when testing it: rDirectionCoords was getting dir pushed to it twice. This didn't cause any issues, but does trigger an assert in debug. ### Describe the reason for the change. Improve playback performance with annotated frames ### Describe what you have tested and on which operating system. Mac OS 26.3.1 Mac Pro M4 I created a test scenario with 500 annotations per frame and 15 points per stroke on 5 sequential frames. Rendering speed before changes: 6.8 fps Rendering speed after changes: 59.8 fps Nearly a 10x speedup. ### Add a list of changes, and note any that might need special attention during the review. ### If possible, provide screenshots. <img width="331" height="608" alt="Screenshot 2026-03-11 at 15 02 36" src="https://github.com/user-attachments/assets/7b06e1fc-568d-4fdd-9b3f-25de21e9ee6a" /> <img width="327" height="614" alt="Screenshot 2026-03-11 at 15 40 01" src="https://github.com/user-attachments/assets/a64a5398-7c77-46bc-9c9e-be26981a7f50" /> --------- Signed-off-by: Roger Nelson <roger.nelson@autodesk.com>
1 parent 780ae2a commit 3e5c402

3 files changed

Lines changed: 153 additions & 46 deletions

File tree

src/lib/geometry/TwkPaint/Path.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ namespace TwkPaint
103103

104104
IndexTriangle t(n, i2, i0);
105105
rTris.push_back(t);
106-
rDirectionCoords.push_back(dir);
107106

108107
for (size_t q = n + 1; q < na; q++)
109108
{

src/lib/ip/IPCore/IPCore/ImageFBO.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@ namespace IPCore
123123
ImageFBO* findExistingPaintFBO(const GLFBO*, const std::string&, bool&, size_t&, size_t);
124124

125125
private:
126+
// Frees all GPU and CPU resources owned by imageFBO. Does not remove it
127+
// from any vector — callers are responsible for vector bookkeeping.
128+
void destroyImageFBO(ImageFBO* imageFBO);
129+
126130
ImageFBOVector m_outputImageFBOs;
127131
GLFenceTextureMap m_textureFenceMap;
128132
// GLFenceFBOMap m_fboFenceMap;

src/lib/ip/IPCore/ImageFBO.cpp

Lines changed: 149 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@
66
//******************************************************************************
77

88
#include <IPCore/ImageFBO.h>
9+
#include <algorithm>
10+
11+
namespace
12+
{
13+
// max length of FBO ids output to debug logs
14+
//
15+
constexpr size_t FBO_ID_DEBUG_LOG_LIMIT = 50;
16+
} // namespace
917

1018
namespace IPCore
1119
{
@@ -49,12 +57,12 @@ namespace IPCore
4957
for (size_t i = 0; i < m_imageFBOs.size(); i++)
5058
{
5159
ImageFBO* f = m_imageFBOs[i];
52-
string s = f->identifier;
53-
if (s.size() > 50)
54-
s = s.substr(0, 50);
60+
string fboId = f->identifier;
61+
if (fboId.size() > FBO_ID_DEBUG_LOG_LIMIT)
62+
fboId = fboId.substr(0, FBO_ID_DEBUG_LOG_LIMIT);
5563

5664
cout << "INFO: " << i << ": " << f->fbo()->width() << "x" << f->fbo()->height() << ", "
57-
<< (f->available ? "available" : "used") << "/" << f->fullSerialNum << ", " << s << endl;
65+
<< (f->available ? "available" : "used") << "/" << f->fullSerialNum << ", " << fboId << endl;
5866
}
5967
}
6068

@@ -86,12 +94,12 @@ namespace IPCore
8694
for (size_t i = 0; i < m_imageFBOs.size(); i++)
8795
{
8896
ImageFBO* f = m_imageFBOs[i];
89-
string s = f->identifier;
90-
if (s.size() > 50)
91-
s = s.substr(0, 50);
97+
string fboId = f->identifier;
98+
if (fboId.size() > FBO_ID_DEBUG_LOG_LIMIT)
99+
fboId = fboId.substr(0, FBO_ID_DEBUG_LOG_LIMIT);
92100

93101
cout << "INFO: " << i << ": " << f->fbo()->width() << "x" << f->fbo()->height() << ", "
94-
<< (f->available ? "available" : "used") << "/" << f->fullSerialNum << ", " << s << endl;
102+
<< (f->available ? "available" : "used") << "/" << f->fullSerialNum << ", " << fboId << endl;
95103
}
96104
}
97105

@@ -197,8 +205,8 @@ namespace IPCore
197205
if (m_imageFBOLog)
198206
{
199207
ImageFBO* i = imageFBO;
200-
// string s = i->identifier;
201-
// if (s.size() > 50) s = s.substr(0, 50);
208+
// string fboId = i->identifier;
209+
// if (fboId.size() > FBO_ID_DEBUG_LOG_LIMIT) fboId = fboId.substr(0, FBO_ID_DEBUG_LOG_LIMIT);
202210
cout << "INFO: (" << fullSerialNum << ") found imageFBO " << i->fbo()->width() << "x" << i->fbo()->height() << ", "
203211
<< (i->available ? "available" : "used") << "/" << i->fullSerialNum << ", " << i->identifier << endl;
204212
}
@@ -225,45 +233,159 @@ namespace IPCore
225233
return 0;
226234
}
227235

236+
void ImageFBOManager::destroyImageFBO(ImageFBO* imageFBO)
237+
{
238+
m_totalSizeInBytes -= imageFBO->fbo()->totalSizeInBytes();
239+
deleteFBOFence(imageFBO->fbo());
240+
delete imageFBO->fbo();
241+
delete imageFBO;
242+
}
243+
228244
void ImageFBOManager::gcImageFBOs(size_t fullSerialNum)
229245
{
246+
247+
// Number of render cycles an unused regular FBO is kept before being freed.
248+
// A small grace window (roughly 200ms at 24fps) prevents thrashing when a
249+
// frame is temporarily skipped during cache warm-up or off-screen evaluation.
250+
constexpr size_t FBO_AGE_LIMIT = 5;
251+
252+
// Number of render cycles an idle paint cache FBO is kept before being freed.
253+
// Paint cache FBOs are pinned (never returned to the general pool) so they
254+
// survive scrubbing. This longer threshold only fires when the annotation has
255+
// been genuinely abandoned — cleared, source removed, or the user has been on
256+
// a completely different part of the timeline — preventing unbounded memory
257+
// accumulation from orphaned cache entries.
258+
constexpr size_t PAINT_FBO_AGE_LIMIT = 300;
259+
260+
// Maximum number of paint cache FBOs retained simultaneously. Each pinned
261+
// paint FBO holds a full-resolution composited image (e.g. ~33MB at 4K 8-bit),
262+
// so without a cap a long annotated timeline could exhaust GPU memory. When
263+
// the limit is exceeded the least-recently-used entries (lowest fullSerialNum)
264+
// are evicted first, keeping the most recently visited frames hot.
265+
constexpr size_t MAX_PAINT_FBO_COUNT = 32;
266+
230267
//
231268
// This is run after each main render -- FBOs all become
232269
// available and those that were untouched from last time are
233270
// freed (age > 1). This is a naively simple caching strategy,
234271
// but seems to be adequate in practice.
235272
//
273+
size_t paintFBOCount = 0;
236274

237275
for (size_t q = 0; q < m_imageFBOs.size(); q++)
238276
{
239277
ImageFBO* i = m_imageFBOs[q];
240278
const size_t age = fullSerialNum - i->fullSerialNum;
279+
const bool isPaintCache = i->identifier.find("paintCmdNo") != string::npos;
280+
281+
if (isPaintCache)
282+
{
283+
// Paint cache FBOs must NOT be returned to the general pool.
284+
// Setting available=true would allow newImageFBO() to grab them
285+
// by dimension match before findExistingPaintFBO() reclaims them,
286+
// destroying the annotation cache and forcing a cold O(N) re-render
287+
// on every scrub. Leave them pinned (available=false) so only
288+
// findExistingPaintFBO() can ever reclaim them.
289+
//
290+
// Evict only after a long idle period — this handles the case where
291+
// annotations were cleared or the source was removed, preventing
292+
// unbounded memory accumulation.
293+
if (age > PAINT_FBO_AGE_LIMIT)
294+
{
295+
if (m_imageFBOLog)
296+
{
297+
string fboId = i->identifier;
298+
if (fboId.size() > FBO_ID_DEBUG_LOG_LIMIT)
299+
fboId = fboId.substr(0, FBO_ID_DEBUG_LOG_LIMIT);
300+
301+
cout << "INFO: gc paint FBO evict (" << fullSerialNum << ") " << i->fbo()->width() << "x" << i->fbo()->height()
302+
<< "/" << i->fullSerialNum << ", " << fboId << endl;
303+
}
304+
305+
destroyImageFBO(i);
306+
m_imageFBOs[q] = m_imageFBOs.back();
307+
m_imageFBOs.pop_back();
308+
q--;
309+
}
310+
else
311+
{
312+
paintFBOCount++;
313+
}
314+
continue;
315+
}
241316

242317
i->available = true;
243318

244-
size_t loc = i->identifier.find("paintCmdNo");
245-
if (age > 5 && loc == string::npos) // do not release the paint FBOs
319+
if (age > FBO_AGE_LIMIT)
246320
{
247321
if (m_imageFBOLog)
248322
{
249-
string s = i->identifier;
250-
if (s.size() > 50)
251-
s = s.substr(0, 50);
323+
string fboId = i->identifier;
324+
if (fboId.size() > FBO_ID_DEBUG_LOG_LIMIT)
325+
fboId = fboId.substr(0, FBO_ID_DEBUG_LOG_LIMIT);
252326

253327
cout << "INFO: gc (" << fullSerialNum << ") " << i->fbo()->width() << "x" << i->fbo()->height() << ", "
254-
<< (i->available ? "available" : "used") << "/" << i->fullSerialNum << ", " << s << endl;
328+
<< (i->available ? "available" : "used") << "/" << i->fullSerialNum << ", " << fboId << endl;
255329
}
256330

257-
m_totalSizeInBytes -= i->fbo()->totalSizeInBytes();
258-
259-
deleteFBOFence(i->fbo());
260-
delete i->fbo();
261-
delete i;
331+
destroyImageFBO(i);
262332
m_imageFBOs[q] = m_imageFBOs.back();
263333
m_imageFBOs.pop_back();
264334
q--;
265335
}
266336
}
337+
338+
//
339+
// Cap the total number of pinned paint cache FBOs to prevent unbounded
340+
// GPU memory growth on long annotated timelines. When the limit is
341+
// exceeded, evict the least-recently-used entries (lowest fullSerialNum)
342+
// first, keeping the most recently visited frames hot in cache.
343+
//
344+
345+
// Collect pointers to surviving paint FBOs only when the cap is exceeded,
346+
// avoiding a heap allocation and second scan in the common case.
347+
if (paintFBOCount <= MAX_PAINT_FBO_COUNT)
348+
return;
349+
350+
vector<ImageFBO*> paintFBOs;
351+
paintFBOs.reserve(paintFBOCount);
352+
for (ImageFBO* fbo : m_imageFBOs)
353+
{
354+
if (fbo->identifier.find("paintCmdNo") != string::npos)
355+
paintFBOs.push_back(fbo);
356+
}
357+
358+
if (paintFBOs.size() > MAX_PAINT_FBO_COUNT)
359+
{
360+
// Sort ascending by fullSerialNum so the oldest (LRU) come first.
361+
sort(paintFBOs.begin(), paintFBOs.end(),
362+
[](const ImageFBO* a, const ImageFBO* b) { return a->fullSerialNum < b->fullSerialNum; });
363+
364+
const size_t evictCount = paintFBOs.size() - MAX_PAINT_FBO_COUNT;
365+
for (size_t e = 0; e < evictCount; e++)
366+
{
367+
ImageFBO* victim = paintFBOs[e];
368+
369+
if (m_imageFBOLog)
370+
{
371+
string fboId = victim->identifier;
372+
if (fboId.size() > FBO_ID_DEBUG_LOG_LIMIT)
373+
fboId = fboId.substr(0, FBO_ID_DEBUG_LOG_LIMIT);
374+
375+
cout << "INFO: gc paint FBO cap evict (" << fullSerialNum << ") " << victim->fbo()->width() << "x"
376+
<< victim->fbo()->height() << "/" << victim->fullSerialNum << ", " << fboId << endl;
377+
}
378+
379+
// Remove from m_imageFBOs using swap-and-pop, then destroy.
380+
auto it = find(m_imageFBOs.begin(), m_imageFBOs.end(), victim);
381+
if (it != m_imageFBOs.end())
382+
{
383+
*it = m_imageFBOs.back();
384+
m_imageFBOs.pop_back();
385+
}
386+
destroyImageFBO(victim);
387+
}
388+
}
267389
}
268390

269391
void ImageFBOManager::insertFBOFence(const GLFBO* fbo)
@@ -407,11 +529,11 @@ namespace IPCore
407529
if (m_imageFBOLog)
408530
{
409531
ImageFBO* i = target;
410-
string s = i->identifier;
411-
if (s.size() > 50)
412-
s = s.substr(0, 50);
532+
string fboId = i->identifier;
533+
if (fboId.size() > FBO_ID_DEBUG_LOG_LIMIT)
534+
fboId = fboId.substr(0, FBO_ID_DEBUG_LOG_LIMIT);
413535
cout << "INFO: release " << i->fbo()->width() << "x" << i->fbo()->height() << ", " << (i->available ? "available" : "used")
414-
<< "/" << i->fullSerialNum << ", " << s << endl;
536+
<< "/" << i->fullSerialNum << ", " << fboId << endl;
415537
}
416538

417539
deleteFBOFence(fbo);
@@ -421,28 +543,10 @@ namespace IPCore
421543
void ImageFBOManager::flushImageFBOs()
422544
{
423545
for (size_t i = 0; i < m_outputImageFBOs.size(); i++)
424-
{
425-
ImageFBO* t = m_outputImageFBOs[i];
426-
deleteFBOFence(t->fbo());
427-
if (t)
428-
{
429-
m_totalSizeInBytes -= t->fbo()->totalSizeInBytes();
430-
delete t->fbo();
431-
}
432-
delete t;
433-
}
546+
destroyImageFBO(m_outputImageFBOs[i]);
434547

435548
for (size_t i = 0; i < m_imageFBOs.size(); i++)
436-
{
437-
ImageFBO* t = m_imageFBOs[i];
438-
deleteFBOFence(t->fbo());
439-
if (t)
440-
{
441-
m_totalSizeInBytes -= t->fbo()->totalSizeInBytes();
442-
delete t->fbo();
443-
}
444-
delete t;
445-
}
549+
destroyImageFBO(m_imageFBOs[i]);
446550

447551
if (m_imageFBOLog)
448552
{

0 commit comments

Comments
 (0)