From 6bd9122096355df3e570b1d1294bd02de42dc6d2 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Mon, 30 Mar 2026 19:52:37 +0200 Subject: [PATCH] use fast path in blendSegment to bump up FPS --- wled00/FX_fcn.cpp | 67 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp index e538b342f6..0841e0b3ef 100644 --- a/wled00/FX_fcn.cpp +++ b/wled00/FX_fcn.cpp @@ -1367,6 +1367,7 @@ static uint8_t _dummy (uint8_t a, uint8_t b) { return a; } // dummy (same as void WS2812FX::blendSegment(const Segment &topSegment) const { typedef uint8_t(*FuncType)(uint8_t, uint8_t); // function pointer array: fill with _dummy if using special case: avoid OOB access and always provide a valid path + // note: making the function array static const uses more ram and comes at no significant speed gain FuncType funcs[] = { _dummy, _dummy, _dummy, _subtract, _difference, _average, _dummy, _divide, @@ -1398,14 +1399,69 @@ void WS2812FX::blendSegment(const Segment &topSegment) const { const size_t matrixSize = Segment::maxWidth * Segment::maxHeight; const size_t startIndx = XY(topSegment.start, topSegment.startY); const size_t stopIndx = startIndx + length; - const unsigned progress = topSegment.progress(); - const unsigned progInv = 0xFFFFU - progress; uint8_t opacity = topSegment.currentBri(); // returns transitioned opacity for style FADE uint8_t cct = topSegment.currentCCT(); if (gammaCorrectCol) opacity = gamma8inv(opacity); // use inverse gamma on brightness for correct color scaling after gamma correction (see #5343 for details) - Segment::setClippingRect(0, 0); // disable clipping by default + const Segment *segO = topSegment.getOldSegment(); + const bool hasGrouping = topSegment.groupLength() != 1; + // fast path: handle the default case - no transitions, no grouping/spacing, no mirroring, no CCT + if (!segO && blendingStyle == TRANSITION_FADE && !hasGrouping && !topSegment.mirror && 
!topSegment.mirror_y) { + if (isMatrix && width * height <= (int)matrixSize && !_pixelCCT) { +#ifndef WLED_DISABLE_2D + // Calculate pointer steps to avoid 'if' and 'XY()' inside loops + int x_inc = 1; + int y_inc = Segment::maxWidth; + int start_offset = XY(topSegment.start, topSegment.startY); + + // adjust starting position and steps based on Reverse/Transpose + // note: transpose is handled in a separate loop so it is still fast and no branching is needed in default path + if (!topSegment.transpose) { + if (topSegment.reverse) { start_offset += (width - 1); x_inc = -1; } + if (topSegment.reverse_y) { start_offset += (height - 1) * Segment::maxWidth; y_inc = -Segment::maxWidth; } + + for (int y = 0; y < height; y++) { + uint32_t* pRow = &_pixels[start_offset + y * y_inc]; + for (int x = 0; x < width; x++) { + uint32_t* p = pRow + x * x_inc; + uint32_t c_a = topSegment.getPixelColorRaw(x + y * width); + *p = color_blend(*p, segblend(c_a, *p), opacity); + } + } + } else { // transposed + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + int px = topSegment.reverse ? (width - x - 1) : x; + int py = topSegment.reverse_y ? (height - y - 1) : y; + size_t idx = XY(topSegment.start + py, topSegment.startY + px); // Swapped X/Y + _pixels[idx] = color_blend(_pixels[idx], segblend(topSegment.getPixelColorRaw(x + y * width), _pixels[idx]), opacity); + } + } + } + return; +#endif + } else if (!isMatrix) { + // 1D fast path, include CCT as it is more common on 1D setups + uint32_t* strip = _pixels; + int start = topSegment.start; + int off = topSegment.offset; + for (int i = 0; i < length; i++) { + uint32_t c_a = topSegment.getPixelColorRaw(i); + int p = topSegment.reverse ? 
(length - i - 1) : i; + int idx = start + p + off; + if (idx >= topSegment.stop) idx -= length; + strip[idx] = color_blend(strip[idx], segblend(c_a, strip[idx]), opacity); + if (_pixelCCT) _pixelCCT[idx] = cct; + } + return; + } + } + + // slow path: handle transitions, grouping/spacing, segments with clipping and CCT pixels + Segment::setClippingRect(0, 0); // disable clipping by default + const unsigned progress = topSegment.progress(); + const unsigned progInv = 0xFFFFU - progress; const unsigned dw = (blendingStyle==TRANSITION_OUTSIDE_IN ? progInv : progress) * width / 0xFFFFU + 1; const unsigned dh = (blendingStyle==TRANSITION_OUTSIDE_IN ? progInv : progress) * height / 0xFFFFU + 1; const unsigned orgBS = blendingStyle; @@ -1466,7 +1522,6 @@ void WS2812FX::blendSegment(const Segment &topSegment) const { #ifndef WLED_DISABLE_2D const int nCols = topSegment.virtualWidth(); const int nRows = topSegment.virtualHeight(); - const Segment *segO = topSegment.getOldSegment(); const int oCols = segO ? segO->virtualWidth() : nCols; const int oRows = segO ? segO->virtualHeight() : nRows; @@ -1562,8 +1617,8 @@ void WS2812FX::blendSegment(const Segment &topSegment) const { } #endif } else { + // 1D Slow Path const int nLen = topSegment.virtualLength(); - const Segment *segO = topSegment.getOldSegment(); const int oLen = segO ? segO->virtualLength() : nLen; const auto setMirroredPixel = [&](int i, uint32_t c, uint8_t o) { @@ -1643,7 +1698,7 @@ void WS2812FX::show() { if (realtimeMode == REALTIME_MODE_INACTIVE || useMainSegmentOnly || realtimeOverride > REALTIME_OVERRIDE_NONE) { // clear frame buffer - for (size_t i = 0; i < totalLen; i++) _pixels[i] = BLACK; // memset(_pixels, 0, sizeof(uint32_t) * getLengthTotal()); + memset(_pixels, 0, sizeof(uint32_t) * totalLen); // blend all segments into (cleared) buffer for (Segment &seg : _segments) if (seg.isActive() && (seg.on || seg.isInTransition())) { blendSegment(seg); // blend segment's buffer into frame buffer