Skip to content

Commit 0b4769a

Browse files
committed
display: minor optimizations for rectangle drawing, fix alignment check
The check for whether double-width writes can occur was incorrectly based only on whether the number of x pixels to write was even. Fix this to also take the alignment of the starting position into account. Allow double-width writes for all pixels where the upper and lower color bytes are the same. Avoid computing/adding the stride in the draw loop where possible. Alignment mismatch found by asan.
1 parent 59faf46 commit 0b4769a

1 file changed

Lines changed: 19 additions & 21 deletions

File tree

main/display_hw.c

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -319,57 +319,55 @@ inline void display_hw_draw_rect(int x, int y, int w, int h, const uint16_t colo
319319
const int calculatedx = x - CONFIG_DISPLAY_OFFSET_X;
320320
const int calculatedy = y - CONFIG_DISPLAY_OFFSET_Y;
321321
uint16_t* screen_ptr = &disp_buf[calculatedx + calculatedy * CONFIG_DISPLAY_WIDTH];
322+
// When both color bytes are the same, use memset for a small speedup
323+
const bool can_memset = (color >> 8) == (color & 0xff);
324+
// If writing a multiple of 2 pixels to a 4 byte aligned boundary,
325+
// write 2 pixels at a time for a small speedup
326+
const bool can_double = !(((intptr_t)screen_ptr) % 4) && !(w % 2);
322327

323328
if ((!calculatedx && w == CONFIG_DISPLAY_WIDTH)) {
324-
if (color == 0x0000 || color == 0xFFFF) {
325-
// small optimization, we can use memset instead of memcpy if it's black/white
329+
// Full display width. Write continously for a small speedup
330+
if (can_memset) {
326331
jmemset(screen_ptr, color, CONFIG_DISPLAY_WIDTH * h * sizeof(color_t));
327332
} else {
328-
if (w % 2 == 0) {
333+
if (can_double) {
329334
uint32_t* disp_buf_32 = (uint32_t*)screen_ptr;
335+
const size_t num_uints = h * CONFIG_DISPLAY_WIDTH / 2;
330336
const uint32_t color32 = ((uint32_t)color << 16) | color;
331-
const size_t size = CONFIG_DISPLAY_WIDTH * h / 2;
332-
// we do two pixel at the time, FIXME: maybe with ESP32S3 SIMD we can do more?
333-
for (size_t i = 0; i < size; ++i) {
337+
for (size_t i = 0; i < num_uints; ++i) {
334338
disp_buf_32[i] = color32;
335339
}
336340
} else {
337-
// one pixel at the time
338-
for (size_t i = 0; i < h; ++i) {
339-
for (size_t k = 0; k < w; ++k) {
340-
screen_ptr[k + CONFIG_DISPLAY_WIDTH * i] = color;
341-
}
341+
const size_t num_pixels = h * CONFIG_DISPLAY_WIDTH;
342+
for (size_t i = 0; i < num_pixels; ++i) {
343+
screen_ptr[i] = color;
342344
}
343345
}
344346
}
345347
} else {
346-
if ((color == 0x0000 || color == 0xFFFF)) {
347-
// in this we can use memset still, per line
348+
if (can_memset) {
348349
const int data_stride = w * sizeof(color_t);
349350
for (size_t i = 0; i < h; ++i) {
350351
jmemset(screen_ptr, color, data_stride);
351352
screen_ptr += CONFIG_DISPLAY_WIDTH;
352353
}
353354
} else {
354-
// it's not black or white so we can't use memset
355-
if (w % 2 == 0) {
356-
// we can do two pixel at the time
357-
// FIXME: maybe we can do more with ESP32S3 SIMD?
355+
if (can_double) {
358356
uint32_t* disp_buf_32 = (uint32_t*)screen_ptr;
357+
const size_t num_uints = w / 2;
359358
const uint32_t color32 = ((uint32_t)color << 16) | color;
360-
const size_t size = w / 2;
361359
for (size_t i = 0; i < h; ++i) {
362-
for (size_t k = 0; k < size; ++k) {
360+
for (size_t k = 0; k < num_uints; ++k) {
363361
disp_buf_32[k] = color32;
364362
}
365363
disp_buf_32 += CONFIG_DISPLAY_WIDTH / 2;
366364
}
367365
} else {
368-
// one pixel at the time
369366
for (size_t i = 0; i < h; ++i) {
370367
for (size_t k = 0; k < w; ++k) {
371-
screen_ptr[k + CONFIG_DISPLAY_WIDTH * i] = color;
368+
screen_ptr[k] = color;
372369
}
370+
screen_ptr += CONFIG_DISPLAY_WIDTH;
373371
}
374372
}
375373
}

0 commit comments

Comments
 (0)