@@ -319,57 +319,55 @@ inline void display_hw_draw_rect(int x, int y, int w, int h, const uint16_t colo
319319 const int calculatedx = x - CONFIG_DISPLAY_OFFSET_X ;
320320 const int calculatedy = y - CONFIG_DISPLAY_OFFSET_Y ;
321321 uint16_t * screen_ptr = & disp_buf [calculatedx + calculatedy * CONFIG_DISPLAY_WIDTH ];
322+ // When both color bytes are the same, use memset for a small speedup
323+ const bool can_memset = (color >> 8 ) == (color & 0xff );
324+ // If writing a multiple of 2 pixels to a 4 byte aligned boundary,
325+ // write 2 pixels at a time for a small speedup
326+ const bool can_double = !(((intptr_t )screen_ptr ) % 4 ) && !(w % 2 );
322327
323328 if ((!calculatedx && w == CONFIG_DISPLAY_WIDTH )) {
324- if ( color == 0x0000 || color == 0xFFFF ) {
325- // small optimization, we can use memset instead of memcpy if it's black/white
329+ // Full display width. Write continously for a small speedup
330+ if ( can_memset ) {
326331 jmemset (screen_ptr , color , CONFIG_DISPLAY_WIDTH * h * sizeof (color_t ));
327332 } else {
328- if (w % 2 == 0 ) {
333+ if (can_double ) {
329334 uint32_t * disp_buf_32 = (uint32_t * )screen_ptr ;
335+ const size_t num_uints = h * CONFIG_DISPLAY_WIDTH / 2 ;
330336 const uint32_t color32 = ((uint32_t )color << 16 ) | color ;
331- const size_t size = CONFIG_DISPLAY_WIDTH * h / 2 ;
332- // we do two pixel at the time, FIXME: maybe with ESP32S3 SIMD we can do more?
333- for (size_t i = 0 ; i < size ; ++ i ) {
337+ for (size_t i = 0 ; i < num_uints ; ++ i ) {
334338 disp_buf_32 [i ] = color32 ;
335339 }
336340 } else {
337- // one pixel at the time
338- for (size_t i = 0 ; i < h ; ++ i ) {
339- for (size_t k = 0 ; k < w ; ++ k ) {
340- screen_ptr [k + CONFIG_DISPLAY_WIDTH * i ] = color ;
341- }
341+ const size_t num_pixels = h * CONFIG_DISPLAY_WIDTH ;
342+ for (size_t i = 0 ; i < num_pixels ; ++ i ) {
343+ screen_ptr [i ] = color ;
342344 }
343345 }
344346 }
345347 } else {
346- if ((color == 0x0000 || color == 0xFFFF )) {
347- // in this we can use memset still, per line
348+ if (can_memset ) {
348349 const int data_stride = w * sizeof (color_t );
349350 for (size_t i = 0 ; i < h ; ++ i ) {
350351 jmemset (screen_ptr , color , data_stride );
351352 screen_ptr += CONFIG_DISPLAY_WIDTH ;
352353 }
353354 } else {
354- // it's not black or white so we can't use memset
355- if (w % 2 == 0 ) {
356- // we can do two pixel at the time
357- // FIXME: maybe we can do more with ESP32S3 SIMD?
355+ if (can_double ) {
358356 uint32_t * disp_buf_32 = (uint32_t * )screen_ptr ;
357+ const size_t num_uints = w / 2 ;
359358 const uint32_t color32 = ((uint32_t )color << 16 ) | color ;
360- const size_t size = w / 2 ;
361359 for (size_t i = 0 ; i < h ; ++ i ) {
362- for (size_t k = 0 ; k < size ; ++ k ) {
360+ for (size_t k = 0 ; k < num_uints ; ++ k ) {
363361 disp_buf_32 [k ] = color32 ;
364362 }
365363 disp_buf_32 += CONFIG_DISPLAY_WIDTH / 2 ;
366364 }
367365 } else {
368- // one pixel at the time
369366 for (size_t i = 0 ; i < h ; ++ i ) {
370367 for (size_t k = 0 ; k < w ; ++ k ) {
371- screen_ptr [k + CONFIG_DISPLAY_WIDTH * i ] = color ;
368+ screen_ptr [k ] = color ;
372369 }
370+ screen_ptr += CONFIG_DISPLAY_WIDTH ;
373371 }
374372 }
375373 }
0 commit comments