2828 *
2929 * Also need to add code to deal with cards whose endianness differs from
3030 * the native CPU endianness. I also need to deal with MSB position in the word.
31+ * Modified by Harm Hanemaaijer (fgenfb@yahoo.com) 2013:
32+ * - Provide optimized versions of fast_imageblit for 16 and 32bpp that are
33+ * significantly faster than the previous implementation.
34+ * - Simplify the fast/slow_imageblit selection code, avoiding integer
35+ * divides.
3136 */
3237#include <linux/module.h>
3338#include <linux/string.h>
@@ -262,6 +267,133 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
262267 }
263268}
264269
270+ /*
271+ * Optimized fast_imageblit for bpp == 16. ppw = 2, bit_mask = 3 folded
272+ * into the code, main loop unrolled.
273+ */
274+
275+ static inline void fast_imageblit16 (const struct fb_image * image ,
276+ struct fb_info * p , u8 __iomem * dst1 ,
277+ u32 fgcolor , u32 bgcolor )
278+ {
279+ u32 fgx = fgcolor , bgx = bgcolor ;
280+ u32 spitch = (image -> width + 7 ) / 8 ;
281+ u32 end_mask , eorx ;
282+ const char * s = image -> data , * src ;
283+ u32 __iomem * dst ;
284+ const u32 * tab = NULL ;
285+ int i , j , k ;
286+
287+ tab = fb_be_math (p ) ? cfb_tab16_be : cfb_tab16_le ;
288+
289+ fgx <<= 16 ;
290+ bgx <<= 16 ;
291+ fgx |= fgcolor ;
292+ bgx |= bgcolor ;
293+
294+ eorx = fgx ^ bgx ;
295+ k = image -> width / 2 ;
296+
297+ for (i = image -> height ; i -- ;) {
298+ dst = (u32 __iomem * ) dst1 ;
299+ src = s ;
300+
301+ j = k ;
302+ while (j >= 4 ) {
303+ u8 bits = * src ;
304+ end_mask = tab [(bits >> 6 ) & 3 ];
305+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
306+ end_mask = tab [(bits >> 4 ) & 3 ];
307+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
308+ end_mask = tab [(bits >> 2 ) & 3 ];
309+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
310+ end_mask = tab [bits & 3 ];
311+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
312+ src ++ ;
313+ j -= 4 ;
314+ }
315+ if (j != 0 ) {
316+ u8 bits = * src ;
317+ end_mask = tab [(bits >> 6 ) & 3 ];
318+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
319+ if (j >= 2 ) {
320+ end_mask = tab [(bits >> 4 ) & 3 ];
321+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
322+ if (j == 3 ) {
323+ end_mask = tab [(bits >> 2 ) & 3 ];
324+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst );
325+ }
326+ }
327+ }
328+ dst1 += p -> fix .line_length ;
329+ s += spitch ;
330+ }
331+ }
332+
333+ /*
334+ * Optimized fast_imageblit for bpp == 32. ppw = 1, bit_mask = 1 folded
335+ * into the code, main loop unrolled.
336+ */
337+
338+ static inline void fast_imageblit32 (const struct fb_image * image ,
339+ struct fb_info * p , u8 __iomem * dst1 ,
340+ u32 fgcolor , u32 bgcolor )
341+ {
342+ u32 fgx = fgcolor , bgx = bgcolor ;
343+ u32 spitch = (image -> width + 7 ) / 8 ;
344+ u32 end_mask , eorx ;
345+ const char * s = image -> data , * src ;
346+ u32 __iomem * dst ;
347+ const u32 * tab = NULL ;
348+ int i , j , k ;
349+
350+ tab = cfb_tab32 ;
351+
352+ eorx = fgx ^ bgx ;
353+ k = image -> width ;
354+
355+ for (i = image -> height ; i -- ;) {
356+ dst = (u32 __iomem * ) dst1 ;
357+ src = s ;
358+
359+ j = k ;
360+ while (j >= 8 ) {
361+ u8 bits = * src ;
362+ end_mask = tab [(bits >> 7 ) & 1 ];
363+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
364+ end_mask = tab [(bits >> 6 ) & 1 ];
365+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
366+ end_mask = tab [(bits >> 5 ) & 1 ];
367+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
368+ end_mask = tab [(bits >> 4 ) & 1 ];
369+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
370+ end_mask = tab [(bits >> 3 ) & 1 ];
371+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
372+ end_mask = tab [(bits >> 2 ) & 1 ];
373+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
374+ end_mask = tab [(bits >> 1 ) & 1 ];
375+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
376+ end_mask = tab [bits & 1 ];
377+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
378+ src ++ ;
379+ j -= 8 ;
380+ }
381+ if (j != 0 ) {
382+ u32 bits = (u32 ) * src ;
383+ while (j > 1 ) {
384+ end_mask = tab [(bits >> 7 ) & 1 ];
385+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst ++ );
386+ bits <<= 1 ;
387+ j -- ;
388+ }
389+ end_mask = tab [(bits >> 7 ) & 1 ];
390+ FB_WRITEL ((end_mask & eorx ) ^ bgx , dst );
391+ }
392+ dst1 += p -> fix .line_length ;
393+ s += spitch ;
394+ }
395+ }
396+
265397void cfb_imageblit (struct fb_info * p , const struct fb_image * image )
266398{
267399 u32 fgcolor , bgcolor , start_index , bitstart , pitch_index = 0 ;
@@ -294,11 +426,21 @@ void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
294426 bgcolor = image -> bg_color ;
295427 }
296428
297- if (32 % bpp == 0 && !start_index && !pitch_index &&
298- ((width & (32 /bpp - 1 )) == 0 ) &&
299- bpp >= 8 && bpp <= 32 )
300- fast_imageblit (image , p , dst1 , fgcolor , bgcolor );
301- else
429+ if (!start_index && !pitch_index ) {
430+ if (bpp == 32 )
431+ fast_imageblit32 (image , p , dst1 , fgcolor ,
432+ bgcolor );
433+ else if (bpp == 16 && (width & 1 ) == 0 )
434+ fast_imageblit16 (image , p , dst1 , fgcolor ,
435+ bgcolor );
436+ else if (bpp == 8 && (width & 3 ) == 0 )
437+ fast_imageblit (image , p , dst1 , fgcolor ,
438+ bgcolor );
439+ else
440+ slow_imageblit (image , p , dst1 , fgcolor ,
441+ bgcolor ,
442+ start_index , pitch_index );
443+ } else
302444 slow_imageblit (image , p , dst1 , fgcolor , bgcolor ,
303445 start_index , pitch_index );
304446 } else
@@ -311,3 +453,4 @@ MODULE_AUTHOR("James Simmons <jsimmons@users.sf.net>");
311453MODULE_DESCRIPTION ("Generic software accelerated imaging drawing" );
312454MODULE_LICENSE ("GPL" );
313455
456+
0 commit comments