Skip to content

Commit c0cdfdb

Browse files
hglmPixelBoot
authored and committed
Optimized Console FrameBuffer for up to 70% increase in Performance
Signed-off-by: Joe Maples <joe@frap129.org>
1 parent 3c26f3d commit c0cdfdb

1 file changed

Lines changed: 148 additions & 5 deletions

File tree

drivers/video/fbdev/core/cfbimgblt.c

Lines changed: 148 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@
2828
*
2929
* Also need to add code to deal with cards endians that are different than
3030
* the native cpu endians. I also need to deal with MSB position in the word.
31+
* Modified by Harm Hanemaaijer (fgenfb@yahoo.com) 2013:
32+
* - Provide optimized versions of fast_imageblit for 16 and 32bpp that are
33+
* significantly faster than the previous implementation.
34+
* - Simplify the fast/slow_imageblit selection code, avoiding integer
35+
* divides.
3136
*/
3237
#include <linux/module.h>
3338
#include <linux/string.h>
@@ -262,6 +267,133 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
262267
}
263268
}
264269

270+
/*
271+
* Optimized fast_imageblit for bpp == 16. ppw = 2, bit_mask = 3 folded
272+
* into the code, main loop unrolled.
273+
*/
274+
275+
static inline void fast_imageblit16(const struct fb_image *image,
276+
struct fb_info *p, u8 __iomem * dst1,
277+
u32 fgcolor, u32 bgcolor)
278+
{
279+
u32 fgx = fgcolor, bgx = bgcolor;
280+
u32 spitch = (image->width + 7) / 8;
281+
u32 end_mask, eorx;
282+
const char *s = image->data, *src;
283+
u32 __iomem *dst;
284+
const u32 *tab = NULL;
285+
int i, j, k;
286+
287+
tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
288+
289+
fgx <<= 16;
290+
bgx <<= 16;
291+
fgx |= fgcolor;
292+
bgx |= bgcolor;
293+
294+
eorx = fgx ^ bgx;
295+
k = image->width / 2;
296+
297+
for (i = image->height; i--;) {
298+
dst = (u32 __iomem *) dst1;
299+
src = s;
300+
301+
j = k;
302+
while (j >= 4) {
303+
u8 bits = *src;
304+
end_mask = tab[(bits >> 6) & 3];
305+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
306+
end_mask = tab[(bits >> 4) & 3];
307+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
308+
end_mask = tab[(bits >> 2) & 3];
309+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
310+
end_mask = tab[bits & 3];
311+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
312+
src++;
313+
j -= 4;
314+
}
315+
if (j != 0) {
316+
u8 bits = *src;
317+
end_mask = tab[(bits >> 6) & 3];
318+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
319+
if (j >= 2) {
320+
end_mask = tab[(bits >> 4) & 3];
321+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
322+
if (j == 3) {
323+
end_mask = tab[(bits >> 2) & 3];
324+
FB_WRITEL((end_mask & eorx) ^ bgx, dst);
325+
}
326+
}
327+
}
328+
dst1 += p->fix.line_length;
329+
s += spitch;
330+
}
331+
}
332+
333+
/*
334+
* Optimized fast_imageblit for bpp == 32. ppw = 1, bit_mask = 1 folded
335+
* into the code, main loop unrolled.
336+
*/
337+
338+
static inline void fast_imageblit32(const struct fb_image *image,
339+
struct fb_info *p, u8 __iomem * dst1,
340+
u32 fgcolor, u32 bgcolor)
341+
{
342+
u32 fgx = fgcolor, bgx = bgcolor;
343+
u32 spitch = (image->width + 7) / 8;
344+
u32 end_mask, eorx;
345+
const char *s = image->data, *src;
346+
u32 __iomem *dst;
347+
const u32 *tab = NULL;
348+
int i, j, k;
349+
350+
tab = cfb_tab32;
351+
352+
eorx = fgx ^ bgx;
353+
k = image->width;
354+
355+
for (i = image->height; i--;) {
356+
dst = (u32 __iomem *) dst1;
357+
src = s;
358+
359+
j = k;
360+
while (j >= 8) {
361+
u8 bits = *src;
362+
end_mask = tab[(bits >> 7) & 1];
363+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
364+
end_mask = tab[(bits >> 6) & 1];
365+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
366+
end_mask = tab[(bits >> 5) & 1];
367+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
368+
end_mask = tab[(bits >> 4) & 1];
369+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
370+
end_mask = tab[(bits >> 3) & 1];
371+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
372+
end_mask = tab[(bits >> 2) & 1];
373+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
374+
end_mask = tab[(bits >> 1) & 1];
375+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
376+
end_mask = tab[bits & 1];
377+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
378+
src++;
379+
j -= 8;
380+
}
381+
if (j != 0) {
382+
u32 bits = (u32) * src;
383+
while (j > 1) {
384+
end_mask = tab[(bits >> 7) & 1];
385+
FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
386+
bits <<= 1;
387+
j--;
388+
}
389+
end_mask = tab[(bits >> 7) & 1];
390+
FB_WRITEL((end_mask & eorx) ^ bgx, dst);
391+
}
392+
dst1 += p->fix.line_length;
393+
s += spitch;
394+
}
395+
}
396+
265397
void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
266398
{
267399
u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
@@ -294,11 +426,21 @@ void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
294426
bgcolor = image->bg_color;
295427
}
296428

297-
if (32 % bpp == 0 && !start_index && !pitch_index &&
298-
((width & (32/bpp-1)) == 0) &&
299-
bpp >= 8 && bpp <= 32)
300-
fast_imageblit(image, p, dst1, fgcolor, bgcolor);
301-
else
429+
if (!start_index && !pitch_index) {
430+
if (bpp == 32)
431+
fast_imageblit32(image, p, dst1, fgcolor,
432+
bgcolor);
433+
else if (bpp == 16 && (width & 1) == 0)
434+
fast_imageblit16(image, p, dst1, fgcolor,
435+
bgcolor);
436+
else if (bpp == 8 && (width & 3) == 0)
437+
fast_imageblit(image, p, dst1, fgcolor,
438+
bgcolor);
439+
else
440+
slow_imageblit(image, p, dst1, fgcolor,
441+
bgcolor,
442+
start_index, pitch_index);
443+
} else
302444
slow_imageblit(image, p, dst1, fgcolor, bgcolor,
303445
start_index, pitch_index);
304446
} else
@@ -311,3 +453,4 @@ MODULE_AUTHOR("James Simmons <jsimmons@users.sf.net>");
311453
MODULE_DESCRIPTION("Generic software accelerated imaging drawing");
312454
MODULE_LICENSE("GPL");
313455

456+

0 commit comments

Comments
 (0)