[smooth] Simplify span rendering more.

It turns out that there is significant cost associated with `FT_Span'
creation and calls to `gray_render_span' because it happens so
frequently. This removes these steps from our internal use but leaves
it alone for `FT_RASTER_FLAG_DIRECT' to preserve API. The speed gain
is about 5%.

* src/smooth/ftgrays.c (gray_render_span): Removed. The code is
migrated to...
(gray_hline): ... here.
This commit is contained in:
Alexei Podtelezhnikov 2016-09-01 22:56:24 -04:00
parent f44ddfda45
commit 11d3336af3
2 changed files with 49 additions and 43 deletions

View File

@ -1,9 +1,23 @@
2016-09-01 Alexei Podtelezhnikov <apodtele@gmail.com>
[smooth] Simplify span rendering more.
It turns out that there is significant cost associated with `FT_Span'
creation and calls to `gray_render_span' because it happens so
frequently. This removes these steps from our internal use but leaves
it alone for `FT_RASTER_FLAG_DIRECT' to preserve API. The speed gain
is about 5%.
* src/smooth/ftgrays.c (gray_render_span): Removed. The code is
migrated to...
(gray_hline): ... here.
2016-08-30 Alexei Podtelezhnikov <apodtele@gmail.com>
[smooth] Streamline pixmap drawing a bit more.
Zero coverage is unlikely (1 out of 256) to warrant checking. This
gives 0.5% speed improvement in dendering simple glyphs.
gives 0.5% speed improvement in rendering simple glyphs.
* src/smooth/ftgrays.c (gray_hline, gray_render_span): Remove checks.

View File

@ -1270,42 +1270,6 @@ typedef ptrdiff_t FT_PtrDist;
}
/*
 * Write `count' spans into the row of the worker's target that is
 * selected by `y'.  Every span stores its `coverage' byte into `len'
 * consecutive bytes beginning at offset `x' within that row.
 */
static void
gray_render_span( int             y,
                  int             count,
                  const FT_Span*  spans,
                  gray_PWorker    worker )
{
  /* Row base: `origin' moved back by `y' times the pitch. */
  unsigned char*  row = worker->target.origin - y * worker->target.pitch;
  int             i;


  for ( i = 0; i < count; i++ )
  {
    const FT_Span*  span = &spans[i];
    unsigned char   fill = span->coverage;
    unsigned char*  dst  = row + span->x;


    /* Short runs are stored byte by byte; the function-call   */
    /* overhead of `memset' outweighs its benefit for them.    */
    /* Each case deliberately falls through to the next one.   */
    switch ( span->len )
    {
    case 7:
      *dst++ = fill;
      /* fall through */
    case 6:
      *dst++ = fill;
      /* fall through */
    case 5:
      *dst++ = fill;
      /* fall through */
    case 4:
      *dst++ = fill;
      /* fall through */
    case 3:
      *dst++ = fill;
      /* fall through */
    case 2:
      *dst++ = fill;
      /* fall through */
    case 1:
      *dst = fill;
      /* fall through */
    case 0:
      break;

    default:
      /* Longer runs are delegated to the bulk fill macro. */
      FT_MEM_SET( dst, fill, span->len );
    }
  }
}
static void
gray_hline( RAS_ARG_ TCoord x,
TCoord y,
@ -1342,11 +1306,39 @@ typedef ptrdiff_t FT_PtrDist;
coverage = 255;
}
span.x = (short)( x + ras.min_ex );
span.len = (unsigned short)acount;
span.coverage = (unsigned char)coverage;
if ( ras.render_span ) /* for FT_RASTER_FLAG_DIRECT only */
{
span.x = (short)( x + ras.min_ex );
span.len = (unsigned short)acount;
span.coverage = (unsigned char)coverage;
ras.render_span( y + ras.min_ey, 1, &span, ras.render_span_data );
ras.render_span( y + ras.min_ey, 1, &span, ras.render_span_data );
}
else
{
unsigned char* q = ras.target.origin -
ras.target.pitch * ( y + ras.min_ey ) +
x + ras.min_ex;
/* For small-spans it is faster to do it by ourselves than
* calling `memset'. This is mainly due to the cost of the
* function call.
*/
switch ( acount )
{
case 7: *q++ = coverage;
case 6: *q++ = coverage;
case 5: *q++ = coverage;
case 4: *q++ = coverage;
case 3: *q++ = coverage;
case 2: *q++ = coverage;
case 1: *q = coverage;
case 0: break;
default:
FT_MEM_SET( q, coverage, acount );
}
}
}
@ -1960,8 +1952,8 @@ typedef ptrdiff_t FT_PtrDist;
ras.target.pitch = target_map->pitch;
ras.render_span = (FT_Raster_Span_Func)gray_render_span;
ras.render_span_data = &ras;
ras.render_span = (FT_Raster_Span_Func)NULL;
ras.render_span_data = NULL;
}
FT_Outline_Get_CBox( outline, &cbox );