forked from minhngoc25a/freetype2
[smooth] Improve performance.
Provide a work-around for an ARM-specific performance bug in GCC. This speeds up the rasterizer by more than 5%. Also slightly optimize `gray_set_cell' and `gray_record_cell' (which also improves performance on other platforms by a tiny bit (<1%)). * src/smooth/ftgrays.c (FT_DIV_MOD): New macro. Use it where appropriate. (gray_record_cell, gray_set_cell, gray_move_to, gray_convert_glyph_inner): Streamline condition handling.
This commit is contained in:
parent
f66d48e923
commit
89929ec6b9
16
ChangeLog
16
ChangeLog
|
@ -1,3 +1,19 @@
|
|||
2013-07-16 David Turner <digit@google.com>
|
||||
|
||||
[smooth] Improve performance.
|
||||
|
||||
Provide a work-around for an ARM-specific performance bug in GCC.
|
||||
This speeds up the rasterizer by more than 5%.
|
||||
|
||||
Also slightly optimize `gray_set_cell' and `gray_record_cell' (which
|
||||
also improves performance on other platforms by a tiny bit (<1%)).
|
||||
|
||||
* src/smooth/ftgrays.c (FT_DIV_MOD): New macro.
|
||||
Use it where appropriate.
|
||||
|
||||
(gray_record_cell, gray_set_cell, gray_move_to,
|
||||
gray_convert_glyph_inner): Streamline condition handling.
|
||||
|
||||
2013-07-16 David Turner <digit@google.com>
|
||||
|
||||
[truetype] Add assembler code for TT_MulFix14 and TT_DotFix14.
|
||||
|
|
|
@ -310,6 +310,40 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
#endif
|
||||
|
||||
|
||||
/* Compute `dividend / divisor' and return both its quotient and        */
|
||||
/* remainder, cast to a specific type. This macro also ensures that */
|
||||
/* the remainder is never negative.                                      */
|
||||
#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \
|
||||
FT_BEGIN_STMNT \
|
||||
(quotient) = (type)( (dividend) / (divisor) ); \
|
||||
(remainder) = (type)( (dividend) % (divisor) ); \
|
||||
if ( (remainder) < 0 ) \
|
||||
{ \
|
||||
(quotient)--; \
|
||||
(remainder) += (type)(divisor); \
|
||||
} \
|
||||
FT_END_STMNT
|
||||
|
||||
#ifdef __arm__
|
||||
/* Work around a bug specific to GCC which makes the compiler fail to */
|
||||
/* optimize a division and modulo operation on the same parameters */
|
||||
/* into a single call to `__aeabi_idivmod'. See */
|
||||
/* */
|
||||
/* http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721 */
|
||||
#undef FT_DIV_MOD
|
||||
#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \
|
||||
FT_BEGIN_STMNT \
|
||||
(quotient) = (type)( (dividend) / (divisor) ); \
|
||||
(remainder) = (type)( (dividend) - (quotient) * (divisor) ); \
|
||||
if ( (remainder) < 0 ) \
|
||||
{ \
|
||||
(quotient)--; \
|
||||
(remainder) += (type)(divisor); \
|
||||
} \
|
||||
FT_END_STMNT
|
||||
#endif /* __arm__ */
|
||||
|
||||
|
||||
/*************************************************************************/
|
||||
/* */
|
||||
/* TYPE DEFINITIONS */
|
||||
|
@ -548,7 +582,7 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
static void
|
||||
gray_record_cell( RAS_ARG )
|
||||
{
|
||||
if ( !ras.invalid && ( ras.area | ras.cover ) )
|
||||
if ( ras.area | ras.cover )
|
||||
{
|
||||
PCell cell = gray_find_cell( RAS_VAR );
|
||||
|
||||
|
@ -597,12 +631,12 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
|
||||
ras.area = 0;
|
||||
ras.cover = 0;
|
||||
}
|
||||
ras.ex = ex;
|
||||
ras.ey = ey;
|
||||
|
||||
ras.ex = ex;
|
||||
ras.ey = ey;
|
||||
ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
|
||||
ex >= ras.count_ex );
|
||||
ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
|
||||
ex >= ras.count_ex );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -686,13 +720,7 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
dx = -dx;
|
||||
}
|
||||
|
||||
delta = (TCoord)( p / dx );
|
||||
mod = (TCoord)( p % dx );
|
||||
if ( mod < 0 )
|
||||
{
|
||||
delta--;
|
||||
mod += (TCoord)dx;
|
||||
}
|
||||
FT_DIV_MOD( TCoord, p, dx, delta, mod );
|
||||
|
||||
ras.area += (TArea)(( fx1 + first ) * delta);
|
||||
ras.cover += delta;
|
||||
|
@ -706,14 +734,8 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
TCoord lift, rem;
|
||||
|
||||
|
||||
p = ONE_PIXEL * ( y2 - y1 + delta );
|
||||
lift = (TCoord)( p / dx );
|
||||
rem = (TCoord)( p % dx );
|
||||
if ( rem < 0 )
|
||||
{
|
||||
lift--;
|
||||
rem += (TCoord)dx;
|
||||
}
|
||||
p = ONE_PIXEL * ( y2 - y1 + delta );
|
||||
FT_DIV_MOD( TCoord, p, dx, lift, rem );
|
||||
|
||||
mod -= (int)dx;
|
||||
|
||||
|
@ -763,9 +785,6 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
dx = to_x - ras.x;
|
||||
dy = to_y - ras.y;
|
||||
|
||||
/* XXX: we should do something about the trivial case where dx == 0, */
|
||||
/* as it happens very often! */
|
||||
|
||||
/* perform vertical clipping */
|
||||
{
|
||||
TCoord min, max;
|
||||
|
@ -844,13 +863,7 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
dy = -dy;
|
||||
}
|
||||
|
||||
delta = (int)( p / dy );
|
||||
mod = (int)( p % dy );
|
||||
if ( mod < 0 )
|
||||
{
|
||||
delta--;
|
||||
mod += (TCoord)dy;
|
||||
}
|
||||
FT_DIV_MOD( int, p, dy, delta, mod );
|
||||
|
||||
x = ras.x + delta;
|
||||
gray_render_scanline( RAS_VAR_ ey1, ras.x, fy1, x, (TCoord)first );
|
||||
|
@ -861,13 +874,7 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
if ( ey1 != ey2 )
|
||||
{
|
||||
p = ONE_PIXEL * dx;
|
||||
lift = (int)( p / dy );
|
||||
rem = (int)( p % dy );
|
||||
if ( rem < 0 )
|
||||
{
|
||||
lift--;
|
||||
rem += (int)dy;
|
||||
}
|
||||
FT_DIV_MOD( int, p, dy, lift, rem );
|
||||
mod -= (int)dy;
|
||||
|
||||
while ( ey1 != ey2 )
|
||||
|
@ -1171,7 +1178,8 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
|
||||
|
||||
/* record current cell, if any */
|
||||
gray_record_cell( RAS_VAR );
|
||||
if ( !ras.invalid )
|
||||
gray_record_cell( RAS_VAR );
|
||||
|
||||
/* start to a new position */
|
||||
x = UPSCALE( to->x );
|
||||
|
@ -1781,7 +1789,8 @@ typedef ptrdiff_t FT_PtrDist;
|
|||
if ( ft_setjmp( ras.jump_buffer ) == 0 )
|
||||
{
|
||||
error = FT_Outline_Decompose( &ras.outline, &func_interface, &ras );
|
||||
gray_record_cell( RAS_VAR );
|
||||
if ( !ras.invalid )
|
||||
gray_record_cell( RAS_VAR );
|
||||
}
|
||||
else
|
||||
error = FT_THROW( Memory_Overflow );
|
||||
|
|
Loading…
Reference in New Issue