[smooth] Minor speedup to smooth rasterizer

This speeds up the smooth rasterizer by avoiding
conditional branches in the hot path. Namely:

- Define a fixed "null cell" which will be pointed
  to whenever the current cell is outside of the current
  target region. This avoids a "ras.cell != NULL"
  check in the FT_INTEGRATE() macro.

- Also use the null cell as a sentinel at the end of
  all ycells[] linked-lists, by setting its x coordinate
  to INT_MAX. This avoids a 'if (!cell)' check in
  gray_set_cell() as well.

- Slightly change the worker struct fields to perform
  fewer operations during rendering.

Example results (on a 2013 Core i5-3337U CPU):

  out/ftbench -p -s10 -t5 -bc /usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf

    Before: 5.472 us/op
    After:  5.275 us/op

  out/ftbench -p -s60 -t5 -bc /usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf

    Before: 17.988 us/op
    After:  17.389 us/op
This commit is contained in:
David Turner 2021-06-19 11:18:40 +02:00
parent 5ec7f58831
commit 86b9c9347f
2 changed files with 73 additions and 30 deletions

View File

@ -1,3 +1,15 @@
2021-07-15 David Turner <david@freetype.org>
[smooth] Minor speedup to smooth rasterizer
This speeds up the smooth rasterizer by avoiding conditional
branches in the hot path.
* src/smooth/ftgrays.c: Define a null cell used both as a
sentinel for all linked-lists and to accumulate coverage and
area values for "out-of-bounds" cell positions without a
conditional check.
2021-07-15 David Turner <david@freetype.org>
Replaces download-test-fonts.sh with download-test-fonts.py which

View File

@ -479,19 +479,24 @@ typedef ptrdiff_t FT_PtrDist;
{
ft_jmp_buf jump_buffer;
TCoord min_ex, max_ex;
TCoord min_ex, max_ex; /* min and max integer pixel coordinates */
TCoord min_ey, max_ey;
TCoord count_ey; /* same as (max_ey - min_ey) */
PCell cell;
PCell* ycells;
PCell cells;
FT_PtrDist max_cells;
FT_PtrDist num_cells;
PCell cell; /* current cell */
PCell cell_free; /* cell allocation next free slot */
PCell cell_limit; /* cell allocation limit */
TPos x, y;
PCell* ycells; /* array of cell linked-lists, one per */
/* vertical coordinate in the current band. */
FT_Outline outline;
TPixmap target;
PCell cells; /* cell storage area */
FT_PtrDist max_cells; /* cell storage capacity */
TPos x, y; /* last point position */
FT_Outline outline; /* input outline */
TPixmap target; /* target pixmap */
FT_Raster_Span_Func render_span;
void* render_span_data;
@ -502,21 +507,34 @@ typedef ptrdiff_t FT_PtrDist;
#pragma warning( pop )
#endif
#ifndef FT_STATIC_RASTER
#define ras (*worker)
#else
static gray_TWorker ras;
#endif
#define FT_INTEGRATE( ras, a, b ) \
if ( ras.cell ) \
ras.cell->cover += (a), ras.cell->area += (a) * (TArea)(b)
/* Return a pointer to the "null cell", used as a sentinel at the end */
/* of all ycells[] linked lists. Its x coordinate should be maximal */
/* to ensure no NULL checks are necessary when looking for an insertion */
/* point in gray_set_cell(). Other loops should check the cell pointer */
/* with CELL_IS_NULL() to detect the end of the list. */
#define NULL_CELL_PTR(ras) (ras).cells
/* The |x| value of the null cell. Must be the largest possible */
/* integer value stored in a TCell.x field. */
#define CELL_MAX_X_VALUE INT_MAX
/* Return true iff |cell| points to the null cell. */
#define CELL_IS_NULL(cell) ((cell)->x == CELL_MAX_X_VALUE)
#define FT_INTEGRATE( ras, a, b ) \
ras.cell->cover += (a), ras.cell->area += (a) * (TArea)(b)
typedef struct gray_TRaster_
{
void* memory;
void* memory;
} gray_TRaster, *gray_PRaster;
@ -538,7 +556,7 @@ typedef ptrdiff_t FT_PtrDist;
printf( "%3d:", y );
for ( ; cell != NULL; cell = cell->next )
for ( ; !CELL_IS_NULL(cell); cell = cell->next )
printf( " (%3d, c:%4d, a:%6d)",
cell->x, cell->cover, cell->area );
printf( "\n" );
@ -566,11 +584,12 @@ typedef ptrdiff_t FT_PtrDist;
/* Note that if a cell is to the left of the clipping region, it is */
/* actually set to the (min_ex-1) horizontal position. */
if ( ey >= ras.max_ey || ey < ras.min_ey || ex >= ras.max_ex )
ras.cell = NULL;
TCoord ey_index = ey - ras.min_ey;
if ( ey_index < 0 || ey_index >= ras.count_ey || ex >= ras.max_ex )
ras.cell = NULL_CELL_PTR(ras);
else
{
PCell* pcell = ras.ycells + ey - ras.min_ey;
PCell* pcell = ras.ycells + ey_index;
PCell cell;
@ -580,7 +599,7 @@ typedef ptrdiff_t FT_PtrDist;
{
cell = *pcell;
if ( !cell || cell->x > ex )
if ( cell->x > ex )
break;
if ( cell->x == ex )
@ -589,11 +608,11 @@ typedef ptrdiff_t FT_PtrDist;
pcell = &cell->next;
}
if ( ras.num_cells >= ras.max_cells )
/* insert new cell */
cell = ras.cell_free++;
if (cell >= ras.cell_limit)
ft_longjmp( ras.jump_buffer, 1 );
/* insert new cell */
cell = ras.cells + ras.num_cells++;
cell->x = ex;
cell->area = 0;
cell->cover = 0;
@ -1218,7 +1237,7 @@ typedef ptrdiff_t FT_PtrDist;
unsigned char* line = ras.target.origin - ras.target.pitch * y;
for ( ; cell != NULL; cell = cell->next )
for ( ; !CELL_IS_NULL(cell); cell = cell->next )
{
if ( cover != 0 && cell->x > x )
{
@ -1266,7 +1285,7 @@ typedef ptrdiff_t FT_PtrDist;
TArea area;
for ( ; cell != NULL; cell = cell->next )
for ( ; !CELL_IS_NULL(cell); cell = cell->next )
{
if ( cover != 0 && cell->x > x )
{
@ -1646,8 +1665,8 @@ typedef ptrdiff_t FT_PtrDist;
FT_TRACE7(( "band [%d..%d]: %ld cell%s\n",
ras.min_ey,
ras.max_ey,
ras.num_cells,
ras.num_cells == 1 ? "" : "s" ));
ras.cell_free - ras.cells.,
ras.cell_free - ras.cells == 1 ? "" : "s" ));
}
else
{
@ -1690,8 +1709,18 @@ typedef ptrdiff_t FT_PtrDist;
ras.cells = buffer + n;
ras.max_cells = (FT_PtrDist)( FT_MAX_GRAY_POOL - n );
ras.cell_limit = ras.cells + ras.max_cells;
ras.ycells = (PCell*)buffer;
/* Initialize the null cell at the start of the 'cells' array. */
/* Note that this requires ras.cell_free initialization to skip */
/* over the first entry in the array. */
PCell null_cell = NULL_CELL_PTR(ras);
null_cell->x = CELL_MAX_X_VALUE;
null_cell->area = 0;
null_cell->cover = 0;
null_cell->next = NULL;;
for ( y = yMin; y < yMax; )
{
ras.min_ey = y;
@ -1705,15 +1734,17 @@ typedef ptrdiff_t FT_PtrDist;
do
{
TCoord width = band[0] - band[1];
TCoord w;
int error;
for (w = 0; w < width; ++w)
ras.ycells[w] = null_cell;
FT_MEM_ZERO( ras.ycells, height * sizeof ( PCell ) );
ras.num_cells = 0;
ras.cell = NULL;
ras.cell_free = ras.cells + 1; /* NOTE: Skip over the null cell. */
ras.cell = null_cell;
ras.min_ey = band[1];
ras.max_ey = band[0];
ras.count_ey = width;
error = gray_convert_glyph_inner( RAS_VAR, continued );
continued = 1;