From 8f43d324d12431d297ca2f8136b221a58ff52269 Mon Sep 17 00:00:00 2001 From: Alexei Podtelezhnikov Date: Wed, 12 May 2021 00:04:59 -0400 Subject: [PATCH] [smooth] Faster bitmap sweeping. Selecting the fill rule or checking the direct mode each time we call `gray_hline' is sub-optimal. This effectively splits the direct mode into a separate code path while inlining `gray_hline' and saving 5-7% of rendering time. * src/smooth/ftgrays.c (gray_hline): Eliminated in favor of... (FT_FILL_RULE, FT_GRAY_SET): ... these new macros... (gray_sweep): ... inlined here. (gray_sweep_direct): New function that handles the direct span buffer. (gray_TWorker): Remove the span buffer. (gray_raster_render, gray_convert_glyph): Updated. --- ChangeLog | 16 ++++ src/smooth/ftgrays.c | 199 +++++++++++++++++++++++++------------------ 2 files changed, 134 insertions(+), 81 deletions(-) diff --git a/ChangeLog b/ChangeLog index a8542d2d0..d482631ea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2021-05-07 Alexei Podtelezhnikov + + [smooth] Faster bitmap sweeping. + + Selecting the fill rule or checking the direct mode each time we call + `gray_hline' is sub-optimal. This effectively splits the direct mode + into a separate code path while inlining `gray_hline' and saving 5-7% + of rendering time. + + * src/smooth/ftgrays.c (gray_hline): Eliminated in favor of... + (FT_FILL_RULE, FT_GRAY_SET): ... these new macros... + (gray_sweep): ... inlined here. + (gray_sweep_direct): New function that handles the direct span buffer. + (gray_TWorker): Remove the span buffer. + (gray_raster_render, gray_convert_glyph): Updated. + 2021-05-10 Alexei Podtelezhnikov * src/smooth/ftgrays.c (gray_hline): Simplify even-odd computations. diff --git a/src/smooth/ftgrays.c b/src/smooth/ftgrays.c index 94169a006..a40877e7d 100644 --- a/src/smooth/ftgrays.c +++ b/src/smooth/ftgrays.c @@ -392,6 +392,42 @@ typedef ptrdiff_t FT_PtrDist; ( sizeof( long ) * FT_CHAR_BIT - PIXEL_BITS ) ) + /* Scale area and apply fill rule to calculate the coverage byte. */ + /* The top fill bit is used for the non-zero rule. The eighth */ + /* fill bit is used for the even-odd rule. The higher coverage */ + /* bytes are either clamped for the non-zero-rule or discarded */ + /* later for the even-odd rule. */ +#define FT_FILL_RULE( coverage, area, fill ) \ + FT_BEGIN_STMNT \ + coverage = (int)( area >> ( PIXEL_BITS * 2 + 1 - 8 ) ); \ + if ( coverage & fill ) \ + coverage = ~coverage; \ + if ( coverage > 255 && fill & INT_MIN ) \ + coverage = 255; \ + FT_END_STMNT + + + /* It is faster to write small spans byte-by-byte than calling */ + /* `memset'. This is mainly due to the cost of the function call. */ +#define FT_GRAY_SET( d, s, count ) \ + FT_BEGIN_STMNT \ + unsigned char* q = d; \ + unsigned char c = (unsigned char)s; \ + switch ( count ) \ + { \ + case 7: *q++ = c; /* fall through */ \ + case 6: *q++ = c; /* fall through */ \ + case 5: *q++ = c; /* fall through */ \ + case 4: *q++ = c; /* fall through */ \ + case 3: *q++ = c; /* fall through */ \ + case 2: *q++ = c; /* fall through */ \ + case 1: *q = c; /* fall through */ \ + case 0: break; \ + default: FT_MEM_SET( d, s, count ); \ + } \ + FT_END_STMNT + + /************************************************************************** * * TYPE DEFINITIONS @@ -463,8 +499,6 @@ typedef ptrdiff_t FT_PtrDist; FT_Raster_Span_Func render_span; void* render_span_data; - FT_Span spans[FT_MAX_GRAY_SPANS]; - int num_spans; } gray_TWorker, *gray_PWorker; @@ -1171,94 +1205,62 @@ typedef ptrdiff_t FT_PtrDist; static void - gray_hline( RAS_ARG_ TCoord x, - TCoord y, - TArea coverage, - TCoord acount ) + gray_sweep( RAS_ARG ) { - /* scale the coverage from 0..(ONE_PIXEL*ONE_PIXEL*2) to 0..256 */ - coverage >>= PIXEL_BITS * 2 + 1 - 8; + int fill = ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ? 0x100 : INT_MIN; + int coverage; + int y; - /* compute the line's coverage depending on the outline fill rule */ - if ( ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ) + + for ( y = ras.min_ey; y < ras.max_ey; y++ ) { - if ( coverage & 256 ) /* odd bit */ - coverage = ~coverage; + PCell cell = ras.ycells[y - ras.min_ey]; + TCoord x = ras.min_ex; + TArea cover = 0; + TArea area; - /* higher bits discarded below */ - } - else /* default non-zero winding rule */ - { - if ( coverage < 0 ) - coverage = ~coverage; /* the same as -coverage - 1 */ - - if ( coverage >= 256 ) - coverage = 255; - } - - if ( ras.num_spans >= 0 ) /* for FT_RASTER_FLAG_DIRECT only */ - { - FT_Span* span = ras.spans + ras.num_spans++; + unsigned char* line = ras.target.origin - ras.target.pitch * y; - span->x = (short)x; - span->len = (unsigned short)acount; - span->coverage = (unsigned char)coverage; - - if ( ras.num_spans == FT_MAX_GRAY_SPANS ) + for ( ; cell != NULL; cell = cell->next ) { - /* flush the span buffer and reset the count */ - ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data ); - ras.num_spans = 0; + if ( cover != 0 && cell->x > x ) + { + FT_FILL_RULE( coverage, cover, fill ); + FT_GRAY_SET( line + x, coverage, cell->x - x ); + } + + cover += (TArea)cell->cover * ( ONE_PIXEL * 2 ); + area = cover - cell->area; + + if ( area != 0 && cell->x >= ras.min_ex ) + { + FT_FILL_RULE( coverage, area, fill ); + line[cell->x] = (unsigned char)coverage; + } + + x = cell->x + 1; } - } - else - { - unsigned char* q = ras.target.origin - ras.target.pitch * y + x; - unsigned char c = (unsigned char)coverage; - - /* For small-spans it is faster to do it by ourselves than - * calling `memset'. This is mainly due to the cost of the - * function call. - */ - switch ( acount ) + if ( cover != 0 ) /* only if cropped */ { - case 7: - *q++ = c; - /* fall through */ - case 6: - *q++ = c; - /* fall through */ - case 5: - *q++ = c; - /* fall through */ - case 4: - *q++ = c; - /* fall through */ - case 3: - *q++ = c; - /* fall through */ - case 2: - *q++ = c; - /* fall through */ - case 1: - *q = c; - /* fall through */ - case 0: - break; - default: - FT_MEM_SET( q, c, acount ); + FT_FILL_RULE( coverage, cover, fill ); + FT_GRAY_SET( line + x, coverage, ras.max_ex - x ); } } } static void - gray_sweep( RAS_ARG ) + gray_sweep_direct( RAS_ARG ) { + int fill = ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ? 0x100 : INT_MIN; + int coverage; int y; + FT_Span span[FT_MAX_GRAY_SPANS]; + int n = 0; + for ( y = ras.min_ey; y < ras.max_ey; y++ ) { @@ -1271,25 +1273,59 @@ typedef ptrdiff_t FT_PtrDist; for ( ; cell != NULL; cell = cell->next ) { if ( cover != 0 && cell->x > x ) - gray_hline( RAS_VAR_ x, y, cover, cell->x - x ); + { + FT_FILL_RULE( coverage, cover, fill ); + + span[n].coverage = (unsigned char)coverage; + span[n].x = (short)x; + span[n].len = (unsigned short)( cell->x - x ); + + if ( ++n == FT_MAX_GRAY_SPANS ) + { + /* flush the span buffer and reset the count */ + ras.render_span( y, n, span, ras.render_span_data ); + n = 0; + } + } cover += (TArea)cell->cover * ( ONE_PIXEL * 2 ); area = cover - cell->area; if ( area != 0 && cell->x >= ras.min_ex ) - gray_hline( RAS_VAR_ cell->x, y, area, 1 ); + { + FT_FILL_RULE( coverage, area, fill ); + + span[n].coverage = (unsigned char)coverage; + span[n].x = (short)cell->x; + span[n].len = 1; + + if ( ++n == FT_MAX_GRAY_SPANS ) + { + /* flush the span buffer and reset the count */ + ras.render_span( y, n, span, ras.render_span_data ); + n = 0; + } + } x = cell->x + 1; } - if ( cover != 0 ) - gray_hline( RAS_VAR_ x, y, cover, ras.max_ex - x ); + if ( cover != 0 ) /* only if cropped */ + { + FT_FILL_RULE( coverage, cover, fill ); - if ( ras.num_spans > 0 ) /* for FT_RASTER_FLAG_DIRECT only */ + span[n].coverage = (unsigned char)coverage; + span[n].x = (short)x; + span[n].len = (unsigned short)( ras.max_ex - x ); + + ++n; + } + + if ( n ) { /* flush the span buffer and reset the count */ - ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data ); - ras.num_spans = 0; + ras.render_span( y, n, span, ras.render_span_data ); + n = 0; } } } @@ -1688,7 +1724,10 @@ typedef ptrdiff_t FT_PtrDist; if ( !error ) { - gray_sweep( RAS_VAR ); + if ( ras.render_span ) /* for FT_RASTER_FLAG_DIRECT only */ + gray_sweep_direct( RAS_VAR ); + else + gray_sweep( RAS_VAR ); band--; continue; } @@ -1757,7 +1796,6 @@ typedef ptrdiff_t FT_PtrDist; ras.render_span = (FT_Raster_Span_Func)params->gray_spans; ras.render_span_data = params->user; - ras.num_spans = 0; ras.min_ex = params->clip_box.xMin; ras.min_ey = params->clip_box.yMin; @@ -1787,7 +1825,6 @@ typedef ptrdiff_t FT_PtrDist; ras.render_span = (FT_Raster_Span_Func)NULL; ras.render_span_data = NULL; - ras.num_spans = -1; /* invalid */ ras.min_ex = 0; ras.min_ey = 0;