/** The rasterizer for the 'dense' renderer */ #undef FT_COMPONENT #define FT_COMPONENT dense #include #include #include #include #include #include "ftdense.h" #include "ftdenseerrs.h" #if defined( __SSE4_1__ ) || \ defined( __x86_64__ ) || \ defined( _M_AMD64 ) || \ ( defined( _M_IX86_FP ) && _M_IX86_FP >= 2 ) #define FT_SSE4_1 1 #else #define FT_SSE4_1 0 #endif #if defined(__ARM_NEON) #define FT_NEON 1 #else #define FT_NEON 0 #endif #if FT_SSE4_1 #include #elif FT_NEON #include #endif #define PIXEL_BITS 8 #define ONE_PIXEL ( 1 << PIXEL_BITS ) #define TRUNC( x ) (int)( ( x ) >> PIXEL_BITS ) #define UPSCALE( x ) ( ( x ) * ( ONE_PIXEL >> 6 ) ) #define DOWNSCALE( x ) ( ( x ) >> ( PIXEL_BITS - 6 ) ) #define FT_SWAP(a, b) { (a) = (a) + (b); (b) = (a) - (b); (a) = (a) - (b);} #define FT_MIN( a, b ) ( (a) < (b) ? (a) : (b) ) #define FT_MAX( a, b ) ( (a) > (b) ? (a) : (b) ) #define FT_ABS( a ) ( (a) < 0 ? -(a) : (a) ) // TODO: Fix types #define FT_UDIVPREP( c, b ) \ FT26D6 b ## _r = c ? (FT26D6)0xFFFFFFFF / ( b ) : 0 #define FT_UDIV( a, b ) \ (FT26D6)( ( (FT26D6)( a ) * (FT26D6)( b ## _r ) ) >> 32 ) typedef struct dense_TRaster_ { void* memory; } dense_TRaster, *dense_PRaster; /* Linear interpolation between P0 and P1 */ static FT_Vector Lerp( float T, FT_Vector P0, FT_Vector P1 ) { FT_Vector p; p.x = P0.x + T * ( P1.x - P0.x ); p.y = P0.y + T * ( P1.y - P0.y ); return p; } static int dense_move_to( const FT_Vector* to, dense_worker* worker ) { FT_Pos x, y; x = UPSCALE( to->x ); y = UPSCALE( to->y ); worker->prev_x = x; worker->prev_y = y; return 0; } static int dense_line_to( const FT_Vector* to, dense_worker* worker ) { dense_render_line( worker, UPSCALE( to->x ), UPSCALE( to->y ) ); dense_move_to( to, worker ); return 0; } void dense_render_line2( dense_worker* worker, FT_PreLine pl ) { FT26D6 fx = UPSCALE(pl->x1)>>2; FT26D6 fy = UPSCALE(pl->y1)>>2; FT26D6 from_x = fx; FT26D6 from_y = fy; FT26D6 tx = UPSCALE(pl->x2)>>2; FT26D6 ty = UPSCALE(pl->y2)>>2; if ( fy == ty ) return; FT26D6 to_x = tx; FT26D6 to_y = ty; int dir = 1; if ( from_y >= to_y ) { dir = -1; FT_SWAP(from_x, to_x); FT_SWAP(from_y, to_y); } // Clip to the height. if ( from_y >= worker->m_h<<6 || to_y <= 0 ) return; FT26D6 deltax,deltay; deltax = to_x - from_x; deltay = to_y - from_y; FT_UDIVPREP(from_x != to_x, deltax); FT_UDIVPREP(from_y != to_y, deltay); if ( from_y < 0 ) { from_x -= from_y * deltax/deltay; from_y = 0; } if ( to_y > worker->m_h<<6 ) { to_x -= (( to_y - worker->m_h<<6 ) * deltax/deltay); to_y = worker->m_h<<6; } if(deltax == 0){ FT26D6 x = from_x; int x0i = x>>6; FT26D6 x0floor = x0i<<6; // y-coordinate of first pixel of line int y0 = from_y>>6; // y-coordinate of last pixel of line int y_limit = (to_y + 0x3f)>>6; FT20D12* m_a = worker->m_a; for ( int y = y0; y < y_limit; y++ ) { int linestart = y * worker->m_w; FT26D6 dy = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y ); m_a[linestart + x0i] += dir*dy*(64 - x + x0floor); m_a[linestart + ( x0i + 1 )] += dir*dy*(x-x0floor); } } else { int x = from_x; int y0 = from_y>>6; int y_limit = (to_y + 0x3f)>>6; FT20D12* m_a = worker->m_a; for ( int y = y0; y < y_limit; y++ ) { int linestart = y * worker->m_w; FT26D6 dy = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y ); FT26D6 xnext = x + FT_UDIV((dy*deltax), deltay); FT26D6 d = dy * dir; FT26D6 x0, x1; if ( x < xnext ) { x0 = x; x1 = xnext; } else { x0 = xnext; x1 = x; } int x0i = x0>>6; FT26D6 x0floor = x0i<<6; int x1i = (x1+0x3f)>>6; FT26D6 x1ceil = x1i <<6; if ( x1i <= x0i + 1 ) { FT26D6 xmf = ( ( x + xnext )>>1) - x0floor; m_a[linestart + x0i] += d * ((1<<6) - xmf); m_a[linestart + ( x0i + 1 )] += d * xmf; } else { FT26D6 oneOverS = x1 - x0; FT_UDIVPREP(x1 != x0, oneOverS); FT26D6 x0f = x0 - x0floor; FT26D6 oneMinusX0f = (1<<6) - x0f; FT26D6 a0 = FT_UDIV(((oneMinusX0f * oneMinusX0f) >> 1), oneOverS); FT26D6 x1f = x1 - x1ceil + (1<<6); FT26D6 am = FT_UDIV(((x1f * x1f) >> 1) , oneOverS); m_a[linestart + x0i] += d * a0; if ( x1i == x0i + 2 ) m_a[linestart + ( x0i + 1 )] += d * ( (1<<6) - a0 - am ); else { FT26D6 a1 = FT_UDIV((((1<<6) + (1<<5) - x0f) << 6) , oneOverS); m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 ); FT26D6 dTimesS = FT_UDIV((d << 12) , oneOverS); for ( FT26D6 xi = x0i + 2; xi < x1i - 1; xi++ ) m_a[linestart + xi] += dTimesS; FT26D6 a2 = a1 + FT_UDIV((( x1i - x0i - 3 )<<12),oneOverS); m_a[linestart + ( x1i - 1 )] += d * ( (1<<6) - a2 - am ); } m_a[linestart + x1i] += d * am; } x = xnext; } } } void dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy ) { FT26D6 fx = worker->prev_x>>2; FT26D6 fy = worker->prev_y>>2; FT26D6 from_x = fx; FT26D6 from_y = fy; FT26D6 tx = tox>>2; FT26D6 ty = toy>>2; if ( fy == ty ) return; FT26D6 to_x = tx; FT26D6 to_y = ty; int dir = 1; if ( from_y >= to_y ) { dir = -1; FT_SWAP(from_x, to_x); FT_SWAP(from_y, to_y); } // Clip to the height. if ( from_y >= worker->m_h<<6 || to_y <= 0 ) return; FT26D6 deltax,deltay; deltax = to_x - from_x; deltay = to_y - from_y; FT_UDIVPREP(from_x != to_x, deltax); FT_UDIVPREP(from_y != to_y, deltay); if ( from_y < 0 ) { from_x -= from_y * deltax/deltay; from_y = 0; } if ( to_y > worker->m_h<<6 ) { to_x -= (( to_y - worker->m_h<<6 ) * deltax/deltay); to_y = worker->m_h<<6; } if(deltax == 0){ FT26D6 x = from_x; int x0i = x>>6; FT26D6 x0floor = x0i<<6; // y-coordinate of first pixel of line int y0 = from_y>>6; // y-coordinate of last pixel of line int y_limit = (to_y + 0x3f)>>6; FT20D12* m_a = worker->m_a; for ( int y = y0; y < y_limit; y++ ) { int linestart = y * worker->m_w; FT26D6 dy = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y ); m_a[linestart + x0i] += dir*dy*(64 - x + x0floor); m_a[linestart + ( x0i + 1 )] += dir*dy*(x-x0floor); } } else { int x = from_x; int y0 = from_y>>6; int y_limit = (to_y + 0x3f)>>6; FT20D12* m_a = worker->m_a; for ( int y = y0; y < y_limit; y++ ) { int linestart = y * worker->m_w; FT26D6 dy = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y ); FT26D6 xnext = x + FT_UDIV((dy*deltax), deltay); FT26D6 d = dy * dir; FT26D6 x0, x1; if ( x < xnext ) { x0 = x; x1 = xnext; } else { x0 = xnext; x1 = x; } int x0i = x0>>6; FT26D6 x0floor = x0i<<6; int x1i = (x1+0x3f)>>6; FT26D6 x1ceil = x1i <<6; if ( x1i <= x0i + 1 ) { FT26D6 xmf = ( ( x + xnext )>>1) - x0floor; m_a[linestart + x0i] += d * ((1<<6) - xmf); m_a[linestart + ( x0i + 1 )] += d * xmf; } else { FT26D6 oneOverS = x1 - x0; FT_UDIVPREP(x1 != x0, oneOverS); FT26D6 x0f = x0 - x0floor; FT26D6 oneMinusX0f = (1<<6) - x0f; FT26D6 a0 = FT_UDIV(((oneMinusX0f * oneMinusX0f) >> 1), oneOverS); FT26D6 x1f = x1 - x1ceil + (1<<6); FT26D6 am = FT_UDIV(((x1f * x1f) >> 1) , oneOverS); m_a[linestart + x0i] += d * a0; if ( x1i == x0i + 2 ) m_a[linestart + ( x0i + 1 )] += d * ( (1<<6) - a0 - am ); else { FT26D6 a1 = FT_UDIV((((1<<6) + (1<<5) - x0f) << 6) , oneOverS); m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 ); FT26D6 dTimesS = FT_UDIV((d << 12) , oneOverS); for ( FT26D6 xi = x0i + 2; xi < x1i - 1; xi++ ) m_a[linestart + xi] += dTimesS; FT26D6 a2 = a1 + FT_UDIV((( x1i - x0i - 3 )<<12),oneOverS); m_a[linestart + ( x1i - 1 )] += d * ( (1<<6) - a2 - am ); } m_a[linestart + x1i] += d * am; } x = xnext; } } } static int dense_conic_to( const FT_Vector* control, const FT_Vector* to, dense_worker* worker ) { dense_render_quadratic( worker, control, to ); return 0; } void dense_render_quadratic( dense_worker* worker, FT_Vector* control, FT_Vector* to ) { /* Calculate devsq as the square of four times the distance from the control point to the midpoint of the curve. This is the place at which the curve is furthest from the line joining the control points. 4 x point on curve = p0 + 2p1 + p2 4 x midpoint = 4p1 The division by four is omitted to save time. */ FT_Vector aP0 = { DOWNSCALE( worker->prev_x ), DOWNSCALE( worker->prev_y ) }; FT_Vector aP1 = { control->x, control->y }; FT_Vector aP2 = { to->x, to->y }; float devx = aP0.x - aP1.x - aP1.x + aP2.x; float devy = aP0.y - aP1.y - aP1.y + aP2.y; float devsq = devx * devx + devy * devy; if ( devsq < 0.333f ) { dense_line_to( &aP2, worker ); return; } /* According to Raph Levien, the reason for the subdivision by n (instead of recursive division by the Casteljau system) is that "I expect the flatness computation to be semi-expensive (it's done once rather than on each potential subdivision) and also because you'll often get fewer subdivisions. Taking a circular arc as a simplifying assumption, where I get n, a recursive approach would get 2^ceil(lg n), which, if I haven't made any horrible mistakes, is expected to be 33% more in the limit". */ const float tol = 3.0f; int n = (int)floor( sqrt( sqrt( tol * devsq ) ) )/8; FT_Vector p = aP0; float nrecip = 1.0f / ( n + 1.0f ); float t = 0.0f; for ( int i = 0; i < n; i++ ) { t += nrecip; FT_Vector next = Lerp( t, Lerp( t, aP0, aP1 ), Lerp( t, aP1, aP2 ) ); dense_line_to(&next, worker ); p = next; } dense_line_to( &aP2, worker ); } static int dense_cubic_to( const FT_Vector* control1, const FT_Vector* control2, const FT_Vector* to, dense_worker* worker ) { dense_render_cubic( worker, control1, control2, to ); return 0; } void dense_render_cubic( dense_worker* worker, FT_Vector* control_1, FT_Vector* control_2, FT_Vector* to ) { FT_Vector aP0 = { DOWNSCALE( worker->prev_x ), DOWNSCALE( worker->prev_y ) }; FT_Vector aP1 = { control_1->x, control_1->y }; FT_Vector aP2 = { control_2->x, control_2->y }; FT_Vector aP3 = { to->x, to->y }; float devx = aP0.x - aP1.x - aP1.x + aP2.x; float devy = aP0.y - aP1.y - aP1.y + aP2.y; float devsq0 = devx * devx + devy * devy; devx = aP1.x - aP2.x - aP2.x + aP3.x; devy = aP1.y - aP2.y - aP2.y + aP3.y; float devsq1 = devx * devx + devy * devy; float devsq = fmax( devsq0, devsq1 ); if ( devsq < 0.333f ) { dense_render_line( worker, aP3.x, aP3.y ); return; } const float tol = 3.0f; int n = (int)floor( sqrt( sqrt( tol * devsq ) ) ) / 8; FT_Vector p = aP0; float nrecip = 1.0f / ( n + 1.0f ); float t = 0.0f; for ( int i = 0; i < n; i++ ) { t += nrecip; FT_Vector a = Lerp( t, Lerp( t, aP0, aP1 ), Lerp( t, aP1, aP2 ) ); FT_Vector b = Lerp( t, Lerp( t, aP1, aP2 ), Lerp( t, aP2, aP3 ) ); FT_Vector next = Lerp( t, a, b ); dense_render_line( worker, next.x, next.y ); worker->prev_x = next.x; worker->prev_y = next.y; p = next; } dense_line_to( &aP3, worker ); } static int dense_raster_new( FT_Memory memory, dense_PRaster* araster ) { FT_Error error; dense_PRaster raster; if ( !FT_NEW( raster ) ) raster->memory = memory; *araster = raster; return error; } static void dense_raster_done( FT_Raster raster ) { FT_Memory memory = (FT_Memory)( (dense_PRaster)raster )->memory; FT_FREE( raster ); } static void dense_raster_reset( FT_Raster raster, unsigned char* pool_base, unsigned long pool_size ) { FT_UNUSED( raster ); FT_UNUSED( pool_base ); FT_UNUSED( pool_size ); } static int dense_raster_set_mode( FT_Raster raster, unsigned long mode, void* args ) { FT_UNUSED( raster ); FT_UNUSED( mode ); FT_UNUSED( args ); return 0; /* nothing to do */ } FT_DEFINE_OUTLINE_FUNCS( dense_decompose_funcs, (FT_Outline_MoveTo_Func)dense_move_to, /* move_to */ (FT_Outline_LineTo_Func)dense_line_to, /* line_to */ (FT_Outline_ConicTo_Func)dense_conic_to, /* conic_to */ (FT_Outline_CubicTo_Func)dense_cubic_to, /* cubic_to */ 0, /* shift */ 0 /* delta */ ) static int dense_render_glyph( dense_worker* worker, const FT_Bitmap* target, FT_PreLine pl ) { FT_Error error = 0; while (pl != NULL) { dense_render_line2(worker, pl); pl = pl->next; } // Render into bitmap const FT20D12* source = worker->m_a; unsigned char* dest = target->buffer; unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h; #if FT_SSE4_1 __m128i offset = _mm_setzero_si128(); __m128i nzero = _mm_castps_si128(_mm_set1_ps(-0.0)); for (int i = 0; i < worker->m_h*worker->m_w; i += 4) { // load 4 floats from source __m128i x = _mm_load_si128( (__m128i*)&source[i] ); x = _mm_add_epi32( x, _mm_slli_si128( x, 4 ) ); x = _mm_add_epi32( x, _mm_slli_si128( x, 8 ) ); // add the prefix sum of previous 4 ints to all ints x = _mm_add_epi32( x, offset ); // take absolute value __m128i y = _mm_srli_epi32( _mm_abs_epi32( x) , 4 ); y = _mm_packus_epi16(_mm_packs_epi32(y, nzero), nzero); _mm_storeu_si32(&dest[i], y); // store the current prefix sum in offset offset = _mm_shuffle_epi32(x,_MM_SHUFFLE( 3, 3, 3, 3 ) ); } #elif FT_NEON int32x4_t offset = vdupq_n_s32(0); int32x4_t nzero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0)); for (int i = 0; i < worker->m_h*worker->m_w; i += 4) { // load 4 floats from source int32x4_t x = vld1q_s32( (int32_t*)&source[i] ); x = vaddq_s32( x, vreinterpretq_s32_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_s32( x), 12) )); x = vaddq_s32(x, vreinterpretq_s32_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_s32(x), 8))); // add the prefsum of previous 4 floats to all current floats x = vaddq_s32( x, offset ); int32x4_t y = vshrq_n_s32( vabsq_s32( x) , 4 ); y = vreinterpretq_s32_s16(vcombine_s16(vqmovn_s32(y), vqmovn_s32(nzero))); y = vreinterpretq_s32_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_s32(y)), vqmovun_s16(vreinterpretq_s16_s32(nzero)))); vst1q_s32(&dest[i], y); offset = vdupq_laneq_s32(x,3 ); } #else FT20D12 value = 0; while ( dest < dest_end ) { value += *source++; if(value > 0){ int n = value >>4; if(n>255)n=255; *dest = (unsigned char)n; }else{ *dest = 0; } dest++; } #endif /* FT_SSE4_1 || FT_NEON */ free(worker->m_a); return error; } static int dense_raster_render( FT_Raster raster, const FT_Raster_Params* params ) { const FT_Outline* outline = (const FT_Outline*)params->source; FT_Bitmap* target_map = params->target; FT_PreLine pl = params->prelines; dense_worker worker[1]; if ( !raster ) return FT_THROW( Invalid_Argument ); if ( !outline ) return FT_THROW( Invalid_Outline ); worker->outline = *outline; if ( !target_map ) return FT_THROW( Invalid_Argument ); /* nothing to do */ if ( !target_map->width || !target_map->rows ) return 0; if ( !target_map->buffer ) return FT_THROW( Invalid_Argument ); worker->m_origin_x = 0; worker->m_origin_y = 0; worker->m_w = target_map->pitch; worker->m_h = target_map->rows; int size = (worker->m_w * worker->m_h + 3) & ~3; worker->m_a = malloc( sizeof( FT20D12 ) * size ); worker->m_a_size = size; memset( worker->m_a, 0, ( sizeof( FT20D12 ) * size ) ); /* exit if nothing to do */ if ( worker->m_w <= worker->m_origin_x || worker->m_h <= worker->m_origin_y ) { return 0; } // Invert the pitch to account for different +ve y-axis direction in dense array // (maybe temporary solution) target_map->pitch *= -1; return dense_render_glyph( worker, target_map, pl ); } FT_DEFINE_RASTER_FUNCS( ft_dense_raster, FT_GLYPH_FORMAT_OUTLINE, (FT_Raster_New_Func)dense_raster_new, /* raster_new */ (FT_Raster_Reset_Func)dense_raster_reset, /* raster_reset */ (FT_Raster_Set_Mode_Func)dense_raster_set_mode, /* raster_set_mode */ (FT_Raster_Render_Func)dense_raster_render, /* raster_render */ (FT_Raster_Done_Func)dense_raster_done /* raster_done */ ) /* END */