[dense] Migrate line drawing and accumulation to fixed-point

* src/dense/ftdense.h: (FT26D6, FT20D12): New typedefs

* src/dense/ftdense.c: dense_render_line, dense_render_glyph now
use fixed-point numbers for calculation

Disabled SIMD for now
This commit is contained in:
Anurag Thakur 2022-11-19 12:55:14 +05:30
parent bdeb70bc18
commit cd2e6217f3
2 changed files with 102 additions and 81 deletions

View File

@ -81,57 +81,65 @@ dense_line_to( const FT_Vector* to, dense_worker* worker )
void void
dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy ) dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
{ {
float from_x = worker->prev_x;
float from_y = worker->prev_y; FT26D6 fx = worker->prev_x>>2;
if ( from_y == toy ) FT26D6 fy = worker->prev_y>>2;
FT26D6 from_x = fx;
FT26D6 from_y = fy;
FT26D6 tx = tox>>2;
FT26D6 ty = toy>>2;
if ( fy == ty )
return; return;
FT26D6 to_x = tx;
FT26D6 to_y = ty;
from_x /= 256.0; int dir = 1;
from_y /= 256.0; if ( from_y >= to_y )
float to_x = tox / 256.0;
float to_y = toy / 256.0;
float dir;
if ( from_y < to_y )
dir = 1;
else
{ {
dir = -1; dir = -1;
FT_SWAP(from_x, to_x ); FT_SWAP(from_x, to_x);
FT_SWAP(from_y, to_y ); FT_SWAP(from_y, to_y);
} }
// Clip to the height. // Clip to the height.
if ( from_y >= worker->m_h || to_y <= 0 ) if ( from_y >= worker->m_h<<6 || to_y <= 0 )
return; return;
float dxdy = ( to_x - from_x ) / (float)( to_y - from_y ); FT26D6 deltax,deltay;
deltax = to_x - from_x;
deltay = to_y - from_y;
if ( from_y < 0 ) if ( from_y < 0 )
{ {
from_x -= from_y * dxdy; from_x -= from_y * deltax/deltay;
from_y = 0; from_y = 0;
} }
if ( to_y > worker->m_h )
if ( to_y > worker->m_h<<6 )
{ {
to_x -= ( to_y - worker->m_h ) * dxdy; to_x -= (( to_y - worker->m_h<<6 ) * deltax/deltay);
to_y = (float)worker->m_h; to_y = worker->m_h<<6;
} }
float x = from_x; int x = from_x;
int y0 = (int)from_y; int y0 = from_y>>6;
int y_limit = (int)ceil( to_y ); int y_limit = (to_y + 0x3f)>>6;
float* m_a = worker->m_a;
FT20D12* m_a = worker->m_a;
for ( int y = y0; y < y_limit; y++ ) for ( int y = y0; y < y_limit; y++ )
{ {
int linestart = y * worker->m_w; int linestart = y * worker->m_w;
float dy = fmin( y + 1.0f, to_y ) - fmax( (float)y, from_y ); FT26D6 dy = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
float xnext = x + dxdy * dy; FT26D6 xnext = x + dy * deltax/deltay;
float d = dy * dir; FT26D6 d = dy * dir;
float x0, x1; FT26D6 x0, x1;
if ( x < xnext ) if ( x < xnext )
{ {
x0 = x; x0 = x;
@ -143,40 +151,48 @@ dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
x1 = x; x1 = x;
} }
/*
It's possible for x0 to be negative on the last scanline because of
floating-point inaccuracy That would cause an out-of-bounds array access at
index -1.
*/
float x0floor = x0 <= 0.0f ? 0.0f : (float)floor( x0 );
int x0i = (int)x0floor; int x0i = x0>>6;
float x1ceil = (float)ceil( x1 ); FT26D6 x0floor = x0i<<6;
int x1i = (int)x1ceil;
int x1i = (x1+0x3f)>>6;
FT26D6 x1ceil = x1i <<6;
if ( x1i <= x0i + 1 ) if ( x1i <= x0i + 1 )
{ {
float xmf = 0.5f * ( x + xnext ) - x0floor; FT26D6 xmf = ( ( x + xnext )>>1) - x0floor;
m_a[linestart + x0i] += d - d * xmf; m_a[linestart + x0i] += d * ((1<<6) - xmf);
m_a[linestart + ( x0i + 1 )] += d * xmf; m_a[linestart + ( x0i + 1 )] += d * xmf;
} }
else else
{ {
float s = 1.0f / ( x1 - x0 );
float x0f = x0 - x0floor; FT26D6 oneOverS = x1 - x0;
float a0 = 0.5f * s * ( 1.0f - x0f ) * ( 1.0f - x0f ); FT26D6 x0f = x0 - x0floor;
float x1f = x1 - x1ceil + 1.0f;
float am = 0.5f * s * x1f * x1f;
FT26D6 oneMinusX0f = (1<<6) - x0f;
FT26D6 a0 = ((oneMinusX0f * oneMinusX0f) >> 1) / oneOverS;
FT26D6 x1f = x1 - x1ceil + (1<<6);
FT26D6 am = ((x1f * x1f) >> 1) / oneOverS;
m_a[linestart + x0i] += d * a0; m_a[linestart + x0i] += d * a0;
if ( x1i == x0i + 2 ) if ( x1i == x0i + 2 )
m_a[linestart + ( x0i + 1 )] += d * ( 1.0f - a0 - am ); m_a[linestart + ( x0i + 1 )] += d * ( (1<<6) - a0 - am );
else else
{ {
float a1 = s * ( 1.5f - x0f ); FT26D6 a1 = (((1<<6) + (1<<5) - x0f) << 6) / oneOverS;
m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 ); m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 );
for ( int xi = x0i + 2; xi < x1i - 1; xi++ )
m_a[linestart + xi] += d * s; FT26D6 dTimesS = (d << 12) / oneOverS;
float a2 = a1 + ( x1i - x0i - 3 ) * s;
m_a[linestart + ( x1i - 1 )] += d * ( 1.0f - a2 - am ); for ( FT26D6 xi = x0i + 2; xi < x1i - 1; xi++ )
m_a[linestart + xi] += dTimesS;
FT26D6 a2 = a1 + (( x1i - x0i - 3 )<<12)/oneOverS;
m_a[linestart + ( x1i - 1 )] += d * ( (1<<6) - a2 - am );
} }
m_a[linestart + x1i] += d * am; m_a[linestart + x1i] += d * am;
} }
@ -364,48 +380,49 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
FT_Error error = FT_Outline_Decompose( &( worker->outline ), FT_Error error = FT_Outline_Decompose( &( worker->outline ),
&dense_decompose_funcs, worker ); &dense_decompose_funcs, worker );
// Render into bitmap // Render into bitmap
const float* source = worker->m_a; const FT20D12* source = worker->m_a;
unsigned char* dest = target->buffer; unsigned char* dest = target->buffer;
unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h; unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h;
#if FT_SSE4_1 //#if FT_SSE4_1
__m128 offset = _mm_setzero_ps(); // __m128 offset = _mm_setzero_ps();
__m128i mask = _mm_set1_epi32(0x0c080400); // __m128i mask = _mm_set1_epi32(0x0c080400);
__m128 sign_mask = _mm_set1_ps(-0.f); // __m128 sign_mask = _mm_set1_ps(-0.f);
for (int i = 0; i < worker->m_h*worker->m_w; i += 4) { // for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
__m128 x = _mm_load_ps(&source[i]); // __m128 x = _mm_load_ps(&source[i]);
x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4))); // x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40)); // x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
x = _mm_add_ps(x, offset); // x = _mm_add_ps(x, offset);
__m128 y = _mm_andnot_ps(sign_mask, x); // fabs(x) // __m128 y = _mm_andnot_ps(sign_mask, x); // fabs(x)
y = _mm_min_ps(y, _mm_set1_ps(1.0f)); // y = _mm_min_ps(y, _mm_set1_ps(1.0f));
y = _mm_mul_ps(y, _mm_set1_ps(255.0f)); // y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
__m128i z = _mm_cvtps_epi32(y); // __m128i z = _mm_cvtps_epi32(y);
z = _mm_shuffle_epi8(z, mask); // z = _mm_shuffle_epi8(z, mask);
_mm_store_ss((float *)&dest[i], (__m128)z); // _mm_store_ss((float *)&dest[i], (__m128)z);
offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3)); // offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
} // }
#else /* FT_SSE4_1 */ //#else /* FT_SSE4_1 */
FT20D12 value = 0;
float value = 0.0f;
while ( dest < dest_end ) while ( dest < dest_end )
{ {
value += *source++; value += *source++;
if ( value > 0.0f )
{ if(value > 0){
int n = (int)( fabs( value ) * 255.0f + 0.5f ); int n = value >>4;
if ( n > 255 )
n = 255; if(n>255)n=255;
*dest = (unsigned char)n; *dest = (unsigned char)n;
} }else{
else
*dest = 0; *dest = 0;
}
dest++; dest++;
} }
#endif /* FT_SSE4_1 */ //#endif /* FT_SSE4_1 */
free(worker->m_a); free(worker->m_a);
return error; return error;
@ -444,10 +461,10 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
int size = worker->m_w * worker->m_h + 4; int size = worker->m_w * worker->m_h + 4;
worker->m_a = malloc( sizeof( float ) * size ); worker->m_a = malloc( sizeof( FT20D12 ) * size );
worker->m_a_size = size; worker->m_a_size = size;
memset( worker->m_a, 0, ( sizeof( float ) * size ) ); memset( worker->m_a, 0, ( sizeof( FT20D12 ) * size ) );
/* exit if nothing to do */ /* exit if nothing to do */
if ( worker->m_w <= worker->m_origin_x || worker->m_h <= worker->m_origin_y ) if ( worker->m_w <= worker->m_origin_x || worker->m_h <= worker->m_origin_y )
{ {

View File

@ -19,6 +19,10 @@ extern "C"
{ {
#endif #endif
typedef signed long long FT26D6; /* 26.6 fixed-point representation */
typedef signed long long FT20D12; /* 20.12 fixed-point representation */
typedef struct typedef struct
{ {
/** The array used to store signed area differences. */ /** The array used to store signed area differences. */