* include/freetype/ftoption.h, include/freetype/ftconfig.h,

builds/unix/ftconfig.in, include/freetype/freetype.h,
    src/base/ftcalc.c:
    Make FT_MulFix an inlined function. Also provide an assembler
    implementation for ARM architectures. This is done to speed up
    FreeType a little (on x86, 3% when loading+hinting and 10% when
    rendering; the savings on ARM are more significant).
    Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
    ftconfig.h.
This commit is contained in:
David Turner 2008-09-02 02:21:58 +00:00
parent 238bb38513
commit f47d263f1b
6 changed files with 165 additions and 25 deletions

View File

@ -1,5 +1,15 @@
2008-09-01 david turner <david@freetype.org>
* include/freetype/ftoption.h, include/freetype/ftconfig.h,
builds/unix/ftconfig.in, include/freetype/freetype.h,
src/base/ftcalc.c:
Make FT_MulFix an inlined function. Also provide an assembler
implementation for ARM architectures. This is done to speed up
FreeType a little (on x86, 3% when loading+hinting and 10% when
rendering; the savings on ARM are more significant).
Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
ftconfig.h.
* include/freetype/ftadvanc.h, src/base/ftadvanc.c,
include/freetype/config/ftheader.h, include/freetype/freetype.h,
src/base/Jamfile, src/base/rules.mk, src/cff/cffdrivr.c,

View File

@ -197,6 +197,67 @@ FT_BEGIN_HEADER
#endif /* FT_SIZEOF_LONG == 8 */
#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
/* Provide assembler fragments for performance-critical
* functions. These must be defined `static __inline__'
* with GCC.
*/
#if defined(__GNUC__)
# if defined(__arm__) && !defined(__thumb__)
# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
/* ARM (non-Thumb) inline-assembly implementation of FT_MulFix.         */
/*                                                                      */
/* Forms the full 64-bit signed product `a*b', adds a rounding term of  */
/* 0x8000 plus the sign word of the product (hi >> 31, i.e. 0 or -1),   */
/* and returns bits 16..47 of that sum.                                 */
/*                                                                      */
/* `t' and `t2' are early-clobber scratch registers holding the high    */
/* and low product words; the result is built in the register of `a'.   */
static __inline__ FT_Int32
FT_MulFix_arm( FT_Int32  a,
               FT_Int32  b )
{
  FT_Int32  t, t2;


  /* Use `__asm__' (not `asm') so the code also builds in strict ISO    */
  /* modes, and spell out all three `orr' operands so that assemblers   */
  /* which reject the two-operand shorthand accept it.                  */
  __asm__ __volatile__ (
    "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
    "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31) */
    "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 */
    "adds   %1, %1, %0\n\t"           /* %1 += %0 */
    "adc    %2, %2, #0\n\t"           /* %2 += carry */
    "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
    "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 */
    : "=r"(a), "=&r"(t2), "=&r"(t)
    : "r"(a), "r"(b)
    : "cc" );                         /* `adds' updates the flags */

  return a;
}
# endif /* __arm__ */
# if defined(i386)
# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
/* i386 inline-assembly implementation of FT_MulFix.                    */
/*                                                                      */
/* `a' is passed in %eax and `b' in %edx (also marked as modified);     */
/* %ecx is used as scratch and the result is read back from %eax.       */
/* The sequence mirrors the ARM variant: 64-bit product, add 0x8000     */
/* plus the sign word of the product, then take bits 16..47.            */
static __inline__ FT_Int32
FT_MulFix_i386( FT_Int32 a, FT_Int32 b )
{
register FT_Int32 result;
__asm__ __volatile__ (
"imul %%edx\n"            /* edx:eax = eax * edx (signed)          */
"movl %%edx, %%ecx\n"     /* ecx = high word                       */
"sarl $31, %%ecx\n"       /* ecx = high word >> 31 (0 or -1)       */
"addl $0x8000, %%ecx\n"   /* ecx += 0x8000 (rounding term)         */
"addl %%ecx, %%eax\n"     /* low word += rounding term             */
"adcl $0, %%edx\n"        /* propagate the carry into high word    */
"shrl $16, %%eax\n"       /* eax = low word >> 16 (logical)        */
"shll $16, %%edx\n"       /* edx = high word << 16                 */
"addl %%edx, %%eax\n"     /* combine both halves into the result   */
: "=a"(result), "+d"(b)
: "a"(a)
: "%ecx"
);
return result;
}
# endif /* i386 */
#endif /* __GNUC__ */
#endif /* !NO_ASSEMBLER */
/* FT_MULFIX_INLINED is defined only when inlining has been requested   */
/* through FT_CONFIG_OPTION_INLINE_MULFIX and an assembler version has  */
/* been selected through FT_MULFIX_ASSEMBLER.                           */
#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
# ifdef FT_MULFIX_ASSEMBLER
# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
# endif
#endif
#define FT_BEGIN_STMNT do {
#define FT_END_STMNT } while ( 0 )

View File

@ -225,6 +225,67 @@ FT_BEGIN_HEADER
#endif
#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
/* Provide assembler fragments for performance-critical
* functions. These must be defined `static __inline__'
* with GCC.
*/
#if defined(__GNUC__)
# if defined(__arm__) && !defined(__thumb__)
# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
/* ARM (non-Thumb) inline-assembly implementation of FT_MulFix.         */
/*                                                                      */
/* Forms the full 64-bit signed product `a*b', adds a rounding term of  */
/* 0x8000 plus the sign word of the product (hi >> 31, i.e. 0 or -1),   */
/* and returns bits 16..47 of that sum.                                 */
/*                                                                      */
/* `t' and `t2' are early-clobber scratch registers holding the high    */
/* and low product words; the result is built in the register of `a'.   */
static __inline__ FT_Int32
FT_MulFix_arm( FT_Int32  a,
               FT_Int32  b )
{
  FT_Int32  t, t2;


  /* Use `__asm__' (not `asm') so the code also builds in strict ISO    */
  /* modes, and spell out all three `orr' operands so that assemblers   */
  /* which reject the two-operand shorthand accept it.                  */
  __asm__ __volatile__ (
    "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
    "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31) */
    "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 */
    "adds   %1, %1, %0\n\t"           /* %1 += %0 */
    "adc    %2, %2, #0\n\t"           /* %2 += carry */
    "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
    "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 */
    : "=r"(a), "=&r"(t2), "=&r"(t)
    : "r"(a), "r"(b)
    : "cc" );                         /* `adds' updates the flags */

  return a;
}
# endif /* __arm__ */
# if defined(i386)
# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
/* i386 inline-assembly implementation of FT_MulFix.                    */
/*                                                                      */
/* `a' is passed in %eax and `b' in %edx (also marked as modified);     */
/* %ecx is used as scratch and the result is read back from %eax.       */
/* The sequence mirrors the ARM variant: 64-bit product, add 0x8000     */
/* plus the sign word of the product, then take bits 16..47.            */
static __inline__ FT_Int32
FT_MulFix_i386( FT_Int32 a, FT_Int32 b )
{
register FT_Int32 result;
__asm__ __volatile__ (
"imul %%edx\n"            /* edx:eax = eax * edx (signed)          */
"movl %%edx, %%ecx\n"     /* ecx = high word                       */
"sarl $31, %%ecx\n"       /* ecx = high word >> 31 (0 or -1)       */
"addl $0x8000, %%ecx\n"   /* ecx += 0x8000 (rounding term)         */
"addl %%ecx, %%eax\n"     /* low word += rounding term             */
"adcl $0, %%edx\n"        /* propagate the carry into high word    */
"shrl $16, %%eax\n"       /* eax = low word >> 16 (logical)        */
"shll $16, %%edx\n"       /* edx = high word << 16                 */
"addl %%edx, %%eax\n"     /* combine both halves into the result   */
: "=a"(result), "+d"(b)
: "a"(a)
: "%ecx"
);
return result;
}
# endif /* i386 */
#endif /* __GNUC__ */
#endif /* !NO_ASSEMBLER */
/* FT_MULFIX_INLINED is defined only when inlining has been requested   */
/* through FT_CONFIG_OPTION_INLINE_MULFIX and an assembler version has  */
/* been selected through FT_MULFIX_ASSEMBLER.                           */
#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
# ifdef FT_MULFIX_ASSEMBLER
# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
# endif
#endif
/* determine whether we have a 64-bit int type for platforms without */
/* Autoconf */

View File

@ -115,6 +115,26 @@ FT_BEGIN_HEADER
#undef FT_CONFIG_OPTION_FORCE_INT64
/*************************************************************************/
/* */
/* When this macro is defined, do not try to use an assembler version */
/* of performance-critical functions (e.g. FT_MulFix). You should only */
/* do that to verify that the assembler function works properly, or to */
/* benchmark the various implementations. */
/* #define FT_CONFIG_OPTION_NO_ASSEMBLER */
/*************************************************************************/
/* */
/* When this macro is defined, try to use an inlined assembler version */
/* of the FT_MulFix function, which appears to be a hotspot when loading */
/* and hinting glyphs. */
/* */
/* Note that if your compiler or CPU isn't supported, this will default */
/* to the standard and portable implementation found in src/base/ftcalc.c. */
/* */
#define FT_CONFIG_OPTION_INLINE_MULFIX
/*************************************************************************/
/* */
/* LZW-compressed file support. */

View File

@ -3468,10 +3468,13 @@ FT_BEGIN_HEADER
/* _second_ argument of this function; this can make a great */
/* difference. */
/* */
#ifdef FT_MULFIX_INLINED
/* An inlined implementation is available: calls to FT_MulFix expand */
/* directly to it, and no prototype is declared in this branch.      */
# define FT_MulFix(a,b) FT_MULFIX_INLINED(a,b)
#else
/* Otherwise declare FT_MulFix as a regular exported function. */
FT_EXPORT( FT_Long )
FT_MulFix( FT_Long a,
FT_Long b );
#endif
/*************************************************************************/
/* */

View File

@ -38,6 +38,9 @@
#include FT_INTERNAL_DEBUG_H
#include FT_INTERNAL_OBJECTS_H
#ifdef FT_MULFIX_INLINED
#undef FT_MulFix
#endif
/* we need to define a 64-bits data type here */
@ -193,6 +196,9 @@
FT_MulFix( FT_Long a,
FT_Long b )
{
#ifdef FT_MULFIX_ASSEMBLER
return FT_MULFIX_ASSEMBLER(a,b);
#else
FT_Int s = 1;
FT_Long c;
@ -202,6 +208,7 @@
c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
return ( s > 0 ) ? c : -c ;
#endif
}
@ -413,30 +420,8 @@
FT_MulFix( FT_Long a,
FT_Long b )
{
/* use inline assembly to speed up things a bit */
#if defined( __GNUC__ ) && defined( i386 )
FT_Long result;
__asm__ __volatile__ (
"imul %%edx\n"
"movl %%edx, %%ecx\n"
"sarl $31, %%ecx\n"
"addl $0x8000, %%ecx\n"
"addl %%ecx, %%eax\n"
"adcl $0, %%edx\n"
"shrl $16, %%eax\n"
"shll $16, %%edx\n"
"addl %%edx, %%eax\n"
"mov %%eax, %0\n"
: "=a"(result), "+d"(b)
: "a"(a)
: "%ecx"
);
return result;
#ifdef FT_MULFIX_ASSEMBLER
return FT_MULFIX_ASSEMBLER(a,b);
#elif 0
/*