* include/freetype/ftoption.h, include/freetype/ftconfig.h,
builds/unix/ftconfig.in, include/freetype/freetype.h, src/base/ftcalc.c: Make FT_MulFix an inlined function. Also provide an assembler implementation for ARM architectures. This is done to speed up FreeType a little (on x86, 3% when loading+hinting and 10% when rendering; the savings on ARM are larger, though). Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in ftconfig.h.
This commit is contained in:
parent
238bb38513
commit
f47d263f1b
10
ChangeLog
10
ChangeLog
|
@ -1,5 +1,15 @@
|
|||
2008-09-01 david turner <david@freetype.org>
|
||||
|
||||
* include/freetype/ftoption.h, include/freetype/ftconfig.h,
|
||||
builds/unix/ftconfig.in, include/freetype/freetype.h,
|
||||
src/base/ftcalc.c:
|
||||
Make FT_MulFix an inlined function. Also provide an assembler
|
||||
implementation for ARM architectures. This is done to speed up
|
||||
FreeType a little (on x86 3% when loading+hinting, 10% when
|
||||
rendering, ARM savings are more important though).
|
||||
Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
|
||||
ftconfig.h
|
||||
|
||||
* include/freetype/ftadvanc.h, src/base/ftadvanc.c,
|
||||
include/freetype/config/ftheader.h, include/freetype/freetype.h,
|
||||
src/base/Jamfile, src/base/rules.mk, src/cff/cffdrivr.c,
|
||||
|
|
|
@ -197,6 +197,67 @@ FT_BEGIN_HEADER
|
|||
|
||||
#endif /* FT_SIZEOF_LONG == 8 */
|
||||
|
||||
#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
|
||||
/* Provide assembler fragments for performance-critical
|
||||
* functions. These must be defined `static __inline__'
|
||||
* with GCC.
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
|
||||
# if defined(__arm__) && !defined(__thumb__)
|
||||
# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
|
||||
static __inline__ FT_Int32
|
||||
FT_MulFix_arm( FT_Int32 a, FT_Int32 b )
|
||||
{
|
||||
register FT_Int32 t, t2;
|
||||
asm __volatile__ (
|
||||
"smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */
|
||||
"mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31) */
|
||||
"add %0, %0, #0x8000\n\t" /* %0 += 0x8000 */
|
||||
"adds %1, %1, %0\n\t" /* %1 += %0 */
|
||||
"adc %2, %2, #0\n\t" /* %2 += carry */
|
||||
"mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */
|
||||
"orr %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 */
|
||||
: "=r"(a), "=&r"(t2), "=&r"(t)
|
||||
: "r"(a), "r"(b)
|
||||
);
|
||||
return a;
|
||||
}
|
||||
# endif /* __arm__ */
|
||||
|
||||
# if defined(i386)
|
||||
# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
|
||||
/*
 * i386 inline-assembler implementation of FT_MulFix (GCC only).
 *
 * Forms the 64-bit signed product of `a' and `b' in edx:eax, adds a
 * rounding bias (0x8000, or 0x7FFF when the high word is negative),
 * and returns bits 16..47 of the sum.
 *
 * `a' enters in eax, `b' enters in edx and is overwritten by the
 * assembler code; ecx is used as a scratch register.
 */
static __inline__ FT_Int32
|
||||
FT_MulFix_i386( FT_Int32 a, FT_Int32 b )
|
||||
{
|
||||
register FT_Int32 result;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"imul %%edx\n"            /* edx:eax = eax * edx (signed) */
|
||||
"movl %%edx, %%ecx\n"     /* ecx = high word */
|
||||
"sarl $31, %%ecx\n"       /* ecx = 0 or -1, depending on sign */
|
||||
"addl $0x8000, %%ecx\n"   /* ecx = 0x8000 or 0x7FFF (rounding bias) */
|
||||
"addl %%ecx, %%eax\n"     /* low word += bias */
|
||||
"adcl $0, %%edx\n"        /* propagate carry into high word */
|
||||
"shrl $16, %%eax\n"       /* eax = low word >> 16 */
|
||||
"shll $16, %%edx\n"       /* edx = high word << 16 */
|
||||
"addl %%edx, %%eax\n"     /* eax = combined 32-bit result */
|
||||
: "=a"(result), "+d"(b)   /* outputs: eax -> result; edx is read-write */
|
||||
: "a"(a)                  /* input: a in eax */
|
||||
: "%ecx"                  /* scratch register clobbered above */
|
||||
);
|
||||
return result;
|
||||
}
|
||||
# endif /* i386 */
|
||||
#endif /* __GNUC__ */
|
||||
#endif /* !NO_ASSEMBLER */
|
||||
|
||||
#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
|
||||
# ifdef FT_MULFIX_ASSEMBLER
|
||||
# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#define FT_BEGIN_STMNT do {
|
||||
#define FT_END_STMNT } while ( 0 )
|
||||
|
|
|
@ -225,6 +225,67 @@ FT_BEGIN_HEADER
|
|||
|
||||
#endif
|
||||
|
||||
#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
|
||||
/* Provide assembler fragments for performance-critical
|
||||
* functions. These must be defined `static __inline__'
|
||||
* with GCC.
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
|
||||
# if defined(__arm__) && !defined(__thumb__)
|
||||
# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
|
||||
static __inline__ FT_Int32
|
||||
FT_MulFix_arm( FT_Int32 a, FT_Int32 b )
|
||||
{
|
||||
register FT_Int32 t, t2;
|
||||
asm __volatile__ (
|
||||
"smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */
|
||||
"mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31) */
|
||||
"add %0, %0, #0x8000\n\t" /* %0 += 0x8000 */
|
||||
"adds %1, %1, %0\n\t" /* %1 += %0 */
|
||||
"adc %2, %2, #0\n\t" /* %2 += carry */
|
||||
"mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */
|
||||
"orr %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 */
|
||||
: "=r"(a), "=&r"(t2), "=&r"(t)
|
||||
: "r"(a), "r"(b)
|
||||
);
|
||||
return a;
|
||||
}
|
||||
# endif /* __arm__ */
|
||||
|
||||
# if defined(i386)
|
||||
# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
|
||||
/*
 * i386 inline-assembler implementation of FT_MulFix (GCC only).
 *
 * Forms the 64-bit signed product of `a' and `b' in edx:eax, adds a
 * rounding bias (0x8000, or 0x7FFF when the high word is negative),
 * and returns bits 16..47 of the sum.
 *
 * `a' enters in eax, `b' enters in edx and is overwritten by the
 * assembler code; ecx is used as a scratch register.
 */
static __inline__ FT_Int32
|
||||
FT_MulFix_i386( FT_Int32 a, FT_Int32 b )
|
||||
{
|
||||
register FT_Int32 result;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"imul %%edx\n"            /* edx:eax = eax * edx (signed) */
|
||||
"movl %%edx, %%ecx\n"     /* ecx = high word */
|
||||
"sarl $31, %%ecx\n"       /* ecx = 0 or -1, depending on sign */
|
||||
"addl $0x8000, %%ecx\n"   /* ecx = 0x8000 or 0x7FFF (rounding bias) */
|
||||
"addl %%ecx, %%eax\n"     /* low word += bias */
|
||||
"adcl $0, %%edx\n"        /* propagate carry into high word */
|
||||
"shrl $16, %%eax\n"       /* eax = low word >> 16 */
|
||||
"shll $16, %%edx\n"       /* edx = high word << 16 */
|
||||
"addl %%edx, %%eax\n"     /* eax = combined 32-bit result */
|
||||
: "=a"(result), "+d"(b)   /* outputs: eax -> result; edx is read-write */
|
||||
: "a"(a)                  /* input: a in eax */
|
||||
: "%ecx"                  /* scratch register clobbered above */
|
||||
);
|
||||
return result;
|
||||
}
|
||||
# endif /* i386 */
|
||||
#endif /* __GNUC__ */
|
||||
#endif /* !NO_ASSEMBLER */
|
||||
|
||||
#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
|
||||
# ifdef FT_MULFIX_ASSEMBLER
|
||||
# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/* determine whether we have a 64-bit int type for platforms without */
|
||||
/* Autoconf */
|
||||
|
|
|
@ -115,6 +115,26 @@ FT_BEGIN_HEADER
|
|||
#undef FT_CONFIG_OPTION_FORCE_INT64
|
||||
|
||||
|
||||
/*************************************************************************/
|
||||
/* */
|
||||
/* When this macro is defined, do not try to use an assembler version */
|
||||
/* of performance-critical functions (e.g. FT_MulFix). You should only */
|
||||
/* do that to verify that the assembler function works properly, or */
|
||||
/* to benchmark the various implementations. */
|
||||
/* #define FT_CONFIG_OPTION_NO_ASSEMBLER */
|
||||
|
||||
/*************************************************************************/
|
||||
/* */
|
||||
/* When this macro is defined, try to use an inlined assembler version */
|
||||
/* of the FT_MulFix function, which appears to be a hotspot when loading */
|
||||
/* and hinting glyphs. */
|
||||
/* */
|
||||
/* Note that if your compiler or CPU isn't supported, this will default to */
|
||||
/* the standard and portable implementation found in src/base/ftcalc.c */
|
||||
/* */
|
||||
#define FT_CONFIG_OPTION_INLINE_MULFIX
|
||||
|
||||
|
||||
/*************************************************************************/
|
||||
/* */
|
||||
/* LZW-compressed file support. */
|
||||
|
|
|
@ -3468,10 +3468,13 @@ FT_BEGIN_HEADER
|
|||
/* _second_ argument of this function; this can make a great */
|
||||
/* difference. */
|
||||
/* */
|
||||
#ifdef FT_MULFIX_INLINED
|
||||
# define FT_MulFix(a,b) FT_MULFIX_INLINED(a,b)
|
||||
#else
|
||||
FT_EXPORT( FT_Long )
|
||||
FT_MulFix( FT_Long a,
|
||||
FT_Long b );
|
||||
|
||||
#endif
|
||||
|
||||
/*************************************************************************/
|
||||
/* */
|
||||
|
|
|
@ -38,6 +38,9 @@
|
|||
#include FT_INTERNAL_DEBUG_H
|
||||
#include FT_INTERNAL_OBJECTS_H
|
||||
|
||||
#ifdef FT_MULFIX_INLINED
|
||||
#undef FT_MulFix
|
||||
#endif
|
||||
|
||||
/* we need to define a 64-bits data type here */
|
||||
|
||||
|
@ -193,6 +196,9 @@
|
|||
FT_MulFix( FT_Long a,
|
||||
FT_Long b )
|
||||
{
|
||||
#ifdef FT_MULFIX_ASSEMBLER
|
||||
return FT_MULFIX_ASSEMBLER(a,b);
|
||||
#else
|
||||
FT_Int s = 1;
|
||||
FT_Long c;
|
||||
|
||||
|
@ -202,6 +208,7 @@
|
|||
|
||||
c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
|
||||
return ( s > 0 ) ? c : -c ;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -413,30 +420,8 @@
|
|||
FT_MulFix( FT_Long a,
|
||||
FT_Long b )
|
||||
{
|
||||
/* use inline assembly to speed up things a bit */
|
||||
|
||||
#if defined( __GNUC__ ) && defined( i386 )
|
||||
|
||||
FT_Long result;
|
||||
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"imul %%edx\n"
|
||||
"movl %%edx, %%ecx\n"
|
||||
"sarl $31, %%ecx\n"
|
||||
"addl $0x8000, %%ecx\n"
|
||||
"addl %%ecx, %%eax\n"
|
||||
"adcl $0, %%edx\n"
|
||||
"shrl $16, %%eax\n"
|
||||
"shll $16, %%edx\n"
|
||||
"addl %%edx, %%eax\n"
|
||||
"mov %%eax, %0\n"
|
||||
: "=a"(result), "+d"(b)
|
||||
: "a"(a)
|
||||
: "%ecx"
|
||||
);
|
||||
return result;
|
||||
|
||||
#ifdef FT_MULFIX_ASSEMBLER
|
||||
return FT_MULFIX_ASSEMBLER(a,b);
|
||||
#elif 0
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue