* include/freetype/ftoption.h, include/freetype/ftconfig.h,
builds/unix/ftconfig.in, include/freetype/freetype.h, src/base/ftcalc.c: Make FT_MulFix an inlined function. Also provide an assembler implementation for ARM architectures. This is done to speed up FreeType a little (on x86, 3% when loading+hinting and 10% when rendering; ARM savings are more important, though). Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in ftconfig.h.
This commit is contained in:
parent
238bb38513
commit
f47d263f1b
10
ChangeLog
10
ChangeLog
|
@ -1,5 +1,15 @@
|
||||||
2008-09-01 david turner <david@freetype.org>
|
2008-09-01 david turner <david@freetype.org>
|
||||||
|
|
||||||
|
* include/freetype/ftoption.h, include/freetype/ftconfig.h,
|
||||||
|
builds/unix/ftconfig.in, include/freetype/freetype.h,
|
||||||
|
src/base/ftcalc.c:
|
||||||
|
Make FT_MulFix an inlined function. Also provide an assembler
|
||||||
|
implementation for ARM architectures. This is done to speed up
|
||||||
|
FreeType a little (on x86 3% when loading+hinting, 10% when
|
||||||
|
rendering, ARM savings are more important though).
|
||||||
|
Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
|
||||||
|
ftconfig.h
|
||||||
|
|
||||||
* include/freetype/ftadvanc.h, src/base/ftadvanc.c,
|
* include/freetype/ftadvanc.h, src/base/ftadvanc.c,
|
||||||
include/freetype/config/ftheader.h, include/freetype/freetype.h,
|
include/freetype/config/ftheader.h, include/freetype/freetype.h,
|
||||||
src/base/Jamfile, src/base/rules.mk, src/cff/cffdrivr.c,
|
src/base/Jamfile, src/base/rules.mk, src/cff/cffdrivr.c,
|
||||||
|
|
|
@ -197,6 +197,67 @@ FT_BEGIN_HEADER
|
||||||
|
|
||||||
#endif /* FT_SIZEOF_LONG == 8 */
|
#endif /* FT_SIZEOF_LONG == 8 */
|
||||||
|
|
||||||
|
#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
|
||||||
|
/* provide assembler fragments for performance-critical
|
||||||
|
* functions. These must be defined static __inline__
|
||||||
|
* with GCC
|
||||||
|
*/
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
|
||||||
|
# if defined(__arm__) && !defined(__thumb__)
|
||||||
|
# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
|
||||||
|
static __inline__ FT_Int32
|
||||||
|
FT_MulFix_arm( FT_Int32 a, FT_Int32 b )
|
||||||
|
{
|
||||||
|
register FT_Int32 t, t2;
|
||||||
|
asm __volatile__ (
|
||||||
|
"smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */
|
||||||
|
"mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31) */
|
||||||
|
"add %0, %0, #0x8000\n\t" /* %0 += 0x8000 */
|
||||||
|
"adds %1, %1, %0\n\t" /* %1 += %0 */
|
||||||
|
"adc %2, %2, #0\n\t" /* %2 += carry */
|
||||||
|
"mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */
|
||||||
|
"orr %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 */
|
||||||
|
: "=r"(a), "=&r"(t2), "=&r"(t)
|
||||||
|
: "r"(a), "r"(b)
|
||||||
|
);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
# endif /* __arm__ */
|
||||||
|
|
||||||
|
# if defined(i386)
|
||||||
|
# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
|
||||||
|
/*
 * FT_MulFix_i386
 *
 *   Multiply two FT_Int32 values with x86 inline assembly (GCC syntax)
 *   and return the product scaled down by 16 bits.
 *
 *   The 64-bit signed product a*b is formed in edx:eax.  A rounding
 *   term is added to it: 0x8000 when the high word is non-negative,
 *   0x7FFF when it is negative.  The result is bits 16..47 of that sum.
 *
 *   The instruction sequence uses fixed registers; its order must not
 *   be changed.
 */
static __inline__ FT_Int32
|
||||||
|
FT_MulFix_i386( FT_Int32 a, FT_Int32 b )
|
||||||
|
{
|
||||||
|
register FT_Int32 result;
|
||||||
|
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"imul %%edx\n" /* edx:eax = eax * edx (a in eax, b in edx) */
|
||||||
|
"movl %%edx, %%ecx\n" /* ecx = high word of the product */
|
||||||
|
"sarl $31, %%ecx\n" /* ecx = 0 or -1, following the sign of the high word */
|
||||||
|
"addl $0x8000, %%ecx\n" /* ecx = rounding term, 0x8000 or 0x7FFF */
|
||||||
|
"addl %%ecx, %%eax\n" /* low word += rounding term */
|
||||||
|
"adcl $0, %%edx\n" /* propagate the carry into the high word */
|
||||||
|
"shrl $16, %%eax\n" /* eax = low word >> 16 (logical shift) */
|
||||||
|
"shll $16, %%edx\n" /* edx = high word << 16 */
|
||||||
|
"addl %%edx, %%eax\n" /* eax = combined result */
|
||||||
|
: "=a"(result), "+d"(b) /* outputs: result in eax; edx holds b and is overwritten */
|
||||||
|
: "a"(a) /* input: a in eax */
|
||||||
|
: "%ecx" /* ecx is used as scratch */
|
||||||
|
);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
# endif /* i386 */
|
||||||
|
#endif /* __GNUC__ */
|
||||||
|
#endif /* !NO_ASSEMBLER */
|
||||||
|
|
||||||
|
#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
|
||||||
|
# ifdef FT_MULFIX_ASSEMBLER
|
||||||
|
# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define FT_BEGIN_STMNT do {
|
#define FT_BEGIN_STMNT do {
|
||||||
#define FT_END_STMNT } while ( 0 )
|
#define FT_END_STMNT } while ( 0 )
|
||||||
|
|
|
@ -225,6 +225,67 @@ FT_BEGIN_HEADER
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
|
||||||
|
/* provide assembler fragments for performance-critical
|
||||||
|
* functions. These must be defined static __inline__
|
||||||
|
* with GCC
|
||||||
|
*/
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
|
||||||
|
# if defined(__arm__) && !defined(__thumb__)
|
||||||
|
# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
|
||||||
|
static __inline__ FT_Int32
|
||||||
|
FT_MulFix_arm( FT_Int32 a, FT_Int32 b )
|
||||||
|
{
|
||||||
|
register FT_Int32 t, t2;
|
||||||
|
asm __volatile__ (
|
||||||
|
"smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */
|
||||||
|
"mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31) */
|
||||||
|
"add %0, %0, #0x8000\n\t" /* %0 += 0x8000 */
|
||||||
|
"adds %1, %1, %0\n\t" /* %1 += %0 */
|
||||||
|
"adc %2, %2, #0\n\t" /* %2 += carry */
|
||||||
|
"mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */
|
||||||
|
"orr %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 */
|
||||||
|
: "=r"(a), "=&r"(t2), "=&r"(t)
|
||||||
|
: "r"(a), "r"(b)
|
||||||
|
);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
# endif /* __arm__ */
|
||||||
|
|
||||||
|
# if defined(i386)
|
||||||
|
# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
|
||||||
|
/*
 * FT_MulFix_i386
 *
 *   Multiply two FT_Int32 values with x86 inline assembly (GCC syntax)
 *   and return the product scaled down by 16 bits.
 *
 *   The 64-bit signed product a*b is formed in edx:eax.  A rounding
 *   term is added to it: 0x8000 when the high word is non-negative,
 *   0x7FFF when it is negative.  The result is bits 16..47 of that sum.
 *
 *   The instruction sequence uses fixed registers; its order must not
 *   be changed.
 */
static __inline__ FT_Int32
|
||||||
|
FT_MulFix_i386( FT_Int32 a, FT_Int32 b )
|
||||||
|
{
|
||||||
|
register FT_Int32 result;
|
||||||
|
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"imul %%edx\n" /* edx:eax = eax * edx (a in eax, b in edx) */
|
||||||
|
"movl %%edx, %%ecx\n" /* ecx = high word of the product */
|
||||||
|
"sarl $31, %%ecx\n" /* ecx = 0 or -1, following the sign of the high word */
|
||||||
|
"addl $0x8000, %%ecx\n" /* ecx = rounding term, 0x8000 or 0x7FFF */
|
||||||
|
"addl %%ecx, %%eax\n" /* low word += rounding term */
|
||||||
|
"adcl $0, %%edx\n" /* propagate the carry into the high word */
|
||||||
|
"shrl $16, %%eax\n" /* eax = low word >> 16 (logical shift) */
|
||||||
|
"shll $16, %%edx\n" /* edx = high word << 16 */
|
||||||
|
"addl %%edx, %%eax\n" /* eax = combined result */
|
||||||
|
: "=a"(result), "+d"(b) /* outputs: result in eax; edx holds b and is overwritten */
|
||||||
|
: "a"(a) /* input: a in eax */
|
||||||
|
: "%ecx" /* ecx is used as scratch */
|
||||||
|
);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
# endif /* i386 */
|
||||||
|
#endif /* __GNUC__ */
|
||||||
|
#endif /* !NO_ASSEMBLER */
|
||||||
|
|
||||||
|
#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
|
||||||
|
# ifdef FT_MULFIX_ASSEMBLER
|
||||||
|
# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* determine whether we have a 64-bit int type for platforms without */
|
/* determine whether we have a 64-bit int type for platforms without */
|
||||||
/* Autoconf */
|
/* Autoconf */
|
||||||
|
|
|
@ -115,6 +115,26 @@ FT_BEGIN_HEADER
|
||||||
#undef FT_CONFIG_OPTION_FORCE_INT64
|
#undef FT_CONFIG_OPTION_FORCE_INT64
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************/
|
||||||
|
/* */
|
||||||
|
/* When this macro is defined, do not try to use an assembler version */
|
||||||
|
/* of performance-critical functions (e.g. FT_MulFix). You should only */
|
||||||
|
/* do that to verify that the assembler function works properly, or even */
|
||||||
|
/* to benchmark the various implementations... */
|
||||||
|
/* #define FT_CONFIG_OPTION_NO_ASSEMBLER */
|
||||||
|
|
||||||
|
/*************************************************************************/
|
||||||
|
/* */
|
||||||
|
/* When this macro is defined, try to use an inlined assembler version */
|
||||||
|
/* of the FT_MulFix function, which appears to be a hotspot when loading */
|
||||||
|
/* and hinting glyphs. */
|
||||||
|
/* */
|
||||||
|
/* Note that if your compiler/CPU isn't supported, this will default to */
|
||||||
|
/* the standard and portable implementation found in src/base/ftcalc.c */
|
||||||
|
/* */
|
||||||
|
#define FT_CONFIG_OPTION_INLINE_MULFIX
|
||||||
|
|
||||||
|
|
||||||
/*************************************************************************/
|
/*************************************************************************/
|
||||||
/* */
|
/* */
|
||||||
/* LZW-compressed file support. */
|
/* LZW-compressed file support. */
|
||||||
|
|
|
@ -3468,10 +3468,13 @@ FT_BEGIN_HEADER
|
||||||
/* _second_ argument of this function; this can make a great */
|
/* _second_ argument of this function; this can make a great */
|
||||||
/* difference. */
|
/* difference. */
|
||||||
/* */
|
/* */
|
||||||
|
#ifdef FT_MULFIX_INLINED
|
||||||
|
# define FT_MulFix(a,b) FT_MULFIX_INLINED(a,b)
|
||||||
|
#else
|
||||||
FT_EXPORT( FT_Long )
|
FT_EXPORT( FT_Long )
|
||||||
FT_MulFix( FT_Long a,
|
FT_MulFix( FT_Long a,
|
||||||
FT_Long b );
|
FT_Long b );
|
||||||
|
#endif
|
||||||
|
|
||||||
/*************************************************************************/
|
/*************************************************************************/
|
||||||
/* */
|
/* */
|
||||||
|
|
|
@ -38,6 +38,9 @@
|
||||||
#include FT_INTERNAL_DEBUG_H
|
#include FT_INTERNAL_DEBUG_H
|
||||||
#include FT_INTERNAL_OBJECTS_H
|
#include FT_INTERNAL_OBJECTS_H
|
||||||
|
|
||||||
|
#ifdef FT_MULFIX_INLINED
|
||||||
|
#undef FT_MulFix
|
||||||
|
#endif
|
||||||
|
|
||||||
/* we need to define a 64-bits data type here */
|
/* we need to define a 64-bits data type here */
|
||||||
|
|
||||||
|
@ -193,6 +196,9 @@
|
||||||
FT_MulFix( FT_Long a,
|
FT_MulFix( FT_Long a,
|
||||||
FT_Long b )
|
FT_Long b )
|
||||||
{
|
{
|
||||||
|
#ifdef FT_MULFIX_ASSEMBLER
|
||||||
|
return FT_MULFIX_ASSEMBLER(a,b);
|
||||||
|
#else
|
||||||
FT_Int s = 1;
|
FT_Int s = 1;
|
||||||
FT_Long c;
|
FT_Long c;
|
||||||
|
|
||||||
|
@ -202,6 +208,7 @@
|
||||||
|
|
||||||
c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
|
c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
|
||||||
return ( s > 0 ) ? c : -c ;
|
return ( s > 0 ) ? c : -c ;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -413,30 +420,8 @@
|
||||||
FT_MulFix( FT_Long a,
|
FT_MulFix( FT_Long a,
|
||||||
FT_Long b )
|
FT_Long b )
|
||||||
{
|
{
|
||||||
/* use inline assembly to speed up things a bit */
|
#ifdef FT_MULFIX_ASSEMBLER
|
||||||
|
return FT_MULFIX_ASSEMBLER(a,b);
|
||||||
#if defined( __GNUC__ ) && defined( i386 )
|
|
||||||
|
|
||||||
FT_Long result;
|
|
||||||
|
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
|
||||||
"imul %%edx\n"
|
|
||||||
"movl %%edx, %%ecx\n"
|
|
||||||
"sarl $31, %%ecx\n"
|
|
||||||
"addl $0x8000, %%ecx\n"
|
|
||||||
"addl %%ecx, %%eax\n"
|
|
||||||
"adcl $0, %%edx\n"
|
|
||||||
"shrl $16, %%eax\n"
|
|
||||||
"shll $16, %%edx\n"
|
|
||||||
"addl %%edx, %%eax\n"
|
|
||||||
"mov %%eax, %0\n"
|
|
||||||
: "=a"(result), "+d"(b)
|
|
||||||
: "a"(a)
|
|
||||||
: "%ecx"
|
|
||||||
);
|
|
||||||
return result;
|
|
||||||
|
|
||||||
#elif 0
|
#elif 0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue