From b7920c3991cdd7346acc96119e452a5f468a56e0 Mon Sep 17 00:00:00 2001 From: Piotr Caban Date: Wed, 2 Jun 2021 17:53:34 +0200 Subject: [PATCH] msvcrt: Import fma implementation from musl. Signed-off-by: Piotr Caban Signed-off-by: Alexandre Julliard --- configure | 1 - configure.ac | 1 - dlls/msvcrt/math.c | 210 +++++++++++++++++++++++++++++++++++++++++- dlls/msvcrt/unixlib.c | 13 --- dlls/msvcrt/unixlib.h | 1 - include/config.h.in | 3 - 6 files changed, 205 insertions(+), 24 deletions(-) diff --git a/configure b/configure index 3f8b89d287d..d74c155d8b6 100755 --- a/configure +++ b/configure @@ -19618,7 +19618,6 @@ fi for ac_func in \ exp2 \ exp2f \ - fma \ fmaf \ lgamma \ lgammaf \ diff --git a/configure.ac b/configure.ac index 7ea0d824cee..2882e54bea5 100644 --- a/configure.ac +++ b/configure.ac @@ -2658,7 +2658,6 @@ fi AC_CHECK_FUNCS(\ exp2 \ exp2f \ - fma \ fmaf \ lgamma \ lgammaf \ diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index 3318f128159..596361c6729 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -3258,14 +3258,214 @@ double CDECL floor( double x ) /********************************************************************* * fma (MSVCRT.@) + * + * Copied from musl: src/math/fma.c */ +struct fma_num +{ + UINT64 m; + int e; + int sign; +}; + +static struct fma_num normalize(double x) +{ + UINT64 ix = *(UINT64*)&x; + int e = ix >> 52; + int sign = e & 0x800; + struct fma_num ret; + + e &= 0x7ff; + if (!e) { + x *= 0x1p63; + ix = *(UINT64*)&x; + e = ix >> 52 & 0x7ff; + e = e ? e - 63 : 0x800; + } + ix &= (1ull << 52) - 1; + ix |= 1ull << 52; + ix <<= 1; + e -= 0x3ff + 52 + 1; + + ret.m = ix; + ret.e = e; + ret.sign = sign; + return ret; +} + +static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y) +{ + UINT64 t1, t2, t3; + UINT64 xlo = (UINT32)x, xhi = x >> 32; + UINT64 ylo = (UINT32)y, yhi = y >> 32; + + t1 = xlo * ylo; + t2 = xlo * yhi + xhi * ylo; + t3 = xhi * yhi; + *lo = t1 + (t2 << 32); + *hi = t3 + (t2 >> 32) + (t1 > *lo); +} + double CDECL fma( double x, double y, double z ) { - double w = unix_funcs->fma(x, y, z); - if ((isinf(x) && y == 0) || (x == 0 && isinf(y))) *_errno() = EDOM; - else if (isinf(x) && isinf(z) && x != z) *_errno() = EDOM; - else if (isinf(y) && isinf(z) && y != z) *_errno() = EDOM; - return w; + int e, d, sign, samesign, nonzero; + UINT64 rhi, rlo, zhi, zlo; + struct fma_num nx, ny, nz; + double r; + INT64 i; + + /* normalize so top 10bits and last bit are 0 */ + nx = normalize(x); + ny = normalize(y); + nz = normalize(z); + + if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) { + r = x * y + z; + if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM; + return r; + } + if (nz.e >= 0x7ff - 0x3ff - 52 - 1) { + if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */ + r = x * y + z; + if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM; + return r; + } + return z; + } + + /* mul: r = x*y */ + mul(&rhi, &rlo, nx.m, ny.m); + /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ + + /* align exponents */ + e = nx.e + ny.e; + d = nz.e - e; + /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ + if (d > 0) { + if (d < 64) { + zlo = nz.m << d; + zhi = nz.m >> (64 - d); + } else { + zlo = 0; + zhi = nz.m; + e = nz.e - 64; + d -= 64; + if (d < 64 && d) { + rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d)); + rhi = rhi >> d; + } else if (d) { + rlo = 1; + rhi = 0; + } + } + } else { + zhi = 0; + d = -d; + if (d == 0) { + zlo = nz.m; + } else if (d < 64) { + zlo = nz.m >> d | !!(nz.m << (64 - d)); + } else { + zlo = 1; + } + } + + /* add */ + sign = nx.sign ^ ny.sign; + samesign = !(sign ^ nz.sign); + nonzero = 1; + if (samesign) { + /* r += z */ + rlo += zlo; + rhi += zhi + (rlo < zlo); + } else { + /* r -= z */ + UINT64 t = rlo; + rlo -= zlo; + rhi = rhi - zhi - (t < rlo); + if (rhi >> 63) { + rlo = -rlo; + rhi = -rhi - !!rlo; + sign = !sign; + } + nonzero = !!rhi; + } + + /* set rhi to top 63bit of the result (last bit is sticky) */ + if (nonzero) { + e += 64; + if (rhi >> 32) { + BitScanReverse((DWORD*)&d, rhi >> 32); + d = 31 - d - 1; + } else { + BitScanReverse((DWORD*)&d, rhi); + d = 63 - d - 1; + } + /* note: d > 0 */ + rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d); + } else if (rlo) { + if (rlo >> 32) { + BitScanReverse((DWORD*)&d, rlo >> 32); + d = 31 - d - 1; + } else { + BitScanReverse((DWORD*)&d, rlo); + d = 63 - d - 1; + } + if (d < 0) + rhi = rlo >> 1 | (rlo & 1); + else + rhi = rlo << d; + } else { + /* exact +-0 */ + return x * y + z; + } + e -= d; + + /* convert to double */ + i = rhi; /* i is in [1<<62,(1<<63)-1] */ + if (sign) + i = -i; + r = i; /* |r| is in [0x1p62,0x1p63] */ + + if (e < -1022 - 62) { + /* result is subnormal before rounding */ + if (e == -1022 - 63) { + double c = 0x1p63; + if (sign) + c = -c; + if (r == c) { + /* min normal after rounding, underflow depends + on arch behaviour which can be imitated by + a double to float conversion */ + float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r; + return DBL_MIN / FLT_MIN * fltmin; + } + /* one bit is lost when scaled, add another top bit to + only round once at conversion if it is inexact */ + if (rhi << 53) { + double tiny; + + i = rhi >> 1 | (rhi & 1) | 1ull << 62; + if (sign) + i = -i; + r = i; + r = 2 * r - c; /* remove top bit */ + + /* raise underflow portably, such that it + cannot be optimized away */ + tiny = DBL_MIN / FLT_MIN * r; + r += (double)(tiny * tiny) * (r - r); + } + } else { + /* only round once when scaled */ + d = 10; + i = (rhi >> d | !!(rhi << (64 - d))) << d; + if (sign) + i = -i; + r = i; + } + } + return __scalbn(r, e); } /********************************************************************* diff --git a/dlls/msvcrt/unixlib.c b/dlls/msvcrt/unixlib.c index 9afa0f41557..25ebcac1736 100644 --- a/dlls/msvcrt/unixlib.c +++ b/dlls/msvcrt/unixlib.c @@ -82,18 +82,6 @@ static float CDECL unix_exp2f( float x ) #endif } -/********************************************************************* - * fma - */ -static double CDECL unix_fma( double x, double y, double z ) -{ -#ifdef HAVE_FMA - return fma(x, y, z); -#else - return x * y + z; -#endif -} - /********************************************************************* * fmaf */ @@ -292,7 +280,6 @@ static const struct unix_funcs funcs = unix_expf, unix_exp2, unix_exp2f, - unix_fma, unix_fmaf, unix_frexp, unix_frexpf, diff --git a/dlls/msvcrt/unixlib.h b/dlls/msvcrt/unixlib.h index 2faebbac82f..202f8da44cd 100644 --- a/dlls/msvcrt/unixlib.h +++ b/dlls/msvcrt/unixlib.h @@ -27,7 +27,6 @@ struct unix_funcs float (CDECL *expf)(float x); double (CDECL *exp2)(double x); float (CDECL *exp2f)(float x); - double (CDECL *fma)(double x, double y, double z); float (CDECL *fmaf)(float x, float y, float z); double (CDECL *frexp)(double x, int *exp); float (CDECL *frexpf)(float x, int *exp); diff --git a/include/config.h.in b/include/config.h.in index 42aaa2708a8..3d1e5e6ca5a 100644 --- a/include/config.h.in +++ b/include/config.h.in @@ -120,9 +120,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_FLOAT_H -/* Define to 1 if you have the `fma' function. */ -#undef HAVE_FMA - /* Define to 1 if you have the `fmaf' function. */ #undef HAVE_FMAF