From f5bd0be6a44c1c7d69afb8b8eb6311923e7762a1 Mon Sep 17 00:00:00 2001 From: Piotr Caban Date: Fri, 11 Jun 2021 20:15:43 +0200 Subject: [PATCH] msvcrt: Import exp implementation from musl. Signed-off-by: Piotr Caban Signed-off-by: Alexandre Julliard --- dlls/msvcrt/math.c | 156 ++++++++++++++++++++++++++++++++++-------- dlls/msvcrt/unixlib.c | 9 --- dlls/msvcrt/unixlib.h | 1 - 3 files changed, 128 insertions(+), 38 deletions(-) diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index cb1969fcb3e..3d4ce3b3108 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -549,6 +549,27 @@ recompute: return n & 7; } +/* Based on musl implementation: src/math/round.c */ +static double __round(double x) +{ + ULONGLONG llx = *(ULONGLONG*)&x, tmp; + int e = (llx >> 52 & 0x7ff) - 0x3ff; + + if (e >= 52) + return x; + if (e < -1) + return 0 * x; + else if (e == -1) + return signbit(x) ? -1 : 1; + + tmp = 0x000fffffffffffffULL >> e; + if (!(llx & tmp)) + return x; + llx += 0x0008000000000000ULL >> e; + llx &= ~tmp; + return *(double*)&llx; +} + #if !defined(__i386__) || _MSVCR_VER >= 120 /* Copied from musl: src/math/expm1f.c */ static float __expm1f(float x) @@ -669,27 +690,6 @@ static float __cosdf(double x) return ((1.0 + z * C0) + w * C1) + (w * z) * r; } -/* Based on musl implementation: src/math/round.c */ -static double __round(double x) -{ - ULONGLONG llx = *(ULONGLONG*)&x, tmp; - int e = (llx >> 52 & 0x7ff) - 0x3ff; - - if (e >= 52) - return x; - if (e < -1) - return 0 * x; - else if (e == -1) - return signbit(x) ? -1 : 1; - - tmp = 0x000fffffffffffffULL >> e; - if (!(llx & tmp)) - return x; - llx += 0x0008000000000000ULL >> e; - llx &= ~tmp; - return *(double*)&llx; -} - static const UINT64 exp2f_T[] = { 0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL, 0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL, @@ -2794,7 +2794,6 @@ double CDECL cosh( double x ) return t; } -#if _MSVCR_VER >= 120 /* Copied from musl: src/math/exp_data.c */ static const UINT64 exp_T[] = { 0x0ULL, 0x3ff0000000000000ULL, @@ -2926,18 +2925,119 @@ static const UINT64 exp_T[] = { 0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL, 0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL }; -#endif /********************************************************************* * exp (MSVCRT.@) + * + * Copied from musl: src/math/exp.c */ double CDECL exp( double x ) { - double ret = unix_funcs->exp( x ); - if (isnan(x)) return math_error(_DOMAIN, "exp", x, 0, ret); - if (isfinite(x) && !ret) return math_error(_UNDERFLOW, "exp", x, 0, ret); - if (isfinite(x) && !isfinite(ret)) return math_error(_OVERFLOW, "exp", x, 0, ret); - return ret; + static const double C[] = { + 0x1.ffffffffffdbdp-2, + 0x1.555555555543cp-3, + 0x1.55555cf172b91p-5, + 0x1.1111167a4d017p-7 + }; + static const double invln2N = 0x1.71547652b82fep0 * (1 << 7), + negln2hiN = -0x1.62e42fefa0000p-8, + negln2loN = -0x1.cf79abc9e3b3ap-47; + + UINT32 abstop; + UINT64 ki, idx, top, sbits; + double kd, z, r, r2, scale, tail, tmp; + + abstop = (*(UINT64*)&x >> 52) & 0x7ff; + if (abstop - 0x3c9 >= 0x408 - 0x3c9) { + if (abstop - 0x3c9 >= 0x80000000) + /* Avoid spurious underflow for tiny x. */ + /* Note: 0 is common input. */ + return 1.0 + x; + if (abstop >= 0x409) { + if (*(UINT64*)&x == 0xfff0000000000000ULL) + return 0.0; +#if _MSVCR_VER == 0 + if (*(UINT64*)&x > 0x7ff0000000000000ULL) + return math_error(_DOMAIN, "exp", x, 0, 1.0 + x); +#endif + if (abstop >= 0x7ff) + return 1.0 + x; + if (*(UINT64*)&x >> 63) + return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN); + else + return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX); + } + /* Large x is special cased below. */ + abstop = 0; + } + + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + z = invln2N * x; + kd = __round(z); + ki = (INT64)kd; + + r = x + kd * negln2hiN + kd * negln2loN; + /* 2^(k/N) ~= scale * (1 + tail). */ + idx = 2 * (ki % (1 << 7)); + top = ki << (52 - 7); + tail = *(double*)&exp_T[idx]; + /* This is only a valid scale when -1023*N < k < 1024*N. */ + sbits = exp_T[idx + 1] + top; + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + r2 = r * r; + /* Without fma the worst case error is 0.25/N ulp larger. */ + /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ + tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]); + if (abstop == 0) { + /* Handle cases that may overflow or underflow when computing the result that + is scale*(1+TMP) without intermediate rounding. The bit representation of + scale is in SBITS, however it has a computed exponent that may have + overflown into the sign bit so that needs to be adjusted before using it as + a double. (int32_t)KI is the k used in the argument reduction and exponent + adjustment of scale, positive k here means the result may overflow and + negative k means the result may underflow. */ + double scale, y; + + if ((ki & 0x80000000) == 0) { + /* k > 0, the exponent of scale might have overflowed by <= 460. */ + sbits -= 1009ull << 52; + scale = *(double*)&sbits; + y = 0x1p1009 * (scale + scale * tmp); + if (isinf(y)) + return math_error(_OVERFLOW, "exp", x, 0, y); + return y; + } + /* k < 0, need special care in the subnormal range. */ + sbits += 1022ull << 52; + scale = *(double*)&sbits; + y = scale + scale * tmp; + if (y < 1.0) { + /* Round y to the right precision before scaling it into the subnormal + range to avoid double rounding that can cause 0.5+E/2 ulp error where + E is the worst-case ulp error outside the subnormal range. So this + is only useful if the goal is better than 1 ulp worst-case error. */ + double hi, lo; + lo = scale - y + scale * tmp; + hi = 1.0 + y; + lo = 1.0 - hi + y + lo; + y = hi + lo - 1.0; + /* Avoid -0.0 with downward rounding. */ + if (y == 0.0) + y = 0.0; + /* The underflow exception needs to be signaled explicitly. */ + fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022); + y = 0x1p-1022 * y; + return math_error(_UNDERFLOW, "exp", x, 0, y); + } + y = 0x1p-1022 * y; + return y; + } + scale = *(double*)&sbits; + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + return scale + scale * tmp; } /********************************************************************* diff --git a/dlls/msvcrt/unixlib.c b/dlls/msvcrt/unixlib.c index 89cc155b4e4..adc290cd653 100644 --- a/dlls/msvcrt/unixlib.c +++ b/dlls/msvcrt/unixlib.c @@ -42,14 +42,6 @@ WINE_DEFAULT_DEBUG_CHANNEL(msvcrt); -/********************************************************************* - * exp - */ -static double CDECL unix_exp( double x ) -{ - return exp( x ); -} - /********************************************************************* * pow */ @@ -60,7 +52,6 @@ static double CDECL unix_pow( double x, double y ) static const struct unix_funcs funcs = { - unix_exp, unix_pow, }; diff --git a/dlls/msvcrt/unixlib.h b/dlls/msvcrt/unixlib.h index af44c0c9d89..0a6db6a8d54 100644 --- a/dlls/msvcrt/unixlib.h +++ b/dlls/msvcrt/unixlib.h @@ -23,7 +23,6 @@ struct unix_funcs { - double (CDECL *exp)(double x); double (CDECL *pow)(double x, double y); };