From f1ef14dc177b23c908e4dd2d47846cb673e3feac Mon Sep 17 00:00:00 2001 From: Piotr Caban Date: Wed, 4 Aug 2021 18:05:03 +0200 Subject: [PATCH] msvcrt: Introduce _setfp helper to access floating point status and control words. Signed-off-by: Piotr Caban Signed-off-by: Alexandre Julliard --- dlls/msvcrt/math.c | 324 ++++++++++++++++++++++----------------------- 1 file changed, 162 insertions(+), 162 deletions(-) diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index c385dd036b7..b27b6391a6f 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -5235,37 +5235,148 @@ static BOOL _setfp_sse( unsigned int *cw, unsigned int cw_mask, } #endif +#if defined(__i386__) || defined(__x86_64__) +static BOOL _setfp( unsigned int *cw, unsigned int cw_mask, + unsigned int *sw, unsigned int sw_mask ) +{ +#if defined(__GNUC__) || defined(__clang__) +#ifdef __i386__ + unsigned long oldcw = 0, newcw = 0; + unsigned long oldsw = 0, newsw = 0; + unsigned int flags; + + cw_mask &= _MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC; + sw_mask &= _MCW_EM; + + if (sw) + { + __asm__ __volatile__( "fstsw %0" : "=m" (newsw) ); + oldsw = newsw; + + flags = 0; + if (newsw & 0x1) flags |= _SW_INVALID; + if (newsw & 0x2) flags |= _SW_DENORMAL; + if (newsw & 0x4) flags |= _SW_ZERODIVIDE; + if (newsw & 0x8) flags |= _SW_OVERFLOW; + if (newsw & 0x10) flags |= _SW_UNDERFLOW; + if (newsw & 0x20) flags |= _SW_INEXACT; + + *sw = (flags & ~sw_mask) | (*sw & sw_mask); + TRACE("x86 update sw %08x to %08x\n", flags, *sw); + newsw &= ~0x3f; + if (*sw & _SW_INVALID) newsw |= 0x1; + if (*sw & _SW_DENORMAL) newsw |= 0x2; + if (*sw & _SW_ZERODIVIDE) newsw |= 0x4; + if (*sw & _SW_OVERFLOW) newsw |= 0x8; + if (*sw & _SW_UNDERFLOW) newsw |= 0x10; + if (*sw & _SW_INEXACT) newsw |= 0x20; + *sw = flags; + } + + if (cw) + { + __asm__ __volatile__( "fstcw %0" : "=m" (newcw) ); + oldcw = newcw; + + flags = 0; + if (newcw & 0x1) flags |= _EM_INVALID; + if (newcw & 0x2) flags |= _EM_DENORMAL; + if (newcw & 0x4) flags |= _EM_ZERODIVIDE; + if (newcw & 0x8) flags |= _EM_OVERFLOW; + if (newcw & 0x10) flags |= _EM_UNDERFLOW; + if (newcw & 0x20) flags |= _EM_INEXACT; + switch (newcw & 0xc00) + { + case 0xc00: flags |= _RC_UP|_RC_DOWN; break; + case 0x800: flags |= _RC_UP; break; + case 0x400: flags |= _RC_DOWN; break; + } + switch (newcw & 0x300) + { + case 0x0: flags |= _PC_24; break; + case 0x200: flags |= _PC_53; break; + case 0x300: flags |= _PC_64; break; + } + if (newcw & 0x1000) flags |= _IC_AFFINE; + + *cw = (flags & ~cw_mask) | (*cw & cw_mask); + TRACE("x86 update cw %08x to %08x\n", flags, *cw); + newcw &= ~0x1e3f; + if (*cw & _EM_INVALID) newcw |= 0x1; + if (*cw & _EM_DENORMAL) newcw |= 0x2; + if (*cw & _EM_ZERODIVIDE) newcw |= 0x4; + if (*cw & _EM_OVERFLOW) newcw |= 0x8; + if (*cw & _EM_UNDERFLOW) newcw |= 0x10; + if (*cw & _EM_INEXACT) newcw |= 0x20; + switch (*cw & _MCW_RC) + { + case _RC_UP|_RC_DOWN: newcw |= 0xc00; break; + case _RC_UP: newcw |= 0x800; break; + case _RC_DOWN: newcw |= 0x400; break; + } + switch (*cw & _MCW_PC) + { + case _PC_64: newcw |= 0x300; break; + case _PC_53: newcw |= 0x200; break; + case _PC_24: newcw |= 0x0; break; + } + if (*cw & _IC_AFFINE) newcw |= 0x1000; + } + + if (oldsw != newsw && newsw) + { + struct { + WORD control_word; + WORD unused1; + WORD status_word; + WORD unused2; + WORD tag_word; + WORD unused3; + DWORD instruction_pointer; + WORD code_segment; + WORD unused4; + DWORD operand_addr; + WORD data_segment; + WORD unused5; + } fenv; + + __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) ); + fenv.control_word = newcw; + fenv.status_word = newsw; + __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)", + "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" ); + return TRUE; + } + + if (oldsw != newsw) + __asm__ __volatile__( "fnclex" ); + if (oldcw != newcw) + __asm__ __volatile__( "fldcw %0" : : "m" (newcw) ); + return TRUE; +#elif defined(__x86_64__) + return _setfp_sse(cw, cw_mask, sw, sw_mask); +#endif +#else + FIXME("not implemented\n"); + if (cw) *cw = 0; + if (sw) *sw = 0; + return FALSE; +#endif +} +#endif + /********************************************************************** * _statusfp2 (MSVCR80.@) */ #if defined(__i386__) void CDECL _statusfp2( unsigned int *x86_sw, unsigned int *sse2_sw ) { -#if defined(__GNUC__) || defined(__clang__) - unsigned int flags; - unsigned long fpword; - if (x86_sw) - { - __asm__ __volatile__( "fstsw %0" : "=m" (fpword) ); - flags = 0; - if (fpword & 0x1) flags |= _SW_INVALID; - if (fpword & 0x2) flags |= _SW_DENORMAL; - if (fpword & 0x4) flags |= _SW_ZERODIVIDE; - if (fpword & 0x8) flags |= _SW_OVERFLOW; - if (fpword & 0x10) flags |= _SW_UNDERFLOW; - if (fpword & 0x20) flags |= _SW_INEXACT; - *x86_sw = flags; - } - + _setfp(NULL, 0, x86_sw, 0); if (!sse2_sw) return; - if (sse2_supported) _setfp_sse(NULL, 0, sse2_sw, 0); else *sse2_sw = 0; -#else - FIXME( "not implemented\n" ); -#endif } #endif @@ -5282,7 +5393,7 @@ unsigned int CDECL _statusfp(void) /* FIXME: there's no definition for ambiguous status, just return all status bits for now */ flags = x86_sw | sse2_sw; #elif defined(__x86_64__) - _setfp_sse(NULL, 0, &flags, 0); + _setfp(NULL, 0, &flags, 0); #elif defined(__aarch64__) ULONG_PTR fpsr; @@ -5315,17 +5426,8 @@ unsigned int CDECL _statusfp(void) unsigned int CDECL _clearfp(void) { unsigned int flags = 0; -#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) - unsigned long fpword; - - __asm__ __volatile__( "fnstsw %0; fnclex" : "=m" (fpword) ); - if (fpword & 0x1) flags |= _SW_INVALID; - if (fpword & 0x2) flags |= _SW_DENORMAL; - if (fpword & 0x4) flags |= _SW_ZERODIVIDE; - if (fpword & 0x8) flags |= _SW_OVERFLOW; - if (fpword & 0x10) flags |= _SW_UNDERFLOW; - if (fpword & 0x20) flags |= _SW_INEXACT; - +#ifdef __i386__ + _setfp(NULL, 0, &flags, _MCW_EM); if (sse2_supported) { unsigned int sse_sw = 0; @@ -5334,7 +5436,7 @@ unsigned int CDECL _clearfp(void) flags |= sse_sw; } #elif defined(__x86_64__) - _setfp_sse(NULL, 0, &flags, _MCW_EM); + _setfp(NULL, 0, &flags, _MCW_EM); #elif defined(__aarch64__) ULONG_PTR fpsr; @@ -5414,66 +5516,11 @@ double CDECL _chgsign(double num) int CDECL __control87_2( unsigned int newval, unsigned int mask, unsigned int *x86_cw, unsigned int *sse2_cw ) { -#if defined(__GNUC__) || defined(__clang__) - unsigned long fpword; - unsigned int flags; - if (x86_cw) { - __asm__ __volatile__( "fstcw %0" : "=m" (fpword) ); - - /* Convert into mask constants */ - flags = 0; - if (fpword & 0x1) flags |= _EM_INVALID; - if (fpword & 0x2) flags |= _EM_DENORMAL; - if (fpword & 0x4) flags |= _EM_ZERODIVIDE; - if (fpword & 0x8) flags |= _EM_OVERFLOW; - if (fpword & 0x10) flags |= _EM_UNDERFLOW; - if (fpword & 0x20) flags |= _EM_INEXACT; - switch (fpword & 0xc00) - { - case 0xc00: flags |= _RC_UP|_RC_DOWN; break; - case 0x800: flags |= _RC_UP; break; - case 0x400: flags |= _RC_DOWN; break; - } - switch (fpword & 0x300) - { - case 0x0: flags |= _PC_24; break; - case 0x200: flags |= _PC_53; break; - case 0x300: flags |= _PC_64; break; - } - if (fpword & 0x1000) flags |= _IC_AFFINE; - - TRACE( "x86 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask ); - if (mask) - { - flags = (flags & ~mask) | (newval & mask); - - /* Convert (masked) value back to fp word */ - fpword = 0; - if (flags & _EM_INVALID) fpword |= 0x1; - if (flags & _EM_DENORMAL) fpword |= 0x2; - if (flags & _EM_ZERODIVIDE) fpword |= 0x4; - if (flags & _EM_OVERFLOW) fpword |= 0x8; - if (flags & _EM_UNDERFLOW) fpword |= 0x10; - if (flags & _EM_INEXACT) fpword |= 0x20; - switch (flags & _MCW_RC) - { - case _RC_UP|_RC_DOWN: fpword |= 0xc00; break; - case _RC_UP: fpword |= 0x800; break; - case _RC_DOWN: fpword |= 0x400; break; - } - switch (flags & _MCW_PC) - { - case _PC_64: fpword |= 0x300; break; - case _PC_53: fpword |= 0x200; break; - case _PC_24: fpword |= 0x0; break; - } - if (flags & _IC_AFFINE) fpword |= 0x1000; - - __asm__ __volatile__( "fldcw %0" : : "m" (fpword) ); - } - *x86_cw = flags; + *x86_cw = newval; + if (!_setfp(x86_cw, mask, NULL, 0)) + return 0; } if (!sse2_cw) return 1; @@ -5487,10 +5534,6 @@ int CDECL __control87_2( unsigned int newval, unsigned int mask, else *sse2_cw = 0; return 1; -#else - FIXME( "not implemented\n" ); - return 0; -#endif } #endif @@ -5509,7 +5552,7 @@ unsigned int CDECL _control87(unsigned int newval, unsigned int mask) flags |= sse2_cw; #elif defined(__x86_64__) flags = newval; - _setfp_sse(&flags, mask, NULL, 0); + _setfp(&flags, mask, NULL, 0); #elif defined(__aarch64__) ULONG_PTR fpcr; @@ -5747,7 +5790,7 @@ int CDECL fegetenv(fenv_t *env) env->_Fe_stat = fenv_encode(0, _statusfp()); #else env->_Fe_ctl = _controlfp(0, 0) & (_EM_INEXACT | _EM_UNDERFLOW | - _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _RC_CHOP); + _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _MCW_RC); env->_Fe_stat = _statusfp(); #endif return 0; @@ -5911,7 +5954,7 @@ void CDECL _fpreset(void) } #elif defined(__x86_64__) unsigned int cw = _MCW_EM, sw = 0; - _setfp_sse(&cw, ~0, &sw, ~0); + _setfp(&cw, ~0, &sw, ~0); #else FIXME( "not implemented\n" ); #endif @@ -5923,24 +5966,9 @@ void CDECL _fpreset(void) */ int CDECL fesetenv(const fenv_t *env) { -#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) - unsigned int x87_cw, sse_cw, x87_stat, sse_stat; -#ifdef __i386__ - struct { - WORD control_word; - WORD unused1; - WORD status_word; - WORD unused2; - WORD tag_word; - WORD unused3; - DWORD instruction_pointer; - WORD code_segment; - WORD unused4; - DWORD operand_addr; - WORD data_segment; - WORD unused5; - } fenv; -#endif +#if defined(__i386__) || defined(__x86_64__) + unsigned int x87_cw, cw, x87_stat, stat; + unsigned int mask; TRACE( "(%p)\n", env ); @@ -5949,60 +5977,32 @@ int CDECL fesetenv(const fenv_t *env) return 0; } - if (!fenv_decode(env->_Fe_ctl, &x87_cw, &sse_cw)) + if (!fenv_decode(env->_Fe_ctl, &x87_cw, &cw)) return 1; - if (!fenv_decode(env->_Fe_stat, &x87_stat, &sse_stat)) + if (!fenv_decode(env->_Fe_stat, &x87_stat, &stat)) return 1; +#if _MSVCR_VER >= 140 + mask = ~0; +#else + mask = _EM_INEXACT | _EM_UNDERFLOW | _EM_OVERFLOW + | _EM_ZERODIVIDE | _EM_INVALID | _MCW_RC; +#endif + #ifdef __i386__ - __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) ); - - fenv.control_word &= ~0xc3d; -#if _MSVCR_VER>=140 - fenv.control_word &= ~0x1302; -#endif - if (x87_cw & _EM_INVALID) fenv.control_word |= 0x1; - if (x87_cw & _EM_ZERODIVIDE) fenv.control_word |= 0x4; - if (x87_cw & _EM_OVERFLOW) fenv.control_word |= 0x8; - if (x87_cw & _EM_UNDERFLOW) fenv.control_word |= 0x10; - if (x87_cw & _EM_INEXACT) fenv.control_word |= 0x20; - switch (x87_cw & _MCW_RC) - { - case _RC_UP|_RC_DOWN: fenv.control_word |= 0xc00; break; - case _RC_UP: fenv.control_word |= 0x800; break; - case _RC_DOWN: fenv.control_word |= 0x400; break; - } -#if _MSVCR_VER>=140 - if (x87_cw & _EM_DENORMAL) fenv.control_word |= 0x2; - switch (x87_cw & _MCW_PC) - { - case _PC_64: fenv.control_word |= 0x300; break; - case _PC_53: fenv.control_word |= 0x200; break; - case _PC_24: fenv.control_word |= 0x0; break; - } - if (x87_cw & _IC_AFFINE) fenv.control_word |= 0x1000; -#endif - - fenv.status_word &= ~0x3f; - if (x87_stat & _SW_INVALID) fenv.status_word |= 0x1; - if (x87_stat & _SW_DENORMAL) fenv.status_word |= 0x2; - if (x87_stat & _SW_ZERODIVIDE) fenv.status_word |= 0x4; - if (x87_stat & _SW_OVERFLOW) fenv.status_word |= 0x8; - if (x87_stat & _SW_UNDERFLOW) fenv.status_word |= 0x10; - if (x87_stat & _SW_INEXACT) fenv.status_word |= 0x20; - - __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)", - "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" ); -#endif - + if (!_setfp(&x87_cw, mask, &x87_stat, ~0)) + return 1; if (sse2_supported) { - if(!_setfp_sse(&sse_cw, _MSVCR_VER>=140 ? ~0 : - ~_EM_DENORMAL & (_MCW_EM | _MCW_RC), &sse_stat, _MCW_EM)) + if (!_setfp_sse(&cw, mask, &stat, ~0)) return 1; } - return 0; +#else + if (!_setfp(&cw, mask, &stat, ~0)) + return 1; + return 0; +#endif #elif defined(__aarch64__) ULONG_PTR fpsr; unsigned int tmp, fp_cw, fp_stat; @@ -6057,8 +6057,8 @@ int CDECL fesetenv(const fenv_t *env) return 0; #else FIXME( "not implemented\n" ); -#endif return 1; +#endif } #endif