wined3d: Support CPU clear of float16 formats.

Signed-off-by: Stefan Dösinger <stefan@codeweavers.com>
Signed-off-by: Zebediah Figura <zfigura@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
2022-05-16 12:58:05 -05:00 · 2022-05-16 12:58:05 -05:00 · e8c9588e2a
parent f637414446
commit e8c9588e2a
3 changed files with 93 additions and 70 deletions
--- a/dlls/wined3d/surface.c
+++ b/dlls/wined3d/surface.c
@ -39,69 +39,6 @@ static void get_color_masks(const struct wined3d_format *format, uint32_t *masks
    masks[2] = wined3d_mask_from_size(format->blue_size) << format->blue_offset;
 }
 /* See also float_16_to_32() in wined3d_private.h */
 /* Convert a 32 bit C float to a 16 bit FLOAT16 (half float) bit pattern.
  *
  * The magnitude is scaled by powers of two into [2^10, 2^11) so that its
  * integer part is the 11 bit significand (implicit leading one included),
  * then rounded to nearest with ties away from zero. Zero (of either sign)
  * yields 0x0000, NaN yields 0x7c01, infinities and values too large for
  * FLOAT16 yield +/-INF, and values below the normal range are encoded as
  * denormals by truncation, reaching zero for very small inputs. */
 static inline unsigned short float_32_to_16(const float *in)
 {
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;
    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);
    if (tmp < (float)(1u << 10))
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < (float)(1u << 10));
    }
    else if (tmp >= (float)(1u << 11))
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= (float)(1u << 11));
    }
    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */
    if (mantissa == (1u << 11))
    {
        /* Rounding carried out of the 11 bit significand (2047.5 -> 2048).
         * Renormalize, otherwise masking with 0x3ff below would drop the
         * carry and halve the result. */
        mantissa >>= 1;
        ++exp;
    }
    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */
    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }
    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
 }
 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
        DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
 {
--- a/dlls/wined3d/utils.c
+++ b/dlls/wined3d/utils.c
@ -6137,6 +6137,31 @@ void wined3d_format_convert_from_float(const struct wined3d_format *format,
        return;
    }
    if ((format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_FLOAT) && format->red_size == 16)
    {
        uint16_t *ret_s = ret;
        switch (format->byte_count)
        {
            case 8:
                ret_s[3] = float_32_to_16(&color->a);
                ret_s[2] = float_32_to_16(&color->b);
                /* fall through */
            case 4:
                ret_s[1] = float_32_to_16(&color->g);
                /* fall through */
            case 2:
                ret_s[0] = float_32_to_16(&color->r);
                break;
            default:
                ERR("Unexpected byte count %u, format %s.\n", format->byte_count, debug_d3dformat(format_id));
                break;
        }
        return;
    }
    FIXME("Conversion for format %s not implemented.\n", debug_d3dformat(format_id));
 }
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@ -354,13 +354,12 @@ static inline GLenum wined3d_gl_min_mip_filter(enum wined3d_texture_filter_type
    return minMipLookup[min_filter].mip[mip_filter];
 }
-/* float_16_to_32() and float_32_to_16() (see implementation in
+/* float_16_to_32() and float_32_to_16() convert 16 bit floats in the
- * surface_base.c) convert 16 bit floats in the FLOAT16 data type
+ * FLOAT16 data type to standard C floats and vice versa. They do not
- * to standard C floats and vice versa. They do not depend on the encoding
+ * depend on the encoding of the C float, so they are platform independent,
- * of the C float, so they are platform independent, but slow. On x86 and
+ * but slow. On x86 and other IEEE 754 compliant platforms the conversion
- * other IEEE 754 compliant platforms the conversion can be accelerated by
+ * can be accelerated by bit shifting the exponent and mantissa. There are
- * bit shifting the exponent and mantissa. There are also some SSE-based
+ * also some SSE-based assembly routines out there.
- * assembly routines out there.
 *
 * See GL_NV_half_float for a reference of the FLOAT16 / GL_HALF format
 */
@ -404,6 +403,68 @@ static inline float float_24_to_32(DWORD in)
    }
 }
 /* Convert a 32 bit C float to a 16 bit FLOAT16 (half float) bit pattern.
  *
  * The magnitude is scaled by powers of two into [2^10, 2^11) so that its
  * integer part is the 11 bit significand (implicit leading one included),
  * then rounded to nearest with ties away from zero. Zero (of either sign)
  * yields 0x0000, NaN yields 0x7c01, infinities and values too large for
  * FLOAT16 yield +/-INF, and values below the normal range are encoded as
  * denormals by truncation, reaching zero for very small inputs. */
 static inline unsigned short float_32_to_16(const float *in)
 {
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;
    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);
    if (tmp < (float)(1u << 10))
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < (float)(1u << 10));
    }
    else if (tmp >= (float)(1u << 11))
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= (float)(1u << 11));
    }
    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */
    if (mantissa == (1u << 11))
    {
        /* Rounding carried out of the 11 bit significand (2047.5 -> 2048).
         * Renormalize, otherwise masking with 0x3ff below would drop the
         * carry and halve the result. */
        mantissa >>= 1;
        ++exp;
    }
    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */
    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }
    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
 }
 static inline unsigned int wined3d_popcount(unsigned int x)
 {
 #if defined(__MINGW32__)