wined3d: Add support for NV12 textures.

2014-02-17 14:46:00 +02:00 · 2014-02-17 14:46:00 +02:00 · 171529106b
parent 9b8a353462
commit 171529106b
4 changed files with 185 additions and 0 deletions
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@ -6772,6 +6772,7 @@ struct arbfp_blit_priv {
    GLenum yuy2_rect_shader, yuy2_2d_shader;
    GLenum uyvy_rect_shader, uyvy_2d_shader;
    GLenum yv12_rect_shader, yv12_2d_shader;
+    GLenum nv12_rect_shader, nv12_2d_shader;
    GLenum p8_rect_shader, p8_2d_shader;
    GLuint palette_texture;
 };
@ -6798,6 +6799,8 @@ static void arbfp_blit_free(struct wined3d_device *device)
    GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader));
    GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_rect_shader));
    GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_2d_shader));
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->nv12_rect_shader));
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->nv12_2d_shader));
    GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_rect_shader));
    GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_2d_shader));
    checkGLcall("Delete yuv and p8 programs");
@ -7046,6 +7049,153 @@ static BOOL gen_yv12_read(struct wined3d_shader_buffer *buffer, GLenum textype,
    return TRUE;
 }

+static BOOL gen_nv12_read(struct wined3d_shader_buffer *buffer, GLenum textype,
+        char *luminance)
+{
+    const char *tex;
+    static const float nv12_coef[]
+            = {2.0f / 3.0f, 1.0f / 3.0f, 1.0f, 1.0f};
+
+    switch (textype)
+    {
+        case GL_TEXTURE_2D:
+            tex = "2D";
+            break;
+        case GL_TEXTURE_RECTANGLE_ARB:
+            tex = "RECT";
+            break;
+        default:
+            FIXME("Implement nv12 correction for non-2d, non-rect textures\n");
+            return FALSE;
+    }
+
+    /* NV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
+     * sized plane where each component is an UV pair. So the effective
+     * bitdepth is 12 bits per pixel If the whole texture is interpreted as luminance
+     * data it looks approximately like this:
+     *
+     *        +----------------------------------+----
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |   2
+     *        |            LUMINANCE             |   -
+     *        |                                  |   3
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |
+     *        +----------------------------------+----
+     *        |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
+     *        |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
+     *        |                                  |   1
+     *        |                                  |   -
+     *        |                                  |   3
+     *        |                                  |
+     *        |                                  |
+     *        +----------------------------------+----
+     *
+     * When reading from rectangle textures, keep in mind that the input y coordinates
+     * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height. */
+
+    shader_addline(buffer, "PARAM nv12_coef = ");
+    shader_arb_append_imm_vec4(buffer, nv12_coef);
+    shader_addline(buffer, ";\n");
+
+    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
+    /* We only have half the number of chroma pixels. */
+    shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");
+
+    if (textype == GL_TEXTURE_2D)
+    {
+        shader_addline(buffer, "RCP chroma.w, size.x;\n");
+        shader_addline(buffer, "RCP chroma.z, size.y;\n");
+
+        shader_addline(buffer, "MAD texcrd.y, texcrd.y, nv12_coef.y, nv12_coef.x;\n");
+
+        /* We must not allow filtering horizontally, this would mix U and V.
+         * Vertical filtering is ok. However, bear in mind that the pixel center is at
+         * 0.5, so add 0.5. */
+
+        /* Convert to non-normalized coordinates so we can find the
+         * individual pixel. */
+        shader_addline(buffer, "MUL texcrd.x, texcrd.x, size.x;\n");
+        shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
+        /* Multiply by 2 since chroma components are stored in UV pixel pairs,
+         * add 0.5 to hit the center of the pixel. */
+        shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n");
+
+        /* Convert back to normalized coordinates. */
+        shader_addline(buffer, "MUL texcrd.x, texcrd.x, chroma.w;\n");
+
+        /* Clamp, keep the half pixel origin in mind. */
+        shader_addline(buffer, "MAD temp.y, coef.y, chroma.z, nv12_coef.x;\n");
+        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
+        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.z, nv12_coef.z;\n");
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
+    }
+    else
+    {
+        /* Read from [size - size+size/2] */
+        shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.y, size.y;\n");
+
+        shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
+        /* Multiply by 2 since chroma components are stored in UV pixel pairs,
+         * add 0.5 to hit the center of the pixel. */
+        shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n");
+
+        /* Clamp */
+        shader_addline(buffer, "MAD temp.y, size.y, coef.y, size.y;\n");
+        shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
+        shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n");
+        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
+    }
+    /* Read the texture, put the result into the output register. */
+    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
+    shader_addline(buffer, "MOV chroma.y, temp.w;\n");
+
+    if (textype == GL_TEXTURE_2D)
+    {
+        /* Add 1/size.x */
+        shader_addline(buffer, "ADD texcrd.x, texcrd.x, chroma.w;\n");
+    }
+    else
+    {
+        /* Add 1 */
+        shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.x;\n");
+    }
+    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
+    shader_addline(buffer, "MOV chroma.x, temp.w;\n");
+
+    /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
+     * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
+     * values due to filtering. */
+    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
+    if (textype == GL_TEXTURE_2D)
+    {
+        /* Multiply the y coordinate by 2/3 and clamp it */
+        shader_addline(buffer, "MUL texcrd.y, texcrd.y, nv12_coef.x;\n");
+        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, nv12_coef.x;\n");
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
+        shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
+    }
+    else
+    {
+        /* Reading from texture_rectangles is pretty straightforward, just use the unmodified
+         * texture coordinate. It is still a good idea to clamp it though, since the opengl texture
+         * is bigger
+         */
+        shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");
+        shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n");
+        shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
+    }
+    *luminance = 'a';
+
+    return TRUE;
+}
+
 static GLuint gen_p8_shader(struct arbfp_blit_priv *priv,
        const struct wined3d_gl_info *gl_info, GLenum textype)
 {
@ -7230,6 +7380,14 @@ static GLuint gen_yuv_shader(struct arbfp_blit_priv *priv, const struct wined3d_
            }
            break;

+        case COMPLEX_FIXUP_NV12:
+            if (!gen_nv12_read(&buffer, textype, &luminance_component))
+            {
+                shader_buffer_free(&buffer);
+                return 0;
+            }
+            break;
+
        default:
            FIXME("Unsupported YUV fixup %#x\n", yuv_fixup);
            shader_buffer_free(&buffer);
@ -7286,6 +7444,13 @@ static GLuint gen_yuv_shader(struct arbfp_blit_priv *priv, const struct wined3d_
            if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yv12_rect_shader = shader;
            else priv->yv12_2d_shader = shader;
            break;
+
+        case COMPLEX_FIXUP_NV12:
+            if (textype == GL_TEXTURE_RECTANGLE_ARB)
+                priv->nv12_rect_shader = shader;
+            else
+                priv->nv12_2d_shader = shader;
+            break;
        default:
            ERR("Unsupported complex fixup: %d\n", yuv_fixup);
    }
@ -7336,6 +7501,10 @@ static HRESULT arbfp_blit_set(void *blit_priv, struct wined3d_context *context,
            shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yv12_rect_shader : priv->yv12_2d_shader;
            break;

+        case COMPLEX_FIXUP_NV12:
+            shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->nv12_rect_shader : priv->nv12_2d_shader;
+            break;
+
        case COMPLEX_FIXUP_P8:
            shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->p8_rect_shader : priv->p8_2d_shader;
            if (!shader) shader = gen_p8_shader(priv, gl_info, textype);
@ -7430,6 +7599,7 @@ static BOOL arbfp_blit_supported(const struct wined3d_gl_info *gl_info, enum win
        case COMPLEX_FIXUP_YUY2:
        case COMPLEX_FIXUP_UYVY:
        case COMPLEX_FIXUP_YV12:
+        case COMPLEX_FIXUP_NV12:
        case COMPLEX_FIXUP_P8:
            TRACE("[OK]\n");
            return TRUE;
--- a/dlls/wined3d/utils.c
+++ b/dlls/wined3d/utils.c
@ -51,6 +51,7 @@ static const struct wined3d_format_channels formats[] =
    {WINED3DFMT_UYVY,                       0,  0,  0,  0,   0,  0,  0,  0,    2,   0,     0},
    {WINED3DFMT_YUY2,                       0,  0,  0,  0,   0,  0,  0,  0,    2,   0,     0},
    {WINED3DFMT_YV12,                       0,  0,  0,  0,   0,  0,  0,  0,    1,   0,     0},
+    {WINED3DFMT_NV12,                       0,  0,  0,  0,   0,  0,  0,  0,    1,   0,     0},
    {WINED3DFMT_DXT1,                       0,  0,  0,  0,   0,  0,  0,  0,    1,   0,     0},
    {WINED3DFMT_DXT2,                       0,  0,  0,  0,   0,  0,  0,  0,    1,   0,     0},
    {WINED3DFMT_DXT3,                       0,  0,  0,  0,   0,  0,  0,  0,    1,   0,     0},
@ -666,6 +667,10 @@ static const struct wined3d_format_texture_info format_texture_info[] =
            GL_ALPHA,                   GL_UNSIGNED_BYTE,                 0,
            WINED3DFMT_FLAG_FILTERING,
            WINED3D_GL_EXT_NONE,        NULL},
+    {WINED3DFMT_NV12,                   GL_ALPHA,                         GL_ALPHA,                               0,
+            GL_ALPHA,                   GL_UNSIGNED_BYTE,                 0,
+            WINED3DFMT_FLAG_FILTERING,
+            WINED3D_GL_EXT_NONE,        NULL},
    {WINED3DFMT_DXT1,                   GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, 0,
            GL_RGBA,                    GL_UNSIGNED_BYTE,                 0,
            WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
@ -1833,6 +1838,12 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_
    gl_info->formats[idx].height_scale.denominator = 2;
    gl_info->formats[idx].color_fixup = create_complex_fixup_desc(COMPLEX_FIXUP_YV12);

+    idx = getFmtIdx(WINED3DFMT_NV12);
+    gl_info->formats[idx].flags |= WINED3DFMT_FLAG_HEIGHT_SCALE;
+    gl_info->formats[idx].height_scale.numerator = 3;
+    gl_info->formats[idx].height_scale.denominator = 2;
+    gl_info->formats[idx].color_fixup = create_complex_fixup_desc(COMPLEX_FIXUP_NV12);
+
    if (gl_info->supported[ARB_FRAGMENT_PROGRAM])
    {
        idx = getFmtIdx(WINED3DFMT_P8_UINT);
@ -2069,6 +2080,7 @@ const char *debug_d3dformat(enum wined3d_format_id format_id)
        FMT_TO_STR(WINED3DFMT_UYVY);
        FMT_TO_STR(WINED3DFMT_YUY2);
        FMT_TO_STR(WINED3DFMT_YV12);
+        FMT_TO_STR(WINED3DFMT_NV12);
        FMT_TO_STR(WINED3DFMT_DXT1);
        FMT_TO_STR(WINED3DFMT_DXT2);
        FMT_TO_STR(WINED3DFMT_DXT3);
@ -2772,6 +2784,7 @@ static const char *debug_complex_fixup(enum complex_fixup fixup)
        WINED3D_TO_STR(COMPLEX_FIXUP_YUY2);
        WINED3D_TO_STR(COMPLEX_FIXUP_UYVY);
        WINED3D_TO_STR(COMPLEX_FIXUP_YV12);
+        WINED3D_TO_STR(COMPLEX_FIXUP_NV12);
        WINED3D_TO_STR(COMPLEX_FIXUP_P8);
 #undef WINED3D_TO_STR
        default:
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@ -85,6 +85,7 @@ enum complex_fixup
    COMPLEX_FIXUP_UYVY = 2,
    COMPLEX_FIXUP_YV12 = 3,
    COMPLEX_FIXUP_P8   = 4,
+    COMPLEX_FIXUP_NV12 = 5,
 };

 #include <pshpack2.h>
--- a/include/wine/wined3d.h
+++ b/include/wine/wined3d.h
@ -249,6 +249,7 @@ enum wined3d_format_id
    WINED3DFMT_NULL                         = WINEMAKEFOURCC('N','U','L','L'),
    WINED3DFMT_R16                          = WINEMAKEFOURCC(' ','R','1','6'),
    WINED3DFMT_AL16                         = WINEMAKEFOURCC('A','L','1','6'),
+    WINED3DFMT_NV12                         = WINEMAKEFOURCC('N','V','1','2'),

    WINED3DFMT_FORCE_DWORD = 0xffffffff
 };