From 0bf32b12f5a97045c4c2785260961e53184c29ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20D=C3=B6singer?= Date: Mon, 24 Nov 2008 11:55:50 +0100 Subject: [PATCH] wined3d: Add the ability to duplicate GL pixel shaders. Some stateblock parameters have to be compiled into the GL pixel shader code, like lines for pixelformat fixups. This leads to problems when applications switch those settings, requiring a recompilation of the shader. This patch enables wined3d to have multiple GL shaders for a D3D shader(pixel shaders only so far) to handle this more efficiently. --- dlls/wined3d/arb_program_shader.c | 116 ++++++++------------------ dlls/wined3d/baseshader.c | 3 +- dlls/wined3d/glsl_shader.c | 111 ++++++++++--------------- dlls/wined3d/pixelshader.c | 134 ++++++++++-------------------- dlls/wined3d/wined3d_private.h | 21 ++--- 5 files changed, 134 insertions(+), 251 deletions(-) diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c index f6cefe3de6a..3a524533f8b 100644 --- a/dlls/wined3d/arb_program_shader.c +++ b/dlls/wined3d/arb_program_shader.c @@ -220,32 +220,6 @@ static void shader_arb_load_constants( deviceImpl->activeContext->pshader_const_dirty[psi->luminanceconst[i].const_num] = 1; } } - - if(((IWineD3DPixelShaderImpl *) pshader)->srgb_enabled && - !((IWineD3DPixelShaderImpl *) pshader)->srgb_mode_hardcoded) { - float comparison[4]; - float mul_low[4]; - - if(stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) { - comparison[0] = srgb_cmp; comparison[1] = srgb_cmp; - comparison[2] = srgb_cmp; comparison[3] = srgb_cmp; - - mul_low[0] = srgb_mul_low; mul_low[1] = srgb_mul_low; - mul_low[2] = srgb_mul_low; mul_low[3] = srgb_mul_low; - } else { - comparison[0] = 1.0 / 0.0; comparison[1] = 1.0 / 0.0; - comparison[2] = 1.0 / 0.0; comparison[3] = 1.0 / 0.0; - - mul_low[0] = 1.0; mul_low[1] = 1.0; - mul_low[2] = 1.0; mul_low[3] = 1.0; - } - GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, psi->srgb_cmp_const, comparison)); - GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, psi->srgb_low_const, mul_low)); - checkGLcall("Load sRGB correction constants\n"); - deviceImpl->activeContext->pshader_const_dirty[psi->srgb_low_const] = 1; - deviceImpl->activeContext->pshader_const_dirty[psi->srgb_cmp_const] = 1; - - } } } @@ -325,49 +299,16 @@ static void shader_generate_arb_declarations( } if(device->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE] && pshader) { - IWineD3DPixelShaderImpl *ps_impl = (IWineD3DPixelShaderImpl *) This; - /* If there are 2 constants left to use, use them to pass the sRGB correction values in. This way - * srgb write correction can be turned on and off dynamically without recompilation. Otherwise - * hardcode them. The drawback of hardcoding is that the shader needs recompilation to turn sRGB - * off again - */ - if(max_constantsF + extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF) && FALSE) { - /* The idea is that if srgb is enabled, then disabled, the constant loading code - * can effectively disable sRGB correction by passing 1.0 and INF as the multiplication - * and comparison constants. If it disables it that way, the shader won't be recompiled - * and the code will stay in, so sRGB writing can be turned on again by setting the - * constants from the spec - */ - ps_impl->srgb_mode_hardcoded = 0; - ps_impl->srgb_low_const = GL_LIMITS(pshader_constantsF) - extra_constants_needed; - ps_impl->srgb_cmp_const = GL_LIMITS(pshader_constantsF) - extra_constants_needed - 1; - shader_addline(buffer, "PARAM srgb_mul_low = program.env[%d];\n", ps_impl->srgb_low_const); - shader_addline(buffer, "PARAM srgb_comparison = program.env[%d];\n", ps_impl->srgb_cmp_const); - } else { - shader_addline(buffer, "PARAM srgb_mul_low = {%f, %f, %f, 1.0};\n", - srgb_mul_low, srgb_mul_low, srgb_mul_low); - shader_addline(buffer, "PARAM srgb_comparison = {%f, %f, %f, %f};\n", - srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp); - ps_impl->srgb_mode_hardcoded = 1; - } - /* These can be hardcoded, they do not cause any harm because no fragment will enter the high - * path if the comparison value is set to INF - */ + shader_addline(buffer, "PARAM srgb_mul_low = {%f, %f, %f, 1.0};\n", + srgb_mul_low, srgb_mul_low, srgb_mul_low); + shader_addline(buffer, "PARAM srgb_comparison = {%f, %f, %f, %f};\n", + srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp); shader_addline(buffer, "PARAM srgb_pow = {%f, %f, %f, 1.0};\n", srgb_pow, srgb_pow, srgb_pow); shader_addline(buffer, "PARAM srgb_mul_hi = {%f, %f, %f, 1.0};\n", srgb_mul_high, srgb_mul_high, srgb_mul_high); shader_addline(buffer, "PARAM srgb_sub_hi = {%f, %f, %f, 0.0};\n", srgb_sub_high, srgb_sub_high, srgb_sub_high); - ps_impl->srgb_enabled = 1; - } else if(pshader) { - IWineD3DPixelShaderImpl *ps_impl = (IWineD3DPixelShaderImpl *) This; - - /* Do not write any srgb fixup into the shader to save shader size and processing time. - * As a consequence, we can't toggle srgb write on without recompilation - */ - ps_impl->srgb_enabled = 0; - ps_impl->srgb_mode_hardcoded = 1; } /* Load local constants using the program-local space, @@ -1898,9 +1839,8 @@ static void shader_arb_select(IWineD3DDevice *iface, BOOL usePS, BOOL useVS) { struct ps_compile_args compile_args; TRACE("Using pixel shader\n"); find_ps_compile_args((IWineD3DPixelShaderImpl *) This->stateBlock->pixelShader, This->stateBlock, &compile_args); - pixelshader_compile(This->stateBlock->pixelShader, &compile_args); - - priv->current_fprogram_id = ((IWineD3DPixelShaderImpl *)This->stateBlock->pixelShader)->prgId; + priv->current_fprogram_id = find_gl_pshader((IWineD3DPixelShaderImpl *) This->stateBlock->pixelShader, + &compile_args); /* Bind the fragment program */ GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id)); @@ -1978,24 +1918,33 @@ static void shader_arb_cleanup(IWineD3DDevice *iface) { } static void shader_arb_destroy(IWineD3DBaseShader *iface) { - IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *) iface; - WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *) This->baseShader.device)->adapter->gl_info; - char pshader = shader_is_pshader_version(This->baseShader.hex_version); + IWineD3DBaseShaderImpl *baseShader = (IWineD3DBaseShaderImpl *) iface; + WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *) baseShader->baseShader.device)->adapter->gl_info; + char pshader = shader_is_pshader_version(baseShader->baseShader.hex_version); if(pshader) { + IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *) iface; + UINT i; + ENTER_GL(); - GL_EXTCALL(glDeleteProgramsARB(1, &((IWineD3DPixelShaderImpl *) This)->prgId)); - checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &((IWineD3DPixelShaderImpl *) This)->prgId))"); - ((IWineD3DPixelShaderImpl *) This)->prgId = 0; + for(i = 0; i < This->num_gl_shaders; i++) { + GL_EXTCALL(glDeleteProgramsARB(1, &This->gl_shaders[i].prgId)); + checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &This->gl_shaders[i].prgId))"); + } LEAVE_GL(); + HeapFree(GetProcessHeap(), 0, This->gl_shaders); + This->gl_shaders = NULL; + This->num_gl_shaders = 0; } else { + IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *) iface; + ENTER_GL(); - GL_EXTCALL(glDeleteProgramsARB(1, &((IWineD3DVertexShaderImpl *) This)->prgId)); - checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &((IWineD3DPixelShaderImpl *) This)->prgId))"); + GL_EXTCALL(glDeleteProgramsARB(1, &This->prgId)); + checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &This->prgId))"); ((IWineD3DVertexShaderImpl *) This)->prgId = 0; LEAVE_GL(); } - This->baseShader.is_compiled = FALSE; + baseShader->baseShader.is_compiled = FALSE; } static HRESULT shader_arb_alloc(IWineD3DDevice *iface) { @@ -2048,13 +1997,14 @@ static void arbfp_add_sRGB_correction(SHADER_BUFFER *buffer, const char *fragcol /* [0.0;1.0] clamping. Not needed, this is done implicitly */ } -static void shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) { +static GLuint shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) { IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface; shader_reg_maps* reg_maps = &This->baseShader.reg_maps; CONST DWORD *function = This->baseShader.function; const char *fragcolor; WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info; local_constant* lconst; + GLuint retval; /* Create the hw ARB shader */ shader_addline(buffer, "!!ARBfp1.0\n"); @@ -2091,7 +2041,7 @@ static void shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE } else { fragcolor = "TMP_COLOR"; } - if(This->srgb_enabled) { + if(((IWineD3DDeviceImpl *)This->baseShader.device)->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) { arbfp_add_sRGB_correction(buffer, fragcolor, "TMP", "TMP2", "TA", "TB"); } if (This->baseShader.hex_version < WINED3DPS_VERSION(3,0)) { @@ -2102,12 +2052,12 @@ static void shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE shader_addline(buffer, "END\n"); /* TODO: change to resource.glObjectHandle or something like that */ - GL_EXTCALL(glGenProgramsARB(1, &This->prgId)); + GL_EXTCALL(glGenProgramsARB(1, &retval)); - TRACE("Creating a hw pixel shader, prg=%d\n", This->prgId); - GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId)); + TRACE("Creating a hw pixel shader, prg=%d\n", retval); + GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval)); - TRACE("Created hw pixel shader, prg=%d\n", This->prgId); + TRACE("Created hw pixel shader, prg=%d\n", retval); /* Create the program and check for errors */ GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, buffer->bsize, buffer->buffer)); @@ -2117,7 +2067,7 @@ static void shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos); FIXME("HW PixelShader Error at position %d: %s\n", errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); - This->prgId = -1; + retval = 0; } /* Load immediate constants */ @@ -2128,6 +2078,8 @@ static void shader_arb_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFE checkGLcall("glProgramLocalParameter4fvARB"); } } + + return retval; } static void shader_arb_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) { diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c index f7c8d35805c..292fede18cc 100644 --- a/dlls/wined3d/baseshader.c +++ b/dlls/wined3d/baseshader.c @@ -1101,8 +1101,9 @@ static void shader_none_destroy(IWineD3DBaseShader *iface) {} static HRESULT shader_none_alloc(IWineD3DDevice *iface) {return WINED3D_OK;} static void shader_none_free(IWineD3DDevice *iface) {} static BOOL shader_none_dirty_const(IWineD3DDevice *iface) {return FALSE;} -static void shader_none_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) { +static GLuint shader_none_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) { FIXME("NONE shader backend asked to generate a pixel shader\n"); + return 0; } static void shader_none_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) { FIXME("NONE shader backend asked to generate a vertex shader\n"); diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index eeec2604c25..d0d7491f2bd 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -73,8 +73,6 @@ struct glsl_shader_prog_link { GLhandleARB bumpenvmat_location[MAX_TEXTURES]; GLhandleARB luminancescale_location[MAX_TEXTURES]; GLhandleARB luminanceoffset_location[MAX_TEXTURES]; - GLhandleARB srgb_comparison_location; - GLhandleARB srgb_mul_low_location; GLhandleARB ycorrection_location; GLenum vertex_color_clamp; GLhandleARB vshader; @@ -492,28 +490,6 @@ static void shader_glsl_load_constants( } } - if(((IWineD3DPixelShaderImpl *) pshader)->srgb_enabled && - !((IWineD3DPixelShaderImpl *) pshader)->srgb_mode_hardcoded) { - float comparison[4]; - float mul_low[4]; - - if(stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) { - comparison[0] = srgb_cmp; comparison[1] = srgb_cmp; - comparison[2] = srgb_cmp; comparison[3] = srgb_cmp; - - mul_low[0] = srgb_mul_low; mul_low[1] = srgb_mul_low; - mul_low[2] = srgb_mul_low; mul_low[3] = srgb_mul_low; - } else { - comparison[0] = 1.0 / 0.0; comparison[1] = 1.0 / 0.0; - comparison[2] = 1.0 / 0.0; comparison[3] = 1.0 / 0.0; - - mul_low[0] = 1.0; mul_low[1] = 1.0; - mul_low[2] = 1.0; mul_low[3] = 1.0; - } - - GL_EXTCALL(glUniform4fvARB(prog->srgb_comparison_location, 1, comparison)); - GL_EXTCALL(glUniform4fvARB(prog->srgb_mul_low_location, 1, mul_low)); - } if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) { float correction_params[4]; if(deviceImpl->render_offscreen) { @@ -608,27 +584,10 @@ static void shader_generate_glsl_declarations( } if(device->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) { - ps_impl->srgb_enabled = 1; - if(This->baseShader.limits.constant_float + extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) { - shader_addline(buffer, "uniform vec4 srgb_mul_low;\n"); - shader_addline(buffer, "uniform vec4 srgb_comparison;\n"); - ps_impl->srgb_mode_hardcoded = 0; - extra_constants_needed++; - } else { - ps_impl->srgb_mode_hardcoded = 1; - shader_addline(buffer, "const vec4 srgb_mul_low = vec4(%f, %f, %f, %f);\n", - srgb_mul_low, srgb_mul_low, srgb_mul_low, srgb_mul_low); - shader_addline(buffer, "const vec4 srgb_comparison = vec4(%f, %f, %f, %f);\n", - srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp); - } - } else { - IWineD3DPixelShaderImpl *ps_impl = (IWineD3DPixelShaderImpl *) This; - - /* Do not write any srgb fixup into the shader to save shader size and processing time. - * As a consequence, we can't toggle srgb write on without recompilation - */ - ps_impl->srgb_enabled = 0; - ps_impl->srgb_mode_hardcoded = 1; + shader_addline(buffer, "const vec4 srgb_mul_low = vec4(%f, %f, %f, %f);\n", + srgb_mul_low, srgb_mul_low, srgb_mul_low, srgb_mul_low); + shader_addline(buffer, "const vec4 srgb_comparison = vec4(%f, %f, %f, %f);\n", + srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp); } if(reg_maps->vpos || reg_maps->usesdsy) { if(This->baseShader.limits.constant_float + extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) { @@ -2798,7 +2757,8 @@ static void pshader_glsl_dp2add(SHADER_OPCODE_ARG* arg) { static void pshader_glsl_input_pack( SHADER_BUFFER* buffer, semantic* semantics_in, - IWineD3DPixelShader *iface) { + IWineD3DPixelShader *iface, + enum vertexprocessing_mode vertexprocessing) { unsigned int i; IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *) iface; @@ -2819,7 +2779,7 @@ static void pshader_glsl_input_pack( switch(usage) { case WINED3DDECLUSAGE_TEXCOORD: - if(usage_idx < 8 && This->vertexprocessing == pretransformed) { + if(usage_idx < 8 && vertexprocessing == pretransformed) { shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n", This->input_reg_map[i], reg_mask, usage_idx, reg_mask); } else { @@ -3236,8 +3196,7 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use if(use_ps) { struct ps_compile_args compile_args; find_ps_compile_args((IWineD3DPixelShaderImpl*)This->stateBlock->pixelShader, This->stateBlock, &compile_args); - pixelshader_compile(pshader, &compile_args); - pshader_id = ((IWineD3DPixelShaderImpl*)pshader)->prgId; + pshader_id = find_gl_pshader((IWineD3DPixelShaderImpl *) pshader, &compile_args); } else { pshader_id = 0; } @@ -3347,8 +3306,6 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup")); - entry->srgb_comparison_location = GL_EXTCALL(glGetUniformLocationARB(programId, "srgb_comparison")); - entry->srgb_mul_low_location = GL_EXTCALL(glGetUniformLocationARB(programId, "srgb_mul_low")); entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection")); checkGLcall("Find glsl program uniform locations"); @@ -3535,7 +3492,8 @@ static void shader_glsl_destroy(IWineD3DBaseShader *iface) { IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device; struct shader_glsl_priv *priv = (struct shader_glsl_priv *)device->shader_priv; WineD3D_GL_Info *gl_info = &device->adapter->gl_info; - GLuint *prog; + IWineD3DPixelShaderImpl *ps = NULL; + IWineD3DVertexShaderImpl *vs = NULL; /* Note: Do not use QueryInterface here to find out which shader type this is because this code * can be called from IWineD3DBaseShader::Release @@ -3543,11 +3501,13 @@ static void shader_glsl_destroy(IWineD3DBaseShader *iface) { char pshader = shader_is_pshader_version(This->baseShader.hex_version); if(pshader) { - prog = &((IWineD3DPixelShaderImpl *) This)->prgId; + ps = (IWineD3DPixelShaderImpl *) This; + if(ps->num_gl_shaders == 0) return; } else { - prog = &((IWineD3DVertexShaderImpl *) This)->prgId; + vs = (IWineD3DVertexShaderImpl *) This; + if(vs->prgId == 0) return; } - if(*prog == 0) return; + linked_programs = &This->baseShader.linked_programs; TRACE("Deleting linked programs\n"); @@ -3565,11 +3525,28 @@ static void shader_glsl_destroy(IWineD3DBaseShader *iface) { } } - TRACE("Deleting shader object %u\n", *prog); - GL_EXTCALL(glDeleteObjectARB(*prog)); - checkGLcall("glDeleteObjectARB"); - *prog = 0; - This->baseShader.is_compiled = FALSE; + if(pshader) { + UINT i; + + ENTER_GL(); + for(i = 0; i < ps->num_gl_shaders; i++) { + TRACE("deleting pshader %u\n", ps->gl_shaders[i].prgId); + GL_EXTCALL(glDeleteObjectARB(ps->gl_shaders[i].prgId)); + checkGLcall("glDeleteObjectARB"); + } + LEAVE_GL(); + HeapFree(GetProcessHeap(), 0, ps->gl_shaders); + ps->gl_shaders = NULL; + ps->num_gl_shaders = 0; + } else { + TRACE("Deleting shader object %u\n", vs->prgId); + ENTER_GL(); + GL_EXTCALL(glDeleteObjectARB(vs->prgId)); + checkGLcall("glDeleteObjectARB"); + LEAVE_GL(); + vs->prgId = 0; + vs->baseShader.is_compiled = FALSE; + } } static unsigned int glsl_program_key_hash(void *key) { @@ -3626,7 +3603,7 @@ static BOOL shader_glsl_dirty_const(IWineD3DDevice *iface) { return FALSE; } -static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) { +static GLuint shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) { IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface; shader_reg_maps* reg_maps = &This->baseShader.reg_maps; CONST DWORD *function = This->baseShader.function; @@ -3655,13 +3632,9 @@ static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF if (This->baseShader.hex_version >= WINED3DPS_VERSION(3,0)) { if(((IWineD3DDeviceImpl *) This->baseShader.device)->strided_streams.u.s.position_transformed) { - This->vertexprocessing = pretransformed; - pshader_glsl_input_pack(buffer, This->semantics_in, iface); + pshader_glsl_input_pack(buffer, This->semantics_in, iface, pretransformed); } else if(!use_vs((IWineD3DDeviceImpl *) This->baseShader.device)) { - This->vertexprocessing = fixedfunction; - pshader_glsl_input_pack(buffer, This->semantics_in, iface); - } else { - This->vertexprocessing = vertexshader; + pshader_glsl_input_pack(buffer, This->semantics_in, iface, fixedfunction); } } @@ -3682,7 +3655,7 @@ static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF } else { fragcolor = "gl_FragColor"; } - if(This->srgb_enabled) { + if(((IWineD3DDeviceImpl *)This->baseShader.device)->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) { shader_addline(buffer, "tmp0.xyz = pow(%s.xyz, vec3(%f, %f, %f)) * vec3(%f, %f, %f) - vec3(%f, %f, %f);\n", fragcolor, srgb_pow, srgb_pow, srgb_pow, srgb_mul_high, srgb_mul_high, srgb_mul_high, srgb_sub_high, srgb_sub_high, srgb_sub_high); @@ -3711,7 +3684,7 @@ static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF print_glsl_info_log(&GLINFO_LOCATION, shader_obj); /* Store the shader object */ - This->prgId = shader_obj; + return shader_obj; } static void shader_glsl_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) { diff --git a/dlls/wined3d/pixelshader.c b/dlls/wined3d/pixelshader.c index 1ce2849460d..7b2f639d42a 100644 --- a/dlls/wined3d/pixelshader.c +++ b/dlls/wined3d/pixelshader.c @@ -272,32 +272,16 @@ static void pshader_set_limits( /** Generate a pixel shader string using either GL_FRAGMENT_PROGRAM_ARB or GLSL and send it to the card */ -static inline VOID IWineD3DPixelShaderImpl_GenerateShader( - IWineD3DPixelShader *iface) { - IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface; +static inline GLuint IWineD3DPixelShaderImpl_GenerateShader( + IWineD3DPixelShaderImpl *This) { SHADER_BUFFER buffer; -#if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders - it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */ - if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) { - HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer); - This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE); - This->fixupVertexBufferSize = PGMSIZE; - This->fixupVertexBuffer[0] = 0; - } - buffer.buffer = This->device->fixupVertexBuffer; -#else - buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); -#endif + buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); buffer.bsize = 0; buffer.lineNo = 0; buffer.newline = TRUE; - ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_pshader(iface, &buffer); - -#if 1 /* if were using the data buffer of device then we don't need to free it */ - HeapFree(GetProcessHeap(), 0, buffer.buffer); -#endif + return ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_pshader((IWineD3DPixelShader *) This, &buffer); } static HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) { @@ -384,89 +368,29 @@ static HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *i return WINED3D_OK; } -HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct ps_compile_args *args) { - - IWineD3DPixelShaderImpl *This =(IWineD3DPixelShaderImpl *)iface; - IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device; +GLuint pixelshader_compile(IWineD3DPixelShaderImpl *This, struct ps_compile_args *args) { CONST DWORD *function = This->baseShader.function; - UINT i, sampler; HRESULT hr; + GLuint retval; - TRACE("(%p) : function %p\n", iface, function); + TRACE("(%p) : function %p\n", This, function); - /* We're already compiled, but check if any of the hardcoded stateblock assumptions - * changed. - */ - if (This->baseShader.is_compiled) { - for(i = 0; i < This->baseShader.num_sampled_samplers; i++) { - sampler = This->baseShader.sampled_samplers[i]; - if(args->format_conversion[sampler] != This->baseShader.sampled_format[sampler]) { - WARN("Recompiling shader %p due to format change on sampler %d\n", This, sampler); - WARN("Old format group %s, new is %s\n", - debug_d3dformat(This->baseShader.sampled_format[sampler]), - debug_d3dformat(args->format_conversion[sampler])); - goto recompile; - } - } - - /* TODO: Check projected textures */ - /* TODO: Check texture types(2D, Cube, 3D) */ - - if(args->srgb_correction != This->srgb_enabled && This->srgb_mode_hardcoded) { - WARN("Recompiling shader because srgb correction is different and hardcoded\n"); - goto recompile; - } - if(This->baseShader.reg_maps.vpos && !This->vpos_uniform) { - if(This->render_offscreen != deviceImpl->render_offscreen || - This->height != ((IWineD3DSurfaceImpl *) deviceImpl->render_targets[0])->currentDesc.Height) { - WARN("Recompiling shader because vpos is used, hard compiled and changed\n"); - goto recompile; - } - } - if(This->baseShader.reg_maps.usesdsy && !This->vpos_uniform) { - if(This->render_offscreen ? 0 : 1 != deviceImpl->render_offscreen ? 0 : 1) { - WARN("Recompiling shader because dsy is used, hard compiled and render_offscreen changed\n"); - goto recompile; - } - } - if(This->baseShader.hex_version >= WINED3DPS_VERSION(3,0)) { - if(args->vp_mode != This->vertexprocessing) { - WARN("Recompiling shader because the vertex processing mode changed\n"); - goto recompile; - } - } - - return WINED3D_OK; - - recompile: - if(This->baseShader.recompile_count > 50) { - FIXME("Shader %p recompiled more than 50 times\n", This); - } else { - This->baseShader.recompile_count++; - } - - deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface); + hr = IWineD3DPixelShader_UpdateSamplers((IWineD3DPixelShader *) This); + if(FAILED(hr)) { + ERR("Failed to update sampler information\n"); + return 0; } - /* We don't need to compile */ - if (!function) { - This->baseShader.is_compiled = TRUE; - return WINED3D_OK; - } - - hr = IWineD3DPixelShader_UpdateSamplers(iface); - if(FAILED(hr)) return hr; - /* Reset fields tracking stateblock values being hardcoded in the shader */ This->baseShader.num_sampled_samplers = 0; /* Generate the HW shader */ TRACE("(%p) : Generating hardware program\n", This); - IWineD3DPixelShaderImpl_GenerateShader(iface); + retval = IWineD3DPixelShaderImpl_GenerateShader(This); This->baseShader.is_compiled = TRUE; - return WINED3D_OK; + return retval; } static HRESULT WINAPI IWineD3DPixelShaderImpl_UpdateSamplers(IWineD3DPixelShader *iface) { @@ -532,3 +456,35 @@ void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImp args->vp_mode = vertexshader; } } + +GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct ps_compile_args *args) { + UINT i; + struct ps_compiled_shader *old_array; + + /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), + * so a linear search is more performant than a hashmap + */ + for(i = 0; i < shader->num_gl_shaders; i++) { + if(memcmp(&shader->gl_shaders[i].args, args, sizeof(*args)) == 0) { + return shader->gl_shaders[i].prgId; + } + } + + TRACE("No matching GL shader found, compiling a new shader\n"); + old_array = shader->gl_shaders; + if(old_array) { + shader->gl_shaders = HeapReAlloc(GetProcessHeap(), 0, old_array, + (shader->num_gl_shaders + 1) * sizeof(*shader->gl_shaders)); + } else { + shader->gl_shaders = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader->gl_shaders)); + } + + if(!shader->gl_shaders) { + ERR("Out of memory\n"); + return 0; + } + + shader->gl_shaders[shader->num_gl_shaders].args = *args; + shader->gl_shaders[shader->num_gl_shaders].prgId = pixelshader_compile(shader, args); + return shader->gl_shaders[shader->num_gl_shaders++].prgId; +} diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index b5933544da1..cb3a26d7c47 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -350,7 +350,7 @@ typedef struct { HRESULT (*shader_alloc_private)(IWineD3DDevice *iface); void (*shader_free_private)(IWineD3DDevice *iface); BOOL (*shader_dirtifyable_constants)(IWineD3DDevice *iface); - void (*shader_generate_pshader)(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer); + GLuint (*shader_generate_pshader)(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer); void (*shader_generate_vshader)(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer); void (*shader_get_caps)(WINED3DDEVTYPE devtype, WineD3D_GL_Info *gl_info, struct shader_caps *caps); BOOL (*shader_conv_supported)(WINED3DFORMAT conv); @@ -2346,6 +2346,13 @@ struct ps_compile_args { BOOL srgb_correction; WINED3DFORMAT format_conversion[MAX_FRAGMENT_SAMPLERS]; enum vertexprocessing_mode vp_mode; + /* Projected textures(ps 1.0-1.3) */ + /* Texture types(2D, Cube, 3D) in ps 1.x */ +}; + +struct ps_compiled_shader { + struct ps_compile_args args; + GLuint prgId; }; typedef struct IWineD3DPixelShaderImpl { @@ -2365,25 +2372,19 @@ typedef struct IWineD3DPixelShaderImpl { int declared_in_count; /* The GL shader */ - GLuint prgId; + struct ps_compiled_shader *gl_shaders; + UINT num_gl_shaders; /* Some information about the shader behavior */ struct stb_const_desc bumpenvmatconst[MAX_TEXTURES]; char numbumpenvmatconsts; struct stb_const_desc luminanceconst[MAX_TEXTURES]; - char srgb_enabled; - char srgb_mode_hardcoded; - UINT srgb_low_const; - UINT srgb_cmp_const; char vpos_uniform; - BOOL render_offscreen; - UINT height; - enum vertexprocessing_mode vertexprocessing; } IWineD3DPixelShaderImpl; extern const SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[]; extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl; -HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct ps_compile_args *args); +GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct ps_compile_args *args); void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct ps_compile_args *args); /* sRGB correction constants */