From de12f880ced7318ba0f8a62946fe0bd292ce66d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20D=C3=B6singer?= Date: Fri, 29 May 2009 17:11:04 +0200 Subject: [PATCH] wined3d: Don't enable the NV frag extensions if we don't need them. Enabling the NV extensions occupies a temp register for some reason. Avoid needlessly enabling it. --- dlls/wined3d/arb_program_shader.c | 36 +++++++++++++++++++++++++++++-- dlls/wined3d/baseshader.c | 4 ++++ dlls/wined3d/wined3d_private.h | 2 +- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c index 2a3456710fb..0d32bbc0400 100644 --- a/dlls/wined3d/arb_program_shader.c +++ b/dlls/wined3d/arb_program_shader.c @@ -2058,6 +2058,7 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This, DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This); struct shader_arb_ctx_priv priv_ctx; BOOL dcl_tmp = args->super.srgb_correction, dcl_td = FALSE; + BOOL want_nv_prog = FALSE; char srgbtmp[4][4]; unsigned int i, found = 0; @@ -2103,14 +2104,38 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This, priv_ctx.cur_ps_args = args; list_init(&priv_ctx.if_frames); + /* Avoid enabling NV_fragment_program* if we do not need it. + * + * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register, + * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation + * is faster than what we gain from using higher native instructions. There are some things though + * that cannot be emulated. In that case enable the extensions. + * If the extension is enabled, instruction handlers that support both ways will use it. + * + * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program. + * So enable the best we can get. 
+ */ + if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0) + { + want_nv_prog = TRUE; + } + shader_addline(buffer, "!!ARBfp1.0\n"); - if(GL_SUPPORT(NV_FRAGMENT_PROGRAM2)) { + if(want_nv_prog && GL_SUPPORT(NV_FRAGMENT_PROGRAM2)) { shader_addline(buffer, "OPTION NV_fragment_program2;\n"); priv_ctx.target_version = NV3; - } else if(GL_SUPPORT(NV_FRAGMENT_PROGRAM_OPTION)) { + } else if(want_nv_prog && GL_SUPPORT(NV_FRAGMENT_PROGRAM_OPTION)) { shader_addline(buffer, "OPTION NV_fragment_program;\n"); priv_ctx.target_version = NV2; } else { + if(want_nv_prog) + { + /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some + * limits properly + */ + ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n"); + ERR("Try GLSL\n"); + } priv_ctx.target_version = ARB; } @@ -2131,6 +2156,10 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This, } } + /* For now always declare the temps. At least the Nvidia assembler optimizes completely + * unused temps away (but occupies them for the whole shader if they're used once). Always + * declaring them avoids tricky bookkeeping work + */ shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */ shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */ shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */ @@ -2227,6 +2256,9 @@ static GLuint shader_arb_generate_vshader(IWineD3DVertexShaderImpl *This, /* Create the hw ARB shader */ shader_addline(buffer, "!!ARBvp1.0\n"); + /* Always enable the NV extension if available. Unlike fragment shaders, there is no + * measurable performance penalty, and we can always make use of it for clipplanes. 
+ */ if(GL_SUPPORT(NV_VERTEX_PROGRAM2_OPTION)) { shader_addline(buffer, "OPTION NV_vertex_program2;\n"); priv_ctx.target_version = NV2; diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c index 9dcdbe6f332..1a34fa2e9db 100644 --- a/dlls/wined3d/baseshader.c +++ b/dlls/wined3d/baseshader.c @@ -683,6 +683,10 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3 { reg_maps->usesdsy = 1; } + else if (ins.handler_idx == WINED3DSIH_DSX) + { + reg_maps->usesdsx = 1; + } else if(ins.handler_idx == WINED3DSIH_TEXLDD) { reg_maps->usestexldd = 1; diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 8b7bff8d781..f2173667c5c 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -632,7 +632,7 @@ typedef struct shader_reg_maps WINED3DSAMPLER_TEXTURE_TYPE sampler_type[max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS)]; BOOL bumpmat[MAX_TEXTURES], luminanceparams[MAX_TEXTURES]; - char usesnrm, vpos, usesdsy, usestexldd, usesmova; + char usesnrm, vpos, usesdsx, usesdsy, usestexldd, usesmova; char usesrelconstF; /* Whether or not loops are used in this shader, and nesting depth */