From e00187616307740bd1da7c2ccde548f56a9a115a Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 28 Apr 2009 09:53:28 +0200 Subject: [PATCH] wined3d: Create a separate function to record register usage. --- dlls/wined3d/baseshader.c | 146 ++++++++++++++++++--------------- dlls/wined3d/glsl_shader.c | 20 +++-- dlls/wined3d/wined3d_private.h | 3 +- 3 files changed, 96 insertions(+), 73 deletions(-) diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c index e9cb760d2ef..21c99fccb7e 100644 --- a/dlls/wined3d/baseshader.c +++ b/dlls/wined3d/baseshader.c @@ -468,6 +468,78 @@ static void shader_delete_constant_list(struct list* clist) { list_init(clist); } +static void shader_record_register_usage(IWineD3DBaseShaderImpl *This, struct shader_reg_maps *reg_maps, + DWORD register_type, UINT register_idx, BOOL has_rel_addr, BOOL pshader) +{ + switch (register_type) + { + case WINED3DSPR_TEXTURE: /* WINED3DSPR_ADDR */ + if (pshader) reg_maps->texcoord[register_idx] = 1; + else reg_maps->address[register_idx] = 1; + break; + + case WINED3DSPR_TEMP: + reg_maps->temporary[register_idx] = 1; + break; + + case WINED3DSPR_INPUT: + if (!pshader) reg_maps->attributes[register_idx] = 1; + else + { + if (has_rel_addr) + { + /* If relative addressing is used, we must assume that all registers + * are used. Even if it is a construct like v3[aL], we can't assume + * that v0, v1 and v2 aren't read because aL can be negative */ + unsigned int i; + for (i = 0; i < MAX_REG_INPUT; ++i) + { + ((IWineD3DPixelShaderImpl *)This)->input_reg_used[i] = TRUE; + } + } + else + { + ((IWineD3DPixelShaderImpl *)This)->input_reg_used[register_idx] = TRUE; + } + } + break; + + case WINED3DSPR_RASTOUT: + if (register_idx == 1) reg_maps->fog = 1; + break; + + case WINED3DSPR_MISCTYPE: + if (pshader && register_idx == 0) reg_maps->vpos = 1; + break; + + case WINED3DSPR_CONST: + if (has_rel_addr) + { + if (!pshader) + { + if (register_idx <= ((IWineD3DVertexShaderImpl *)This)->min_rel_offset) + ((IWineD3DVertexShaderImpl *)This)->min_rel_offset = register_idx; + else if (register_idx >= ((IWineD3DVertexShaderImpl *)This)->max_rel_offset) + ((IWineD3DVertexShaderImpl *)This)->max_rel_offset = register_idx; + } + reg_maps->usesrelconstF = TRUE; + } + break; + + case WINED3DSPR_CONSTINT: + reg_maps->integer_constants |= (1 << register_idx); + break; + + case WINED3DSPR_CONSTBOOL: + reg_maps->boolean_constants |= (1 << register_idx); + break; + + default: + TRACE("Not recording register of type %#x and idx %u\n", register_type, register_idx); + break; + } +} + /* Note that this does not count the loop register * as an address register. */ @@ -481,7 +553,6 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m unsigned int cur_loop_depth = 0, max_loop_depth = 0; const DWORD* pToken = byte_code; char pshader; - unsigned int intconst = 0, boolconst = 0; /* There are some minor differences between pixel and vertex shaders */ @@ -625,7 +696,7 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m pToken += param_size; /* Rep and Loop always use an integer constant for the control parameters */ - intconst |= (1 << (reg & WINED3DSP_REGNUM_MASK)); + reg_maps->integer_constants |= (1 << (reg & WINED3DSP_REGNUM_MASK)); } else if (ins.handler_idx == WINED3DSIH_ENDLOOP || ins.handler_idx == WINED3DSIH_ENDREP) @@ -708,70 +779,19 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m regtype = shader_get_regtype(param); reg = param & WINED3DSP_REGNUM_MASK; - if (WINED3DSPR_TEXTURE == regtype) { /* vs: WINED3DSPR_ADDR */ - - if (pshader) - reg_maps->texcoord[reg] = 1; - else - reg_maps->address[reg] = 1; - } - - else if (WINED3DSPR_TEMP == regtype) - reg_maps->temporary[reg] = 1; - - else if (WINED3DSPR_INPUT == regtype) { - if( !pshader) - reg_maps->attributes[reg] = 1; - else { - if(param & WINED3DSHADER_ADDRMODE_RELATIVE) { - /* If relative addressing is used, we must assume that all registers - * are used. Even if it is a construct like v3[aL], we can't assume - * that v0, v1 and v2 aren't read because aL can be negative - */ - unsigned int i; - for(i = 0; i < MAX_REG_INPUT; i++) { - ((IWineD3DPixelShaderImpl *) This)->input_reg_used[i] = TRUE; - } - } else { - ((IWineD3DPixelShaderImpl *) This)->input_reg_used[reg] = TRUE; - } - } - } - - else if (WINED3DSPR_RASTOUT == regtype && reg == 1) - reg_maps->fog = 1; - - else if (WINED3DSPR_MISCTYPE == regtype && reg == 0 && pshader) - reg_maps->vpos = 1; - - else if(WINED3DSPR_CONST == regtype) { - if(param & WINED3DSHADER_ADDRMODE_RELATIVE) { - if(!pshader) { - if(reg <= ((IWineD3DVertexShaderImpl *) This)->min_rel_offset) { - ((IWineD3DVertexShaderImpl *) This)->min_rel_offset = reg; - } else if(reg >= ((IWineD3DVertexShaderImpl *) This)->max_rel_offset) { - ((IWineD3DVertexShaderImpl *) This)->max_rel_offset = reg; - } - } - reg_maps->usesrelconstF = TRUE; - } - } - else if(WINED3DSPR_CONSTINT == regtype) { - intconst |= (1 << reg); - } - else if(WINED3DSPR_CONSTBOOL == regtype) { - boolconst |= (1 << reg); - } - - /* WINED3DSPR_TEXCRDOUT is the same as WINED3DSPR_OUTPUT. _OUTPUT can be > MAX_REG_TEXCRD and is used - * in >= 3.0 shaders. Filter 3.0 shaders to prevent overflows, and also filter pixel shaders because TECRDOUT - * isn't used in them, but future register types might cause issues - */ - else if (WINED3DSPR_TEXCRDOUT == regtype && i == 0 /* Only look at writes */ + /* WINED3DSPR_TEXCRDOUT is the same as WINED3DSPR_OUTPUT. _OUTPUT can be > MAX_REG_TEXCRD and + * is used in >= 3.0 shaders. Filter 3.0 shaders to prevent overflows, and also filter pixel + * shaders because TECRDOUT isn't used in them, but future register types might cause issues */ + if (regtype == WINED3DSPR_TEXCRDOUT && i == 0 /* Only look at writes */ && !pshader && WINED3DSHADER_VERSION_MAJOR(shader_version) < 3) { reg_maps->texcoord_mask[reg] |= shader_get_writemask(param); } + else + { + shader_record_register_usage(This, reg_maps, regtype, reg, + param & WINED3DSHADER_ADDRMODE_RELATIVE, pshader); + } } } } @@ -779,8 +799,6 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m reg_maps->loop_depth = max_loop_depth; This->baseShader.functionLength = ((char *)pToken - (char *)byte_code); - This->baseShader.num_bool_consts = count_bits(boolconst); - This->baseShader.num_int_consts = count_bits(intconst); return WINED3D_OK; } diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 6aca4da0ff1..329ae9412b8 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -558,13 +558,15 @@ static void shader_glsl_load_constants( prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version); /* Load DirectX 9 integer constants/uniforms for vertex shader */ - if(vshader->baseShader.num_int_consts) { + if (vshader->baseShader.reg_maps.integer_constants) + { shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations, stateBlock->vertexShaderConstantI, stateBlock->changed.vertexShaderConstantsI); } /* Load DirectX 9 boolean constants/uniforms for vertex shader */ - if(vshader->baseShader.num_bool_consts) { + if (vshader->baseShader.reg_maps.boolean_constants) + { shader_glsl_load_constantsB(vshader, gl_info, programId, stateBlock->vertexShaderConstantB, stateBlock->changed.vertexShaderConstantsB); } @@ -583,13 +585,15 @@ static void shader_glsl_load_constants( prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version); /* Load DirectX 9 integer constants/uniforms for pixel shader */ - if(pshader->baseShader.num_int_consts) { + if (pshader->baseShader.reg_maps.integer_constants) + { shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations, stateBlock->pixelShaderConstantI, stateBlock->changed.pixelShaderConstantsI); } /* Load DirectX 9 boolean constants/uniforms for pixel shader */ - if(pshader->baseShader.num_bool_consts) { + if (pshader->baseShader.reg_maps.boolean_constants) + { shader_glsl_load_constantsB(pshader, gl_info, programId, stateBlock->pixelShaderConstantB, stateBlock->changed.pixelShaderConstantsB); } @@ -748,12 +752,12 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s * (Unfortunately the Nvidia driver doesn't store 128 and -128 in one float */ max_constantsF = GL_LIMITS(vshader_constantsF) - 3; - max_constantsF -= This->baseShader.num_int_consts; + max_constantsF -= count_bits(This->baseShader.reg_maps.integer_constants); /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly, * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but * for now take this into account when calculating the number of available constants */ - max_constantsF -= This->baseShader.num_bool_consts; + max_constantsF -= count_bits(This->baseShader.reg_maps.boolean_constants); /* Set by driver quirks in directx.c */ max_constantsF -= GLINFO_LOCATION.reserved_glsl_constants; } else { @@ -767,10 +771,10 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s /* Always declare the full set of constants, the compiler can remove the unused ones because d3d doesn't(yet) * support indirect int and bool constant addressing. This avoids problems if the app uses e.g. i0 and i9. */ - if (This->baseShader.limits.constant_int > 0 && This->baseShader.num_int_consts) + if (This->baseShader.limits.constant_int > 0 && This->baseShader.reg_maps.integer_constants) shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int); - if (This->baseShader.limits.constant_bool > 0 && This->baseShader.num_bool_consts) + if (This->baseShader.limits.constant_bool > 0 && This->baseShader.reg_maps.boolean_constants) shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool); if(!pshader) { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 59b3b601d24..6a22c60aa6d 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -416,6 +416,8 @@ typedef struct shader_reg_maps char attributes[MAX_ATTRIBS]; /* vertex */ char labels[MAX_LABELS]; /* pixel, vertex */ DWORD texcoord_mask[MAX_REG_TEXCRD]; /* vertex < 3.0 */ + WORD integer_constants; /* MAX_CONST_I, 16 */ + WORD boolean_constants; /* MAX_CONST_B, 16 */ /* Sampler usage tokens * Use 0 as default (bit 31 is always 1 on a valid token) */ @@ -2318,7 +2320,6 @@ typedef struct IWineD3DBaseShaderClass UINT functionLength; UINT cur_loop_depth, cur_loop_regno; BOOL load_local_constsF; - BOOL num_bool_consts, num_int_consts; /* Type of shader backend */ int shader_mode;