wined3d: Avoid the TMP_COLOR mov in some cases.

Many 2.0 and 3.0 shaders end with a "mov oC0, rx". If sRGB writing is enabled,
the ARB backend writes to a TMP_COLOR temporary, and at the end of the shader
writes the sRGB-corrected color to result.color. If oC0 is not partially
rewritten after the mov, we can ignore the mov, not declare TMP_COLOR at all,
and just use the rx register as input for the sRGB correction code. This saves
a temporary and an instruction.
This commit is contained in:
Stefan Dösinger 2009-05-26 15:53:52 +02:00 committed by Alexandre Julliard
parent da7176be2a
commit d8e219be75
3 changed files with 58 additions and 6 deletions

View File

@ -1130,12 +1130,13 @@ static void shader_hw_nop(const struct wined3d_shader_instruction *ins)
static void shader_hw_mov(const struct wined3d_shader_instruction *ins) static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
{ {
IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader; IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
BOOL pshader = shader_is_pshader_version(shader->baseShader.reg_maps.shader_version.type);
struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
SHADER_BUFFER *buffer = ins->ctx->buffer; SHADER_BUFFER *buffer = ins->ctx->buffer;
char src0_param[256]; char src0_param[256];
if(ins->handler_idx == WINED3DSIH_MOVA) { if(ins->handler_idx == WINED3DSIH_MOVA) {
struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
struct wined3d_shader_src_param tmp_src = ins->src[0]; struct wined3d_shader_src_param tmp_src = ins->src[0];
char write_mask[6]; char write_mask[6];
@ -1188,6 +1189,16 @@ static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
shader_addline(buffer, "ARL A0.x, %s;\n", src0_param); shader_addline(buffer, "ARL A0.x, %s;\n", src0_param);
} }
} }
else if(ins->dst[0].reg.type == WINED3DSPR_COLOROUT && ins->dst[0].reg.idx == 0 && pshader)
{
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) shader;
if(ctx->cur_ps_args->super.srgb_correction && ps->color0_mov)
{
shader_addline(buffer, "#mov handled in srgb write code\n");
return;
}
shader_hw_map2gl(ins);
}
else else
{ {
shader_hw_map2gl(ins); shader_hw_map2gl(ins);
@ -2022,7 +2033,7 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info; const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
const local_constant *lconst; const local_constant *lconst;
GLuint retval; GLuint retval;
const char *fragcolor; char fragcolor[16];
DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This); DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This);
struct shader_arb_ctx_priv priv_ctx; struct shader_arb_ctx_priv priv_ctx;
@ -2065,13 +2076,17 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
if (reg_maps->shader_version.major < 2) if (reg_maps->shader_version.major < 2)
{ {
fragcolor = "R0"; strcpy(fragcolor, "R0");
} else { } else {
if(args->super.srgb_correction) { if(args->super.srgb_correction) {
shader_addline(buffer, "TEMP TMP_COLOR;\n"); if(This->color0_mov) {
fragcolor = "TMP_COLOR"; sprintf(fragcolor, "R%u", This->color0_reg);
} else { } else {
fragcolor = "result.color"; shader_addline(buffer, "TEMP TMP_COLOR;\n");
strcpy(fragcolor, "TMP_COLOR");
}
} else {
strcpy(fragcolor, "result.color");
} }
} }

View File

@ -588,6 +588,7 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
else else
{ {
int i, limit; int i, limit;
BOOL color0_mov = FALSE;
/* This will loop over all the registers and try to /* This will loop over all the registers and try to
* make a bitmask of the ones we're interested in. * make a bitmask of the ones we're interested in.
@ -612,6 +613,11 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
} }
else else
{ {
if(pshader && dst_param.reg.type == WINED3DSPR_COLOROUT && dst_param.reg.idx == 0)
{
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) This;
ps->color0_mov = FALSE;
}
shader_record_register_usage(This, reg_maps, &dst_param.reg, pshader); shader_record_register_usage(This, reg_maps, &dst_param.reg, pshader);
} }
@ -651,6 +657,22 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
{ {
reg_maps->bumpmat[dst_param.reg.idx] = TRUE; reg_maps->bumpmat[dst_param.reg.idx] = TRUE;
} }
else if(pshader && ins.handler_idx == WINED3DSIH_MOV)
{
/* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to
* COLOROUT 0. If we know this in advance, the ARB shader backend can skip
* the mov and perform the sRGB write correction from the source register.
*
* However, if the mov is only partial, we can't do this, and if the write
* comes from an instruction other than MOV it is hard to do as well. If
* COLOROUT 0 is overwritten partially later, the marker is dropped again.
*/
if(dst_param.reg.type == WINED3DSPR_COLOROUT && dst_param.reg.idx == 0)
{
/* Used later when the source register is read */
color0_mov = TRUE;
}
}
} }
if (ins.handler_idx == WINED3DSIH_NRM) if (ins.handler_idx == WINED3DSIH_NRM)
@ -686,6 +708,17 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
shader_record_register_usage(This, reg_maps, &src_param.reg, pshader); shader_record_register_usage(This, reg_maps, &src_param.reg, pshader);
--count; --count;
} }
if(color0_mov)
{
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) This;
if(src_param.reg.type == WINED3DSPR_TEMP &&
src_param.swizzle == WINED3DSP_NOSWIZZLE)
{
ps->color0_mov = TRUE;
ps->color0_reg = src_param.reg.idx;
}
}
} }
} }
} }

View File

@ -2711,6 +2711,10 @@ typedef struct IWineD3DPixelShaderImpl {
unsigned char numbumpenvmatconsts; unsigned char numbumpenvmatconsts;
struct stb_const_desc luminanceconst[MAX_TEXTURES]; struct stb_const_desc luminanceconst[MAX_TEXTURES];
char vpos_uniform; char vpos_uniform;
BOOL color0_mov;
DWORD color0_reg;
} IWineD3DPixelShaderImpl; } IWineD3DPixelShaderImpl;
extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl; extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl;