wined3d: Avoid the TMP_COLOR mov in some cases.
Many 2.0 and 3.0 pixel shaders end with a "mov oC0, rx" instruction. If sRGB writing is enabled, the ARB backend writes the color to a TMP_COLOR temporary and, at the end of the shader, writes the sRGB-corrected color to result.color. If oC0 is not partially rewritten after the mov, we can skip the mov, not declare TMP_COLOR at all, and use the rx register directly as the input for the sRGB correction code. This saves a temporary and an instruction.
This commit is contained in:
parent
da7176be2a
commit
d8e219be75
|
@ -1130,12 +1130,13 @@ static void shader_hw_nop(const struct wined3d_shader_instruction *ins)
|
|||
static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
|
||||
{
|
||||
IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
|
||||
BOOL pshader = shader_is_pshader_version(shader->baseShader.reg_maps.shader_version.type);
|
||||
struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
|
||||
|
||||
SHADER_BUFFER *buffer = ins->ctx->buffer;
|
||||
char src0_param[256];
|
||||
|
||||
if(ins->handler_idx == WINED3DSIH_MOVA) {
|
||||
struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
|
||||
struct wined3d_shader_src_param tmp_src = ins->src[0];
|
||||
char write_mask[6];
|
||||
|
||||
|
@ -1188,6 +1189,16 @@ static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
|
|||
shader_addline(buffer, "ARL A0.x, %s;\n", src0_param);
|
||||
}
|
||||
}
|
||||
else if(ins->dst[0].reg.type == WINED3DSPR_COLOROUT && ins->dst[0].reg.idx == 0 && pshader)
|
||||
{
|
||||
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) shader;
|
||||
if(ctx->cur_ps_args->super.srgb_correction && ps->color0_mov)
|
||||
{
|
||||
shader_addline(buffer, "#mov handled in srgb write code\n");
|
||||
return;
|
||||
}
|
||||
shader_hw_map2gl(ins);
|
||||
}
|
||||
else
|
||||
{
|
||||
shader_hw_map2gl(ins);
|
||||
|
@ -2022,7 +2033,7 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
|
|||
const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
|
||||
const local_constant *lconst;
|
||||
GLuint retval;
|
||||
const char *fragcolor;
|
||||
char fragcolor[16];
|
||||
DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This);
|
||||
struct shader_arb_ctx_priv priv_ctx;
|
||||
|
||||
|
@ -2065,13 +2076,17 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
|
|||
|
||||
if (reg_maps->shader_version.major < 2)
|
||||
{
|
||||
fragcolor = "R0";
|
||||
strcpy(fragcolor, "R0");
|
||||
} else {
|
||||
if(args->super.srgb_correction) {
|
||||
shader_addline(buffer, "TEMP TMP_COLOR;\n");
|
||||
fragcolor = "TMP_COLOR";
|
||||
if(This->color0_mov) {
|
||||
sprintf(fragcolor, "R%u", This->color0_reg);
|
||||
} else {
|
||||
fragcolor = "result.color";
|
||||
shader_addline(buffer, "TEMP TMP_COLOR;\n");
|
||||
strcpy(fragcolor, "TMP_COLOR");
|
||||
}
|
||||
} else {
|
||||
strcpy(fragcolor, "result.color");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -588,6 +588,7 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
|
|||
else
|
||||
{
|
||||
int i, limit;
|
||||
BOOL color0_mov = FALSE;
|
||||
|
||||
/* This will loop over all the registers and try to
|
||||
* make a bitmask of the ones we're interested in.
|
||||
|
@ -612,6 +613,11 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
|
|||
}
|
||||
else
|
||||
{
|
||||
if(pshader && dst_param.reg.type == WINED3DSPR_COLOROUT && dst_param.reg.idx == 0)
|
||||
{
|
||||
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) This;
|
||||
ps->color0_mov = FALSE;
|
||||
}
|
||||
shader_record_register_usage(This, reg_maps, &dst_param.reg, pshader);
|
||||
}
|
||||
|
||||
|
@ -651,6 +657,22 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
|
|||
{
|
||||
reg_maps->bumpmat[dst_param.reg.idx] = TRUE;
|
||||
}
|
||||
else if(pshader && ins.handler_idx == WINED3DSIH_MOV)
|
||||
{
|
||||
/* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to
|
||||
* COLOROUT 0. If we know this in advance, the ARB shader backend can skip
|
||||
* the mov and perform the sRGB write correction from the source register.
|
||||
*
|
||||
* However, if the mov is only partial, we can't do this, and if the write
|
||||
* comes from an instruction other than MOV it is hard to do as well. If
|
||||
* COLOROUT 0 is overwritten partially later, the marker is dropped again
|
||||
*/
|
||||
if(dst_param.reg.type == WINED3DSPR_COLOROUT && dst_param.reg.idx == 0)
|
||||
{
|
||||
/* Used later when the source register is read */
|
||||
color0_mov = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ins.handler_idx == WINED3DSIH_NRM)
|
||||
|
@ -686,6 +708,17 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
|
|||
shader_record_register_usage(This, reg_maps, &src_param.reg, pshader);
|
||||
--count;
|
||||
}
|
||||
|
||||
if(color0_mov)
|
||||
{
|
||||
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) This;
|
||||
if(src_param.reg.type == WINED3DSPR_TEMP &&
|
||||
src_param.swizzle == WINED3DSP_NOSWIZZLE)
|
||||
{
|
||||
ps->color0_mov = TRUE;
|
||||
ps->color0_reg = src_param.reg.idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2711,6 +2711,10 @@ typedef struct IWineD3DPixelShaderImpl {
|
|||
unsigned char numbumpenvmatconsts;
|
||||
struct stb_const_desc luminanceconst[MAX_TEXTURES];
|
||||
char vpos_uniform;
|
||||
|
||||
BOOL color0_mov;
|
||||
DWORD color0_reg;
|
||||
|
||||
} IWineD3DPixelShaderImpl;
|
||||
|
||||
extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl;
|
||||
|
|
Loading…
Reference in New Issue