wined3d: Get rid of TMP accesses in texm3x3* instructions.
This commit is contained in:
parent
ced325f816
commit
ad217029b0
|
@ -1294,9 +1294,16 @@ static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins)
|
||||||
SHADER_BUFFER *buffer = ins->ctx->buffer;
|
SHADER_BUFFER *buffer = ins->ctx->buffer;
|
||||||
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
|
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
|
||||||
char src0_name[50];
|
char src0_name[50];
|
||||||
|
unsigned int dst;
|
||||||
|
|
||||||
|
/* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
|
||||||
|
* incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
|
||||||
|
* register, and this register is uninitialized (otherwise the assembler complains that it is 'redeclared')
|
||||||
|
*/
|
||||||
|
dst = reg + 2 - current_state->current_row;
|
||||||
|
|
||||||
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
|
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
|
||||||
shader_addline(buffer, "DP3 TMP.%c, fragment.texcoord[%u], %s;\n", 'x' + current_state->current_row, reg, src0_name);
|
shader_addline(buffer, "DP3 T%u.%c, fragment.texcoord[%u], %s;\n", dst, 'x' + current_state->current_row, reg, src0_name);
|
||||||
current_state->texcoord_w[current_state->current_row++] = reg;
|
current_state->texcoord_w[current_state->current_row++] = reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1309,15 +1316,17 @@ static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins)
|
||||||
SHADER_BUFFER *buffer = ins->ctx->buffer;
|
SHADER_BUFFER *buffer = ins->ctx->buffer;
|
||||||
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
|
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
|
||||||
char dst_str[50];
|
char dst_str[50];
|
||||||
|
char dst_reg[8];
|
||||||
char src0_name[50];
|
char src0_name[50];
|
||||||
|
|
||||||
|
sprintf(dst_reg, "T%u", reg);
|
||||||
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
|
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
|
||||||
shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
|
shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
|
||||||
|
|
||||||
/* Sample the texture using the calculated coordinates */
|
/* Sample the texture using the calculated coordinates */
|
||||||
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
|
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
|
||||||
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
|
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
|
||||||
shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
|
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
|
||||||
current_state->current_row = 0;
|
current_state->current_row = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1331,10 +1340,14 @@ static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins
|
||||||
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
|
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
|
||||||
char dst_str[50];
|
char dst_str[50];
|
||||||
char src0_name[50];
|
char src0_name[50];
|
||||||
|
char dst_reg[8];
|
||||||
|
|
||||||
|
/* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
|
||||||
|
* components for temporary data storage
|
||||||
|
*/
|
||||||
|
sprintf(dst_reg, "T%u", reg);
|
||||||
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
|
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
|
||||||
/* Note: TMP.xy is input here, generated in earlier texm3x3pad instructions */
|
shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
|
||||||
shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
|
|
||||||
|
|
||||||
/* Construct the eye-ray vector from w coordinates */
|
/* Construct the eye-ray vector from w coordinates */
|
||||||
shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", current_state->texcoord_w[0]);
|
shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", current_state->texcoord_w[0]);
|
||||||
|
@ -1343,18 +1356,18 @@ static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins
|
||||||
|
|
||||||
/* Calculate reflection vector
|
/* Calculate reflection vector
|
||||||
*/
|
*/
|
||||||
shader_addline(buffer, "DP3 TMP.w, TMP, TB;\n");
|
shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg);
|
||||||
/* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
|
/* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
|
||||||
shader_addline(buffer, "DP3 TB.w, TMP, TMP;\n");
|
shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg);
|
||||||
shader_addline(buffer, "RCP TB.w, TB.w;\n");
|
shader_addline(buffer, "RCP TB.w, TB.w;\n");
|
||||||
shader_addline(buffer, "MUL TMP.w, TMP.w, TB.w;\n");
|
shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg);
|
||||||
shader_addline(buffer, "MUL TMP, TMP.w, TMP;\n");
|
shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
|
||||||
shader_addline(buffer, "MAD TMP, coefmul.x, TMP, -TB;\n");
|
shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg);
|
||||||
|
|
||||||
/* Sample the texture using the calculated coordinates */
|
/* Sample the texture using the calculated coordinates */
|
||||||
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
|
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
|
||||||
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
|
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
|
||||||
shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
|
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
|
||||||
current_state->current_row = 0;
|
current_state->current_row = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1369,11 +1382,13 @@ static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
|
||||||
char dst_str[50];
|
char dst_str[50];
|
||||||
char src0_name[50];
|
char src0_name[50];
|
||||||
char src1_name[50];
|
char src1_name[50];
|
||||||
|
char dst_reg[8];
|
||||||
|
|
||||||
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
|
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
|
||||||
shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
|
shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
|
||||||
/* Note: TMP.xy is input here, generated by two texm3x3pad instructions */
|
/* Note: T<reg>.xy (the destination register) is input here, generated by two texm3x3pad instructions */
|
||||||
shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
|
sprintf(dst_reg, "T%u", reg);
|
||||||
|
shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
|
||||||
|
|
||||||
/* Calculate reflection vector.
|
/* Calculate reflection vector.
|
||||||
*
|
*
|
||||||
|
@ -1383,17 +1398,17 @@ static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
|
||||||
*
|
*
|
||||||
* Which normalizes the normal vector
|
* Which normalizes the normal vector
|
||||||
*/
|
*/
|
||||||
shader_addline(buffer, "DP3 TMP.w, TMP, %s;\n", src1_name);
|
shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name);
|
||||||
shader_addline(buffer, "DP3 TC.w, TMP, TMP;\n");
|
shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg);
|
||||||
shader_addline(buffer, "RCP TC.w, TC.w;\n");
|
shader_addline(buffer, "RCP TC.w, TC.w;\n");
|
||||||
shader_addline(buffer, "MUL TMP.w, TMP.w, TC.w;\n");
|
shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg);
|
||||||
shader_addline(buffer, "MUL TMP, TMP.w, TMP;\n");
|
shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
|
||||||
shader_addline(buffer, "MAD TMP, coefmul.x, TMP, -%s;\n", src1_name);
|
shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name);
|
||||||
|
|
||||||
/* Sample the texture using the calculated coordinates */
|
/* Sample the texture using the calculated coordinates */
|
||||||
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
|
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
|
||||||
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
|
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
|
||||||
shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
|
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
|
||||||
current_state->current_row = 0;
|
current_state->current_row = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1469,8 +1484,8 @@ static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins)
|
||||||
|
|
||||||
shader_arb_get_dst_param(ins, dst, dst_str);
|
shader_arb_get_dst_param(ins, dst, dst_str);
|
||||||
shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
|
shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
|
||||||
shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", dst->reg.idx, src0);
|
shader_addline(buffer, "DP3 T%u.z, fragment.texcoord[%u], %s;\n", dst->reg.idx, dst->reg.idx, src0);
|
||||||
shader_addline(buffer, "MOV %s, TMP;\n", dst_str);
|
shader_addline(buffer, "MOV %s, T%u;\n", dst_str, dst->reg.idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB:
|
/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB:
|
||||||
|
|
Loading…
Reference in New Issue