wined3d: sincos for vertex shaders.
SCS is unfortunately a fragment-program-only instruction. If the NV vertex program extensions are available we can use SIN and COS instead. Otherwise we have to approximate sine and cosine with a Taylor series. Luckily, the application provides the necessary constants.
This commit is contained in:
parent
2f3faf4526
commit
6492622350
|
@ -10111,6 +10111,82 @@ static void alphatest_test(IDirect3DDevice9 *device) {
|
|||
ok(hr == D3D_OK, "IDirect3DDevice9_SetPixelShader failed with 0x%08x\n", hr);
|
||||
}
|
||||
|
||||
/* Exercise the vs_2_0 sincos instruction.
 *
 * Two vertex shaders compute sincos(pi * v0.x) and use the sine (r0.y) or the
 * cosine (r0.x), scaled by 0.85, as the output y coordinate. A row of points
 * with x running from -1.0 up to (but excluding) 1.0 is drawn once with each
 * shader: the sine shader outputs c2.zyzz = (0, 1, 0, 0) as diffuse colour and
 * the cosine shader c2.yzzz = (1, 0, 0, 0).
 *
 * The rendered output is presented but not validated yet, see the TODO below.
 */
static void sincos_test(IDirect3DDevice9 *device) {
    const DWORD sin_shader_code[] = {
        0xfffe0200,                                                                 /* vs_2_0                       */
        0x0200001f, 0x80000000, 0x900f0000,                                         /* dcl_position v0              */
        0x05000051, 0xa00f0002, 0x40490fdb, 0x3f800000, 0x00000000, 0x3f59999a,     /* def c2, 3.14159, 1, 0, 0.85  */
        0x03000005, 0x80010001, 0x90000000, 0xa0000002,                             /* mul r1.x, v0.x, c2.x         */
        0x04000025, 0x80020000, 0x80000001, 0xa0e40000, 0xa0e40001,                 /* sincos r0.y, r1.x, c0, c1    */
        0x02000001, 0xc00d0000, 0x90e40000,                                         /* mov oPos.xzw, v0             */
        0x03000005, 0xc0020000, 0x80550000, 0xa0ff0002,                             /* mul oPos.y, r0.y, c2.w       */
        0x02000001, 0xd00f0000, 0xa0a60002,                                         /* mov oD0, c2.zyzz             */
        0x0000ffff                                                                  /* end                          */
    };
    const DWORD cos_shader_code[] = {
        0xfffe0200,                                                                 /* vs_2_0                       */
        0x0200001f, 0x80000000, 0x900f0000,                                         /* dcl_position v0              */
        0x05000051, 0xa00f0002, 0x40490fdb, 0x3f800000, 0x00000000, 0x3f59999a,     /* def c2, 3.14159, 1, 0, 0.85  */
        0x03000005, 0x80010001, 0x90000000, 0xa0000002,                             /* mul r1.x, v0.x, c2.x         */
        0x04000025, 0x80010000, 0x80000001, 0xa0e40000, 0xa0e40001,                 /* sincos r0.x, r1.x, c0, c1    */
        0x02000001, 0xc00d0000, 0x90e40000,                                         /* mov oPos.xzw, v0             */
        0x03000005, 0xc0020000, 0x80000000, 0xa0ff0002,                             /* mul oPos.y, r0.x, c2.w       */
        0x02000001, 0xd00f0000, 0xa0a90002,                                         /* mov oD0, c2.yzzz             */
        0x0000ffff                                                                  /* end                          */
    };
    /* Start out NULL so that a failed CreateVertexShader() never leads to a
     * Release() call on an uninitialised pointer. */
    IDirect3DVertexShader9 *sin_shader = NULL, *cos_shader = NULL;
    HRESULT hr;
    struct {
        float x, y, z;
    } data[1280];
    const unsigned int point_count = sizeof(data) / sizeof(*data);
    unsigned int i;
    /* Helper constants the sincos instruction expects in its src1 / src2 operands (c0 and c1 here). */
    float sincosc1[4] = {D3DSINCOSCONST1};
    float sincosc2[4] = {D3DSINCOSCONST2};

    hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0x00000000, 1.0f, 0);
    ok(hr == D3D_OK, "IDirect3DDevice9_Clear failed with 0x%08x\n", hr);

    hr = IDirect3DDevice9_CreateVertexShader(device, sin_shader_code, &sin_shader);
    ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader failed with 0x%08x\n", hr);
    hr = IDirect3DDevice9_CreateVertexShader(device, cos_shader_code, &cos_shader);
    ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader failed with 0x%08x\n", hr);
    hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ);
    ok(hr == D3D_OK, "IDirect3DDevice9_SetFVF failed with 0x%08x\n", hr);
    hr = IDirect3DDevice9_SetVertexShaderConstantF(device, 0, sincosc1, 1);
    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShaderConstantF failed with 0x%08x\n", hr);
    hr = IDirect3DDevice9_SetVertexShaderConstantF(device, 1, sincosc2, 1);
    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShaderConstantF failed with 0x%08x\n", hr);

    /* Generate points with x from -1.0 up to (excluding) 1.0 in steps of 1/640.
     * NOTE(review): this is one point every 0.5 pixels only if the render
     * target is 640 pixels wide - confirm against the device setup. */
    for(i = 0; i < point_count; i++) {
        data[i].x = (-640.0 + i) / 640.0;
        data[i].y = 0.0;
        data[i].z = 0.1;
    }

    hr = IDirect3DDevice9_BeginScene(device);
    ok(hr == D3D_OK, "IDirect3DDevice9_BeginScene failed with 0x%08x\n", hr);
    if(SUCCEEDED(hr)) {
        hr = IDirect3DDevice9_SetVertexShader(device, sin_shader);
        ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShader failed with 0x%08x\n", hr);
        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_POINTLIST, point_count, data, sizeof(*data));
        ok(hr == D3D_OK, "IDirect3DDevice9_DrawPrimitiveUP failed with 0x%08x\n", hr);

        hr = IDirect3DDevice9_SetVertexShader(device, cos_shader);
        ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShader failed with 0x%08x\n", hr);
        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_POINTLIST, point_count, data, sizeof(*data));
        ok(hr == D3D_OK, "IDirect3DDevice9_DrawPrimitiveUP failed with 0x%08x\n", hr);

        hr = IDirect3DDevice9_EndScene(device);
        ok(hr == D3D_OK, "IDirect3DDevice9_EndScene failed with 0x%08x\n", hr);
    }
    hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
    ok(hr == D3D_OK, "IDirect3DDevice9_Present failed with 0x%08x\n", hr);
    /* TODO: Find a way to properly validate the lines. Precision issues make this a rather nasty task. */

    /* Unbind the shader before releasing it so later tests start from the fixed function pipeline. */
    hr = IDirect3DDevice9_SetVertexShader(device, NULL);
    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShader failed with 0x%08x\n", hr);
    if(sin_shader) IDirect3DVertexShader9_Release(sin_shader);
    if(cos_shader) IDirect3DVertexShader9_Release(cos_shader);
}
|
||||
|
||||
START_TEST(visual)
|
||||
{
|
||||
IDirect3DDevice9 *device_ptr;
|
||||
|
@ -10220,6 +10296,7 @@ START_TEST(visual)
|
|||
if (caps.VertexShaderVersion >= D3DVS_VERSION(2, 0))
|
||||
{
|
||||
test_mova(device_ptr);
|
||||
sincos_test(device_ptr);
|
||||
if (caps.VertexShaderVersion >= D3DVS_VERSION(3, 0)) {
|
||||
test_vshader_input(device_ptr);
|
||||
test_vshader_float16(device_ptr);
|
||||
|
|
|
@ -1725,13 +1725,98 @@ static void shader_hw_sincos(const struct wined3d_shader_instruction *ins)
|
|||
* can't use map2gl
|
||||
*/
|
||||
SHADER_BUFFER *buffer = ins->ctx->buffer;
|
||||
struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
|
||||
const struct wined3d_shader_dst_param *dst = &ins->dst[0];
|
||||
char dst_name[50];
|
||||
char src_name[50];
|
||||
char src_name0[50], src_name1[50], src_name2[50];
|
||||
BOOL is_color;
|
||||
|
||||
shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
|
||||
shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
|
||||
shader_addline(buffer, "SCS%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name,
|
||||
src_name);
|
||||
shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
|
||||
if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
|
||||
shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
|
||||
shader_addline(buffer, "SCS%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name,
|
||||
src_name0);
|
||||
} else if(priv->target_version >= NV2) {
|
||||
shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
|
||||
|
||||
/* Sincos writemask must be .x, .y or .xy */
|
||||
if(dst->write_mask & WINED3DSP_WRITEMASK_0)
|
||||
shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
|
||||
if(dst->write_mask & WINED3DSP_WRITEMASK_1)
|
||||
shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
|
||||
} else {
|
||||
/* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8
|
||||
* helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
|
||||
*
|
||||
* sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
|
||||
* cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
|
||||
*
|
||||
* The constants we get are:
|
||||
*
|
||||
* +1.0 ,   +1      -1      -1      +1       +1       -1       -1
|
||||
*         ---- ,  ---- ,  ---- ,  ----- ,  ----- ,  ----- ,  ------
|
||||
*         1!*2    2!*4    3!*8    4!*16    5!*32    6!*64    7!*128
|
||||
*
|
||||
* If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
|
||||
*
|
||||
* (x/2)^2 = x^2 / 4
|
||||
* (x/2)^3 = x^3 / 8
|
||||
* (x/2)^4 = x^4 / 16
|
||||
* (x/2)^5 = x^5 / 32
|
||||
* etc
|
||||
*
|
||||
* To get the final result:
|
||||
* sin(x) = 2 * sin(x/2) * cos(x/2)
|
||||
* cos(x) = cos(x/2)^2 - sin(x/2)^2
|
||||
* (from sin(x+y) and cos(x+y) rules)
|
||||
*
|
||||
* As per MSDN, dst.z is undefined after the operation, and so is
|
||||
* dst.x and dst.y if they're masked out by the writemask. Ie
|
||||
* sincos dst.y, src1, c0, c1
|
||||
* returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
|
||||
* vsa.exe also stops with an error if the dest register is the same register as the source
|
||||
* register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
|
||||
* indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0).
|
||||
*/
|
||||
shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
|
||||
shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2);
|
||||
shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
|
||||
|
||||
shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0); /* x ^ 2 */
|
||||
shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0); /* x ^ 3 */
|
||||
shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0); /* x ^ 4 */
|
||||
shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0); /* x ^ 5 */
|
||||
shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0); /* x ^ 6 */
|
||||
shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0); /* x ^ 7 */
|
||||
|
||||
/* sin(x/2)
|
||||
*
|
||||
* Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
|
||||
* properly merge that with MULs in the code above?
|
||||
* The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
|
||||
* we can merge the sine and cosine MAD rows to calculate them together.
|
||||
*/
|
||||
shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */
|
||||
shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */
|
||||
shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */
|
||||
shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */
|
||||
|
||||
/* cos(x/2) */
|
||||
shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */
|
||||
shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */
|
||||
shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */
|
||||
|
||||
if(dst->write_mask & WINED3DSP_WRITEMASK_0) {
|
||||
/* cos x */
|
||||
shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n");
|
||||
shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name);
|
||||
}
|
||||
if(dst->write_mask & WINED3DSP_WRITEMASK_1) {
|
||||
/* sin x */
|
||||
shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name);
|
||||
shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* GL locking is done by the caller */
|
||||
|
|
Loading…
Reference in New Issue