d3dx9_36: Implement D3DXSHMultiply4.

This commit is contained in:
Nozomi Kodama 2012-10-02 14:10:17 +08:00 committed by Alexandre Julliard
parent 6643f07d2b
commit b8cfe42326
4 changed files with 340 additions and 1 deletions

View File

@ -281,7 +281,7 @@
@ stub D3DXSHEvalSphericalLight(long ptr long long long long ptr ptr ptr)
@ stdcall D3DXSHMultiply2(ptr ptr ptr)
@ stdcall D3DXSHMultiply3(ptr ptr ptr)
@ stub D3DXSHMultiply4(ptr ptr ptr)
@ stdcall D3DXSHMultiply4(ptr ptr ptr)
@ stub D3DXSHMultiply5(ptr ptr ptr)
@ stub D3DXSHMultiply6(ptr ptr ptr)
@ stub D3DXSHProjectCubeMap(long ptr ptr ptr ptr)

View File

@ -2429,6 +2429,294 @@ FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b)
return out;
}
/*
 * D3DXSHMultiply4
 *
 * Fills out[0..15] from a[0..15] and b[0..15] and returns out. No index
 * above 15 is read or written.
 * NOTE(review): going by the D3DX naming this is presumably the product of two
 * order-4 spherical-harmonics coefficient vectors; only the arithmetic itself
 * is visible here - confirm against the D3DX documentation.
 *
 * The body is a flat list of groups, one per index pair (i, j); the pair is
 * named in the comment above each group. Within a group:
 *   ta, tb  - the same weighted sum, taken over entries of a and of b;
 *   out[i] / out[j] receive the symmetric product ta * b[.] + tb * a[.];
 *   t       - a[i] * b[j] + a[j] * b[i] (just a[i] * b[i] when i == j), which
 *             is scaled and accumulated into further out entries (generally
 *             the ones whose indices appear in ta / tb).
 *
 * Each out entry is stored with '=' the first time it is touched and with
 * '+=' / '-=' afterwards, so the previous contents of out are never read
 * through out itself. However, out is written while a and b are still being
 * read: if out aliases a or b, later groups see partially updated values.
 * The result in that case depends on the exact statement order below, so the
 * statements must not be reordered.
 */
FLOAT * WINAPI D3DXSHMultiply4(FLOAT *out, CONST FLOAT *a, CONST FLOAT *b)
{
FLOAT ta, tb, t;
/* Log the argument pointers. */
TRACE("out %p, a %p, b %p\n", out, a, b);
/* pair (0, 0); 0.28209479 is numerically 1 / (2 * sqrt(pi)) */
out[0] = 0.28209479f * a[0] * b[0];
/* pair (1, 1); first store to out[1], out[6] and out[8] */
ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
out[1] = ta * b[1] + tb * a[1];
t = a[1] * b[1];
out[0] += 0.28209479f * t;
out[6] = -0.12615663f * t;
out[8] = -0.21850969f * t;
/* pair (1, 4); first store to out[4], out[3], out[13] and out[15] */
ta = 0.21850969f * a[3] - 0.05839917f * a[13] - 0.22617901f * a[15];
tb = 0.21850969f * b[3] - 0.05839917f * b[13] - 0.22617901f * b[15];
out[1] += ta * b[4] + tb * a[4];
out[4] = ta * b[1] + tb * a[1];
t = a[1] * b[4] + a[4] * b[1];
out[3] = 0.21850969f * t;
out[13] = -0.05839917f * t;
out[15] = -0.22617901f * t;
/* pair (1, 5); first store to out[5], out[2], out[12] and out[14] */
ta = 0.21850969f * a[2] - 0.14304817f * a[12] - 0.18467439f * a[14];
tb = 0.21850969f * b[2] - 0.14304817f * b[12] - 0.18467439f * b[14];
out[1] += ta * b[5] + tb * a[5];
out[5] = ta * b[1] + tb * a[1];
t = a[1] * b[5] + a[5] * b[1];
out[2] = 0.21850969f * t;
out[12] = -0.14304817f * t;
out[14] = -0.18467439f * t;
/* pair (1, 6); first store to out[11] */
ta = 0.20230066f * a[11];
tb = 0.20230066f * b[11];
out[1] += ta * b[6] + tb * a[6];
out[6] += ta * b[1] + tb * a[1];
t = a[1] * b[6] + a[6] * b[1];
out[11] = 0.20230066f * t;
/* pair (1, 8); first store to out[9] */
ta = 0.22617901f * a[9] + 0.05839917f * a[11];
tb = 0.22617901f * b[9] + 0.05839917f * b[11];
out[1] += ta * b[8] + tb * a[8];
out[8] += ta * b[1] + tb * a[1];
t = a[1] * b[8] + a[8] * b[1];
out[9] = 0.22617901f * t;
out[11] += 0.05839917f * t;
/* pair (2, 2) */
ta = 0.28209480f * a[0] + 0.25231326f * a[6];
tb = 0.28209480f * b[0] + 0.25231326f * b[6];
out[2] += ta * b[2] + tb * a[2];
t = a[2] * b[2];
out[0] += 0.28209480f * t;
out[6] += 0.25231326f * t;
/* pair (2, 6) */
ta = 0.24776671f * a[12];
tb = 0.24776671f * b[12];
out[2] += ta * b[6] + tb * a[6];
out[6] += ta * b[2] + tb * a[2];
t = a[2] * b[6] + a[6] * b[2];
out[12] += 0.24776671f * t;
/* pair (3, 3) */
ta = 0.28209480f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
tb = 0.28209480f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
out[3] += ta * b[3] + tb * a[3];
t = a[3] * b[3];
out[0] += 0.28209480f * t;
out[6] -= 0.12615663f * t;
out[8] += 0.21850969f * t;
/* pair (3, 6) */
ta = 0.20230066f * a[13];
tb = 0.20230066f * b[13];
out[3] += ta * b[6] + tb * a[6];
out[6] += ta * b[3] + tb * a[3];
t = a[3] * b[6] + a[6] * b[3];
out[13] += 0.20230066f * t;
/* pair (3, 7); first store to out[7] */
ta = 0.21850969f * a[2] - 0.14304817f * a[12] + 0.18467439f * a[14];
tb = 0.21850969f * b[2] - 0.14304817f * b[12] + 0.18467439f * b[14];
out[3] += ta * b[7] + tb * a[7];
out[7] = ta * b[3] + tb * a[3];
t = a[3] * b[7] + a[7] * b[3];
out[2] += 0.21850969f * t;
out[12] -= 0.14304817f * t;
out[14] += 0.18467439f * t;
/* pair (3, 8) */
ta = -0.05839917f * a[13] + 0.22617901f * a[15];
tb = -0.05839917f * b[13] + 0.22617901f * b[15];
out[3] += ta * b[8] + tb * a[8];
out[8] += ta * b[3] + tb * a[3];
t = a[3] * b[8] + a[8] * b[3];
out[13] -= 0.05839917f * t;
out[15] += 0.22617901f * t;
/* pair (4, 4) */
ta = 0.28209479f * a[0] - 0.18022375f * a[6];
tb = 0.28209479f * b[0] - 0.18022375f * b[6];
out[4] += ta * b[4] + tb * a[4];
t = a[4] * b[4];
out[0] += 0.28209479f * t;
out[6] -= 0.18022375f * t;
/* pair (4, 5) */
ta = 0.15607835f * a[7];
tb = 0.15607835f * b[7];
out[4] += ta * b[5] + tb * a[5];
out[5] += ta * b[4] + tb * a[4];
t = a[4] * b[5] + a[5] * b[4];
out[7] += 0.15607835f * t;
/* pair (4, 9) */
ta = 0.22617901f * a[3] - 0.09403160f * a[13];
tb = 0.22617901f * b[3] - 0.09403160f * b[13];
out[4] += ta * b[9] + tb * a[9];
out[9] += ta * b[4] + tb * a[4];
t = a[4] * b[9] + a[9] * b[4];
out[3] += 0.22617901f * t;
out[13] -= 0.09403160f * t;
/* pair (4, 10); first store to out[10] - from here on every out entry has been initialised */
ta = 0.18467439f * a[2] - 0.18806319f * a[12];
tb = 0.18467439f * b[2] - 0.18806319f * b[12];
out[4] += ta * b[10] + tb * a [10];
out[10] = ta * b[4] + tb * a[4];
t = a[4] * b[10] + a[10] * b[4];
out[2] += 0.18467439f * t;
out[12] -= 0.18806319f * t;
/* pair (4, 11) */
ta = -0.05839917f * a[3] + 0.14567312f * a[13] + 0.09403160f * a[15];
tb = -0.05839917f * b[3] + 0.14567312f * b[13] + 0.09403160f * b[15];
out[4] += ta * b[11] + tb * a[11];
out[11] += ta * b[4] + tb * a[4];
t = a[4] * b[11] + a[11] * b[4];
out[3] -= 0.05839917f * t;
out[13] += 0.14567312f * t;
out[15] += 0.09403160f * t;
/* pair (5, 5) */
ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
out[5] += ta * b[5] + tb * a[5];
t = a[5] * b[5];
out[0] += 0.28209479f * t;
out[6] += 0.09011186f * t;
out[8] -= 0.15607835f * t;
/* pair (5, 9) */
ta = 0.14867701f * a[14];
tb = 0.14867701f * b[14];
out[5] += ta * b[9] + tb * a[9];
out[9] += ta * b[5] + tb * a[5];
t = a[5] * b[9] + a[9] * b[5];
out[14] += 0.14867701f * t;
/* pair (5, 10) */
ta = 0.18467439f * a[3] + 0.11516472f * a[13] - 0.14867701f * a[15];
tb = 0.18467439f * b[3] + 0.11516472f * b[13] - 0.14867701f * b[15];
out[5] += ta * b[10] + tb * a[10];
out[10] += ta * b[5] + tb * a[5];
t = a[5] * b[10] + a[10] * b[5];
out[3] += 0.18467439f * t;
out[13] += 0.11516472f * t;
out[15] -= 0.14867701f * t;
/* pair (5, 11) */
ta = 0.23359668f * a[2] + 0.05947080f * a[12] - 0.11516472f * a[14];
tb = 0.23359668f * b[2] + 0.05947080f * b[12] - 0.11516472f * b[14];
out[5] += ta * b[11] + tb * a[11];
out[11] += ta * b[5] + tb * a[5];
t = a[5] * b[11] + a[11] * b[5];
out[2] += 0.23359668f * t;
out[12] += 0.05947080f * t;
out[14] -= 0.11516472f * t;
/* pair (6, 6); here t also feeds out[6] although ta / tb carry no index-6 term */
ta = 0.28209479f * a[0];
tb = 0.28209479f * b[0];
out[6] += ta * b[6] + tb * a[6];
t = a[6] * b[6];
out[0] += 0.28209479f * t;
out[6] += 0.18022376f * t;
/* pair (7, 7) */
ta = 0.09011186f * a[6] + 0.28209479f * a[0] + 0.15607835f * a[8];
tb = 0.09011186f * b[6] + 0.28209479f * b[0] + 0.15607835f * b[8];
out[7] += ta * b[7] + tb * a[7];
t = a[7] * b[7];
out[6] += 0.09011186f * t;
out[0] += 0.28209479f * t;
out[8] += 0.15607835f * t;
/* pair (7, 10) */
ta = 0.14867701f * a[9] + 0.18467439f * a[1] + 0.11516472f * a[11];
tb = 0.14867701f * b[9] + 0.18467439f * b[1] + 0.11516472f * b[11];
out[7] += ta * b[10] + tb * a[10];
out[10] += ta * b[7] + tb * a[7];
t = a[7] * b[10] + a[10] * b[7];
out[9] += 0.14867701f * t;
out[1] += 0.18467439f * t;
out[11] += 0.11516472f * t;
/* pair (7, 13) */
ta = 0.05947080f * a[12] + 0.23359668f * a[2] + 0.11516472f * a[14];
tb = 0.05947080f * b[12] + 0.23359668f * b[2] + 0.11516472f * b[14];
out[7] += ta * b[13] + tb * a[13];
out[13] += ta * b[7]+ tb * a[7];
t = a[7] * b[13] + a[13] * b[7];
out[12] += 0.05947080f * t;
out[2] += 0.23359668f * t;
out[14] += 0.11516472f * t;
/* pair (7, 14) */
ta = 0.14867701f * a[15];
tb = 0.14867701f * b[15];
out[7] += ta * b[14] + tb * a[14];
out[14] += ta * b[7] + tb * a[7];
t = a[7] * b[14] + a[14] * b[7];
out[15] += 0.14867701f * t;
/* pair (8, 8) */
ta = 0.28209479f * a[0] - 0.18022375f * a[6];
tb = 0.28209479f * b[0] - 0.18022375f * b[6];
out[8] += ta * b[8] + tb * a[8];
t = a[8] * b[8];
out[0] += 0.28209479f * t;
out[6] -= 0.18022375f * t;
/* pair (8, 9) */
ta = -0.09403160f * a[11];
tb = -0.09403160f * b[11];
out[8] += ta * b[9] + tb * a[9];
out[9] += ta * b[8] + tb * a[8];
t = a[8] * b[9] + a[9] * b[8];
out[11] -= 0.09403160f * t;
/* pair (8, 13) */
ta = -0.09403160f * a[15];
tb = -0.09403160f * b[15];
out[8] += ta * b[13] + tb * a[13];
out[13] += ta * b[8] + tb * a[8];
t = a[8] * b[13] + a[13] * b[8];
out[15] -= 0.09403160f * t;
/* pair (8, 14) */
ta = 0.18467439f * a[2] - 0.18806319f * a[12];
tb = 0.18467439f * b[2] - 0.18806319f * b[12];
out[8] += ta * b[14] + tb * a[14];
out[14] += ta * b[8] + tb * a[8];
t = a[8] * b[14] + a[14] * b[8];
out[2] += 0.18467439f * t;
out[12] -= 0.18806319f * t;
/* pair (9, 9) */
ta = -0.21026104f * a[6] + 0.28209479f * a[0];
tb = -0.21026104f * b[6] + 0.28209479f * b[0];
out[9] += ta * b[9] + tb * a[9];
t = a[9] * b[9];
out[6] -= 0.21026104f * t;
out[0] += 0.28209479f * t;
/* pair (10, 10) */
ta = 0.28209479f * a[0];
tb = 0.28209479f * b[0];
out[10] += ta * b[10] + tb * a[10];
t = a[10] * b[10];
out[0] += 0.28209479f * t;
/* pair (11, 11) */
ta = 0.28209479f * a[0] + 0.12615663f * a[6] - 0.14567312f * a[8];
tb = 0.28209479f * b[0] + 0.12615663f * b[6] - 0.14567312f * b[8];
out[11] += ta * b[11] + tb * a[11];
t = a[11] * b[11];
out[0] += 0.28209479f * t;
out[6] += 0.12615663f * t;
out[8] -= 0.14567312f * t;
/* pair (12, 12) */
ta = 0.28209479f * a[0] + 0.16820885f * a[6];
tb = 0.28209479f * b[0] + 0.16820885f * b[6];
out[12] += ta * b[12] + tb * a[12];
t = a[12] * b[12];
out[0] += 0.28209479f * t;
out[6] += 0.16820885f * t;
/* pair (13, 13) */
ta =0.28209479f * a[0] + 0.14567312f * a[8] + 0.12615663f * a[6];
tb =0.28209479f * b[0] + 0.14567312f * b[8] + 0.12615663f * b[6];
out[13] += ta * b[13] + tb * a[13];
t = a[13] * b[13];
out[0] += 0.28209479f * t;
out[8] += 0.14567312f * t;
out[6] += 0.12615663f * t;
/* pair (14, 14) */
ta = 0.28209479f * a[0];
tb = 0.28209479f * b[0];
out[14] += ta * b[14] + tb * a[14];
t = a[14] * b[14];
out[0] += 0.28209479f * t;
/* pair (15, 15) */
ta = 0.28209479f * a[0] - 0.21026104f * a[6];
tb = 0.28209479f * b[0] - 0.21026104f * b[6];
out[15] += ta * b[15] + tb * a[15];
t = a[15] * b[15];
out[0] += 0.28209479f * t;
out[6] -= 0.21026104f * t;
/* Hand the destination pointer back to the caller. */
return out;
}
static void rotate_X(FLOAT *out, UINT order, FLOAT a, FLOAT *in)
{
out[0] = in[0];

View File

@ -2663,6 +2663,55 @@ static void test_D3DXSHMultiply3(void)
ok(relative_error(c[i], expected[i]) < admitted_error, "Expected[%d] = %f, received = %f\n", i, expected[i], c[i]);
}
/*
 * Checks D3DXSHMultiply4 against recorded reference values in three
 * configurations: distinct buffers, output aliasing the first input, and
 * output aliasing both inputs.
 */
static void test_D3DXSHMultiply4(void)
{
    FLOAT a[20], b[20], c[20];
    const FLOAT *want;
    unsigned int i;
    /* D3DXSHMultiply4 only modifies the first 16 elements of the array */
    const FLOAT expected[] =
    {
        /* c, a, b */
        14.182599f, 2.615703f, 12.828601f, 9.820596f,
        3.039696f, 4.530442f, 5.820584f, 12.249846f,
        2.194346f, 3.900152f, 5.416609f, 5.601813f,
        0.959982f, 7.037550f, 3.625230f, 0.463601f,
        16.0f, 17.0f, 18.0f, 19.0f,
        /* c, c, b */
        -211441.265625f, -2529.157715f, -10023.393555f, -441.277191f,
        -163.994385f, -526.305115f, 29636.187500f, -3931.830811f,
        -13577.111328f, -3978.973877f, -10330.341797f, -13779.787109f,
        -16685.109375f, -44981.375000f, -73269.742188f, -95237.335938f,
        16.0f, 17.0f, 18.0f, 19.0f,
        /* c, c, c */
        0.236682f, -0.717649f, -0.180500f, -0.077124f,
        0.144831f, 0.573286f, -0.337959f, 0.055694f,
        -0.442100f, 0.147702f, -0.055157f, 0.084337f,
        0.179877f, 0.009099f, 0.232200f, 0.074142f,
        1.6f, 1.7f, 1.8f, 1.9f,
    };

    /* Case 1: out, a and b are three separate buffers. */
    i = 0;
    while (i < 20)
    {
        a[i] = 1.0f + i / 100.0f;
        b[i] = 3.0f - i / 100.0f;
        c[i] = i;
        i++;
    }
    D3DXSHMultiply4(c, a, b);
    want = expected;
    for (i = 0; i < 20; ++i)
    {
        ok(relative_error(c[i], want[i]) < admitted_error, "Expected[%d] = %f, received = %f\n", i, want[i], c[i]);
    }

    /* Case 2: the output buffer is also the first input. */
    i = 0;
    while (i < 20)
    {
        b[i] = 3.0f - i / 100.0f;
        c[i] = i;
        i++;
    }
    D3DXSHMultiply4(c, c, b);
    want = expected + 20;
    for (i = 0; i < 20; ++i)
    {
        ok(relative_error(c[i], want[i]) < admitted_error, "Expected[%d] = %f, received = %f\n", i, want[i], c[i]);
    }

    /* Case 3: the same buffer is used as output and as both inputs. */
    for (i = 0; i < 20; ++i)
    {
        c[i] = 0.1f * i;
    }
    D3DXSHMultiply4(c, c, c);
    want = expected + 40;
    for (i = 0; i < 20; ++i)
    {
        ok(relative_error(c[i], want[i]) < admitted_error, "Expected[%d] = %f, received = %f\n", i, want[i], c[i]);
    }
}
static void test_D3DXSHRotate(void)
{
D3DXMATRIX m[4];
@ -2829,6 +2878,7 @@ START_TEST(math)
test_D3DXSHEvalDirectionalLight();
test_D3DXSHMultiply2();
test_D3DXSHMultiply3();
test_D3DXSHMultiply4();
test_D3DXSHRotate();
test_D3DXSHRotateZ();
test_D3DXSHScale();

View File

@ -383,6 +383,7 @@ FLOAT* WINAPI D3DXSHEvalDirection(FLOAT *out, UINT order, CONST D3DXVECTOR3 *dir
HRESULT WINAPI D3DXSHEvalDirectionalLight(UINT order, CONST D3DXVECTOR3 *dir, FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout);
FLOAT* WINAPI D3DXSHMultiply2(FLOAT *out, CONST FLOAT *a, CONST FLOAT *b);
FLOAT* WINAPI D3DXSHMultiply3(FLOAT *out, CONST FLOAT *a, CONST FLOAT *b);
/* Fills out[0..15] from a[0..15] and b[0..15] and returns out. */
FLOAT* WINAPI D3DXSHMultiply4(FLOAT *out, CONST FLOAT *a, CONST FLOAT *b);
FLOAT* WINAPI D3DXSHRotate(FLOAT *out, UINT order, CONST D3DXMATRIX *matrix, CONST FLOAT *in);
FLOAT* WINAPI D3DXSHRotateZ(FLOAT *out, UINT order, FLOAT angle, CONST FLOAT *in);
FLOAT* WINAPI D3DXSHScale(FLOAT *out, UINT order, CONST FLOAT *a, CONST FLOAT scale);