/*
 * Review notes for this patch: 64-bit integer reductions for vcomp.
 *
 * dlls/vcomp/main.c
 *   - Adds _vcomp_reduction_i8(flags, dest, val). The operation index is read
 *     from bits 8-11 of flags, (flags >> 8) & 0xf, and then clamped with min()
 *     to the last slot of the eight-entry funcs[] table. Indexes 8-15 therefore
 *     run the same handler as index 7 (_vcomp_atomic_bool_or_i8), and indexes
 *     0 and 1 both select _vcomp_atomic_add_i8.
 *   - Adds two static helpers, each a retry loop that repeats until
 *     interlocked_cmpxchg64() returns the value previously read from dest
 *     (presumably a compare-and-swap returning the prior value; confirm
 *     against its definition, which is not part of this patch):
 *       _vcomp_atomic_bool_and_i8 stores (old && val), i.e. 0 or 1;
 *       _vcomp_atomic_bool_or_i8 writes back old when it is non-zero and
 *       otherwise stores (val != 0).
 *
 * dlls/vcomp/tests/vcomp.c
 *   - Declares and loads p_vcomp_reduction_i8 and p_vcomp_reduction_u8.
 *   - Makes the existing 8, 16 and 32-bit reduction test tables static const.
 *   - Adds test_reduction_integer64(), which runs a single table through the
 *     i8 entry point and then through the u8 entry point, and calls it from
 *     START_TEST(vcomp). The rows with flags 0x800 through 0xf00 expect the
 *     same result as the VCOMP_REDUCTION_FLAGS_BOOL_OR row.
 *
 * spec files
 *   - vcomp.spec replaces the i8 and u8 stubs with cdecl entries; the u8 entry
 *     is followed by the name _vcomp_reduction_i8, the same pattern the
 *     u1, u2 and u4 entries use with their signed counterparts.
 *   - The vcomp90, vcomp100, vcomp110, vcomp120 and vcomp140 spec files point
 *     both names at vcomp.
 *
 * NOTE(review): the hunks below appear to have had their line breaks replaced
 * by spaces. The original line structure probably has to be restored before
 * the patch can be applied; confirm against the upstream commit.
 */
diff --git a/dlls/vcomp/main.c b/dlls/vcomp/main.c index ba7ead7829c..8523155c886 100644 --- a/dlls/vcomp/main.c +++ b/dlls/vcomp/main.c @@ -736,6 +736,36 @@ void CDECL _vcomp_atomic_xor_i8(LONG64 *dest, LONG64 val) do old = *dest; while (interlocked_cmpxchg64(dest, old ^ val, old) != old); } +static void CDECL _vcomp_atomic_bool_and_i8(LONG64 *dest, LONG64 val) +{ + LONG64 old; + do old = *dest; while (interlocked_cmpxchg64(dest, old && val, old) != old); +} + +static void CDECL _vcomp_atomic_bool_or_i8(LONG64 *dest, LONG64 val) +{ + LONG64 old; + do old = *dest; while (interlocked_cmpxchg64(dest, old ? old : (val != 0), old) != old); +} + +void CDECL _vcomp_reduction_i8(unsigned int flags, LONG64 *dest, LONG64 val) +{ + static void (CDECL * const funcs[])(LONG64 *, LONG64) = + { + _vcomp_atomic_add_i8, + _vcomp_atomic_add_i8, + _vcomp_atomic_mul_i8, + _vcomp_atomic_and_i8, + _vcomp_atomic_or_i8, + _vcomp_atomic_xor_i8, + _vcomp_atomic_bool_and_i8, + _vcomp_atomic_bool_or_i8, + }; + unsigned int op = (flags >> 8) & 0xf; + op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1); + funcs[op](dest, val); +} + void CDECL _vcomp_atomic_add_r4(float *dest, float val) { int old, new; diff --git a/dlls/vcomp/tests/vcomp.c b/dlls/vcomp/tests/vcomp.c index 3ecbde656c9..088f25680fd 100644 --- a/dlls/vcomp/tests/vcomp.c +++ b/dlls/vcomp/tests/vcomp.c @@ -106,9 +106,11 @@ static void (CDECL *p_vcomp_master_end)(void); static void (CDECL *p_vcomp_reduction_i1)(unsigned int flags, char *dest, char val); static void (CDECL *p_vcomp_reduction_i2)(unsigned int flags, short *dest, short val); static void (CDECL *p_vcomp_reduction_i4)(unsigned int flags, int *dest, int val); +static void (CDECL *p_vcomp_reduction_i8)(unsigned int flags, LONG64 *dest, LONG64 val); static void (CDECL *p_vcomp_reduction_u1)(unsigned int flags, unsigned char *dest, unsigned char val); static void (CDECL *p_vcomp_reduction_u2)(unsigned int flags, unsigned short *dest, unsigned short val); static void (CDECL 
*p_vcomp_reduction_u4)(unsigned int flags, unsigned int *dest, unsigned int val); +static void (CDECL *p_vcomp_reduction_u8)(unsigned int flags, ULONG64 *dest, ULONG64 val); static void (CDECL *p_vcomp_sections_init)(int n); static int (CDECL *p_vcomp_sections_next)(void); static void (CDECL *p_vcomp_set_num_threads)(int num_threads); @@ -353,9 +355,11 @@ static BOOL init_vcomp(void) VCOMP_GET_PROC(_vcomp_reduction_i1); VCOMP_GET_PROC(_vcomp_reduction_i2); VCOMP_GET_PROC(_vcomp_reduction_i4); + VCOMP_GET_PROC(_vcomp_reduction_i8); VCOMP_GET_PROC(_vcomp_reduction_u1); VCOMP_GET_PROC(_vcomp_reduction_u2); VCOMP_GET_PROC(_vcomp_reduction_u4); + VCOMP_GET_PROC(_vcomp_reduction_u8); VCOMP_GET_PROC(_vcomp_sections_init); VCOMP_GET_PROC(_vcomp_sections_next); VCOMP_GET_PROC(_vcomp_set_num_threads); @@ -1893,7 +1897,7 @@ static void test_atomic_double(void) static void test_reduction_integer8(void) { - struct + static const struct { unsigned int flags; char v1, v2, expected; @@ -1937,7 +1941,7 @@ static void test_reduction_integer8(void) static void test_reduction_integer16(void) { - struct + static const struct { unsigned int flags; short v1, v2, expected; @@ -1987,7 +1991,7 @@ static void CDECL reduction_cb(int *a, int *b) static void test_reduction_integer32(void) { - struct + static const struct { unsigned int flags; int v1, v2, expected; @@ -2061,6 +2065,49 @@ static void test_reduction_integer32(void) } } +static void test_reduction_integer64(void) +{ + static const struct + { + unsigned int flags; + LONG64 v1, v2, expected; + } + tests[] = + { + { 0x000, 0x1122334455667788, 0x7766554433221100, -0x7777777777777778 }, + { VCOMP_REDUCTION_FLAGS_ADD, 0x1122334455667788, 0x7766554433221100, -0x7777777777777778 }, + { VCOMP_REDUCTION_FLAGS_MUL, 0x1122334455667788, 0x7766554433221100, 0x3e963337c6000800 }, + { VCOMP_REDUCTION_FLAGS_MUL, 0x1122334455667788, -0x7766554433221100, 0xc169ccc839fff800 }, + { VCOMP_REDUCTION_FLAGS_AND, 0x1122334455667788, 0x7766554433221100, 
0x1122114411221100 }, + { VCOMP_REDUCTION_FLAGS_OR, 0x1122334455667788, 0x7766554433221100, 0x7766774477667788 }, + { VCOMP_REDUCTION_FLAGS_XOR, 0x1122334455667788, 0x7766554433221100, 0x6644660066446688 }, + { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1, 2, 1 }, + { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0, 2, 1 }, + { 0x800, 0, 2, 1 }, + { 0x900, 0, 2, 1 }, + { 0xa00, 0, 2, 1 }, + { 0xb00, 0, 2, 1 }, + { 0xc00, 0, 2, 1 }, + { 0xd00, 0, 2, 1 }, + { 0xe00, 0, 2, 1 }, + { 0xf00, 0, 2, 1 }, + }; + int i; + + for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) + { + LONG64 val = tests[i].v1; + p_vcomp_reduction_i8(tests[i].flags, &val, tests[i].v2); + ok(val == tests[i].expected, "test %d: unexpectedly got %s\n", i, debugstr_longlong(val)); + } + for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) + { + ULONG64 val = tests[i].v1; + p_vcomp_reduction_u8(tests[i].flags, &val, tests[i].v2); + ok(val == tests[i].expected, "test %d: unexpectedly got %s\n", i, debugstr_longlong(val)); + } +} + START_TEST(vcomp) { if (!init_vcomp()) @@ -2088,6 +2135,7 @@ START_TEST(vcomp) test_reduction_integer8(); test_reduction_integer16(); test_reduction_integer32(); + test_reduction_integer64(); release_vcomp(); } diff --git a/dlls/vcomp/vcomp.spec b/dlls/vcomp/vcomp.spec index eb5f39c0ecd..6f57746ac36 100644 --- a/dlls/vcomp/vcomp.spec +++ b/dlls/vcomp/vcomp.spec @@ -76,13 +76,13 @@ @ cdecl _vcomp_reduction_i1(long ptr long) @ cdecl _vcomp_reduction_i2(long ptr long) @ cdecl _vcomp_reduction_i4(long ptr long) -@ stub _vcomp_reduction_i8 +@ cdecl _vcomp_reduction_i8(long ptr int64) @ stub _vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) _vcomp_reduction_i1 @ cdecl _vcomp_reduction_u2(long ptr long) _vcomp_reduction_i2 @ cdecl _vcomp_reduction_u4(long ptr long) _vcomp_reduction_i4 -@ stub _vcomp_reduction_u8 +@ cdecl _vcomp_reduction_u8(long ptr int64) _vcomp_reduction_i8 @ cdecl _vcomp_sections_init(long) @ cdecl _vcomp_sections_next() @ cdecl 
_vcomp_set_num_threads(long) diff --git a/dlls/vcomp100/vcomp100.spec b/dlls/vcomp100/vcomp100.spec index 73543c153e6..564b3d72a64 100644 --- a/dlls/vcomp100/vcomp100.spec +++ b/dlls/vcomp100/vcomp100.spec @@ -76,13 +76,13 @@ @ cdecl _vcomp_reduction_i1(long ptr long) vcomp._vcomp_reduction_i1 @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 -@ stub _vcomp_reduction_i8 +@ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 @ stub _vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 @ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4 -@ stub _vcomp_reduction_u8 +@ cdecl _vcomp_reduction_u8(long ptr int64) vcomp._vcomp_reduction_u8 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next @ cdecl _vcomp_set_num_threads(long) vcomp._vcomp_set_num_threads diff --git a/dlls/vcomp110/vcomp110.spec b/dlls/vcomp110/vcomp110.spec index 6fb3811258c..fa6c0473eaa 100644 --- a/dlls/vcomp110/vcomp110.spec +++ b/dlls/vcomp110/vcomp110.spec @@ -77,13 +77,13 @@ @ cdecl _vcomp_reduction_i1(long ptr long) vcomp._vcomp_reduction_i1 @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 -@ stub _vcomp_reduction_i8 +@ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 @ stub _vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 @ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4 -@ stub _vcomp_reduction_u8 +@ cdecl _vcomp_reduction_u8(long ptr int64) vcomp._vcomp_reduction_u8 @ cdecl _vcomp_sections_init(long) 
vcomp._vcomp_sections_init @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next @ cdecl _vcomp_set_num_threads(long) vcomp._vcomp_set_num_threads diff --git a/dlls/vcomp120/vcomp120.spec b/dlls/vcomp120/vcomp120.spec index 6fb3811258c..fa6c0473eaa 100644 --- a/dlls/vcomp120/vcomp120.spec +++ b/dlls/vcomp120/vcomp120.spec @@ -77,13 +77,13 @@ @ cdecl _vcomp_reduction_i1(long ptr long) vcomp._vcomp_reduction_i1 @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 -@ stub _vcomp_reduction_i8 +@ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 @ stub _vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 @ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4 -@ stub _vcomp_reduction_u8 +@ cdecl _vcomp_reduction_u8(long ptr int64) vcomp._vcomp_reduction_u8 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next @ cdecl _vcomp_set_num_threads(long) vcomp._vcomp_set_num_threads diff --git a/dlls/vcomp140/vcomp140.spec b/dlls/vcomp140/vcomp140.spec index 6fb3811258c..fa6c0473eaa 100644 --- a/dlls/vcomp140/vcomp140.spec +++ b/dlls/vcomp140/vcomp140.spec @@ -77,13 +77,13 @@ @ cdecl _vcomp_reduction_i1(long ptr long) vcomp._vcomp_reduction_i1 @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 -@ stub _vcomp_reduction_i8 +@ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 @ stub _vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 @ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4 -@ stub _vcomp_reduction_u8 +@ 
cdecl _vcomp_reduction_u8(long ptr int64) vcomp._vcomp_reduction_u8 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next @ cdecl _vcomp_set_num_threads(long) vcomp._vcomp_set_num_threads diff --git a/dlls/vcomp90/vcomp90.spec b/dlls/vcomp90/vcomp90.spec index 73543c153e6..564b3d72a64 100644 --- a/dlls/vcomp90/vcomp90.spec +++ b/dlls/vcomp90/vcomp90.spec @@ -76,13 +76,13 @@ @ cdecl _vcomp_reduction_i1(long ptr long) vcomp._vcomp_reduction_i1 @ cdecl _vcomp_reduction_i2(long ptr long) vcomp._vcomp_reduction_i2 @ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4 -@ stub _vcomp_reduction_i8 +@ cdecl _vcomp_reduction_i8(long ptr int64) vcomp._vcomp_reduction_i8 @ stub _vcomp_reduction_r4 @ stub _vcomp_reduction_r8 @ cdecl _vcomp_reduction_u1(long ptr long) vcomp._vcomp_reduction_u1 @ cdecl _vcomp_reduction_u2(long ptr long) vcomp._vcomp_reduction_u2 @ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4 -@ stub _vcomp_reduction_u8 +@ cdecl _vcomp_reduction_u8(long ptr int64) vcomp._vcomp_reduction_u8 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next @ cdecl _vcomp_set_num_threads(long) vcomp._vcomp_set_num_threads