From 96e0c994b232dcd1def407d006d488483252c152 Mon Sep 17 00:00:00 2001
From: Sebastian Lackner
Date: Wed, 7 Sep 2016 10:56:01 +0200
Subject: [PATCH] vcomp: Implement _vcomp_reduction_{u,i}4 and add tests.

Signed-off-by: Sebastian Lackner
Signed-off-by: Alexandre Julliard
---
 dlls/vcomp/main.c | 30 ++++++++++++
 dlls/vcomp/tests/vcomp.c | 95 +++++++++++++++++++++++++++++++++++++
 dlls/vcomp/vcomp.spec | 4 +-
 dlls/vcomp100/vcomp100.spec | 4 +-
 dlls/vcomp110/vcomp110.spec | 4 +-
 dlls/vcomp120/vcomp120.spec | 4 +-
 dlls/vcomp140/vcomp140.spec | 4 +-
 dlls/vcomp90/vcomp90.spec | 4 +-
 8 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/dlls/vcomp/main.c b/dlls/vcomp/main.c
index 61f84be5a0e..53ad28a66a3 100644
--- a/dlls/vcomp/main.c
+++ b/dlls/vcomp/main.c
@@ -580,6 +580,36 @@ void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
     do old = *dest; while (interlocked_cmpxchg(dest, old ^ val, old) != old);
 }
 
+static void CDECL _vcomp_atomic_bool_and_i4(int *dest, int val)
+{
+    int old;
+    do old = *dest; while (interlocked_cmpxchg(dest, old && val, old) != old);
+}
+
+static void CDECL _vcomp_atomic_bool_or_i4(int *dest, int val)
+{
+    int old;
+    do old = *dest; while (interlocked_cmpxchg(dest, old ? old : (val != 0), old) != old);
+}
+
+void CDECL _vcomp_reduction_i4(unsigned int flags, int *dest, int val)
+{
+    static void (CDECL * const funcs[])(int *, int) =
+    {
+        _vcomp_atomic_add_i4,
+        _vcomp_atomic_add_i4,
+        _vcomp_atomic_mul_i4,
+        _vcomp_atomic_and_i4,
+        _vcomp_atomic_or_i4,
+        _vcomp_atomic_xor_i4,
+        _vcomp_atomic_bool_and_i4,
+        _vcomp_atomic_bool_or_i4,
+    };
+    unsigned int op = (flags >> 8) & 0xf;
+    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
+    funcs[op](dest, val);
+}
+
 void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
 {
     LONG64 old;
diff --git a/dlls/vcomp/tests/vcomp.c b/dlls/vcomp/tests/vcomp.c
index 5b4c10e46b6..dc84f988fdf 100644
--- a/dlls/vcomp/tests/vcomp.c
+++ b/dlls/vcomp/tests/vcomp.c
@@ -103,6 +103,8 @@ static int (CDECL *p_vcomp_get_thread_num)(void);
 static void (CDECL *p_vcomp_leave_critsect)(CRITICAL_SECTION *critsect);
 static int (CDECL *p_vcomp_master_begin)(void);
 static void (CDECL *p_vcomp_master_end)(void);
+static void (CDECL *p_vcomp_reduction_i4)(unsigned int flags, int *dest, int val);
+static void (CDECL *p_vcomp_reduction_u4)(unsigned int flags, unsigned int *dest, unsigned int val);
 static void (CDECL *p_vcomp_sections_init)(int n);
 static int (CDECL *p_vcomp_sections_next)(void);
 static void (CDECL *p_vcomp_set_num_threads)(int num_threads);
@@ -131,6 +133,14 @@ static void (CDECL *pomp_unset_nest_lock)(omp_nest_lock_t *lock);
 #define VCOMP_DYNAMIC_FLAGS_GUIDED 0x03
 #define VCOMP_DYNAMIC_FLAGS_INCREMENT 0x40
 
+#define VCOMP_REDUCTION_FLAGS_ADD 0x100
+#define VCOMP_REDUCTION_FLAGS_MUL 0x200
+#define VCOMP_REDUCTION_FLAGS_AND 0x300
+#define VCOMP_REDUCTION_FLAGS_OR 0x400
+#define VCOMP_REDUCTION_FLAGS_XOR 0x500
+#define VCOMP_REDUCTION_FLAGS_BOOL_AND 0x600
+#define VCOMP_REDUCTION_FLAGS_BOOL_OR 0x700
+
 #ifdef __i386__
 #define ARCH "x86"
 #elif defined(__x86_64__)
@@ -336,6 +346,8 @@ static BOOL init_vcomp(void)
     VCOMP_GET_PROC(_vcomp_leave_critsect);
     VCOMP_GET_PROC(_vcomp_master_begin);
     VCOMP_GET_PROC(_vcomp_master_end);
+    VCOMP_GET_PROC(_vcomp_reduction_i4);
+    VCOMP_GET_PROC(_vcomp_reduction_u4);
     VCOMP_GET_PROC(_vcomp_sections_init);
     VCOMP_GET_PROC(_vcomp_sections_next);
     VCOMP_GET_PROC(_vcomp_set_num_threads);
@@ -1871,6 +1883,88 @@ static void test_atomic_double(void)
     }
 }
 
+static void CDECL reduction_cb(int *a, int *b)
+{
+    p_vcomp_reduction_i4(VCOMP_REDUCTION_FLAGS_ADD, a, 1);
+    p_vcomp_reduction_i4(VCOMP_REDUCTION_FLAGS_ADD | 0xfffff0ff, b, 1);
+}
+
+static void test_reduction_integer32(void)
+{
+    struct
+    {
+        unsigned int flags;
+        int v1, v2, expected;
+    }
+    tests[] =
+    {
+        { 0x000, 0x11223344, 0x77665544, -0x77777778 },
+        { VCOMP_REDUCTION_FLAGS_ADD, 0x11223344, 0x77665544, -0x77777778 },
+        { VCOMP_REDUCTION_FLAGS_MUL, 0x11223344, 0x77665544, -0xecccdf0 },
+        { VCOMP_REDUCTION_FLAGS_MUL, 0x11223344, -0x77665544, 0xecccdf0 },
+        { VCOMP_REDUCTION_FLAGS_AND, 0x11223344, 0x77665544, 0x11221144 },
+        { VCOMP_REDUCTION_FLAGS_OR, 0x11223344, 0x77665544, 0x77667744 },
+        { VCOMP_REDUCTION_FLAGS_XOR, 0x11223344, 0x77665544, 0x66446600 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND, 0, 0, 0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND, 0, 2, 0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1, 0, 0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1, 2, 1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND, 2, 0, 0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND, 2, 2, 1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0, 0, 0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0, 2, 1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR, 1, 0, 1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR, 1, 2, 1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR, 2, 0, 2 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR, 2, 2, 2 },
+        { 0x800, 0, 2, 1 },
+        { 0x900, 0, 2, 1 },
+        { 0xa00, 0, 2, 1 },
+        { 0xb00, 0, 2, 1 },
+        { 0xc00, 0, 2, 1 },
+        { 0xd00, 0, 2, 1 },
+        { 0xe00, 0, 2, 1 },
+        { 0xf00, 0, 2, 1 },
+    };
+    int max_threads = pomp_get_max_threads();
+    int a, b, i;
+
+    a = b = 42;
+    reduction_cb(&a, &b);
+    ok(a == 43, "expected a == 43, got %d\n", a);
+    ok(b == 43, "expected b == 43, got %d\n", b);
+
+    for (i = 1; i <= 4; i++)
+    {
+        pomp_set_num_threads(i);
+
+        a = b = 42;
+        p_vcomp_fork(TRUE, 2, reduction_cb, &a, &b);
+        ok(a == 42 + i, "expected a == %d, got %d\n", 42 + i, a);
+        ok(b == 42 + i, "expected b == %d, got %d\n", 42 + i, b);
+
+        a = b = 42;
+        p_vcomp_fork(FALSE, 2, reduction_cb, &a, &b);
+        ok(a == 43, "expected a == 43, got %d\n", a);
+        ok(b == 43, "expected b == 43, got %d\n", b);
+    }
+
+    pomp_set_num_threads(max_threads);
+
+    for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
+    {
+        int val = tests[i].v1;
+        p_vcomp_reduction_i4(tests[i].flags, &val, tests[i].v2);
+        ok(val == tests[i].expected, "test %d: expected val == %d, got %d\n", i, tests[i].expected, val);
+    }
+    for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
+    {
+        unsigned int val = tests[i].v1;
+        p_vcomp_reduction_u4(tests[i].flags, &val, tests[i].v2);
+        ok(val == tests[i].expected, "test %d: expected val == %u, got %u\n", i, tests[i].expected, val);
+    }
+}
+
 START_TEST(vcomp)
 {
     if (!init_vcomp())
@@ -1895,6 +1989,7 @@ START_TEST(vcomp)
     test_atomic_integer64();
     test_atomic_float();
     test_atomic_double();
+    test_reduction_integer32();
 
     release_vcomp();
 }
diff --git a/dlls/vcomp/vcomp.spec b/dlls/vcomp/vcomp.spec
index 91874fd9604..ddb35b89be9 100644
--- a/dlls/vcomp/vcomp.spec
+++ b/dlls/vcomp/vcomp.spec
@@ -75,13 +75,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long)
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) _vcomp_reduction_i4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long)
 @ cdecl _vcomp_sections_next()
diff --git a/dlls/vcomp100/vcomp100.spec b/dlls/vcomp100/vcomp100.spec
index b139a54d57b..92f6a9dbd1f 100644
--- a/dlls/vcomp100/vcomp100.spec
+++ b/dlls/vcomp100/vcomp100.spec
@@ -75,13 +75,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next
diff --git a/dlls/vcomp110/vcomp110.spec b/dlls/vcomp110/vcomp110.spec
index bb8c0072aaf..cb2a21d2b2b 100644
--- a/dlls/vcomp110/vcomp110.spec
+++ b/dlls/vcomp110/vcomp110.spec
@@ -76,13 +76,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next
diff --git a/dlls/vcomp120/vcomp120.spec b/dlls/vcomp120/vcomp120.spec
index bb8c0072aaf..cb2a21d2b2b 100644
--- a/dlls/vcomp120/vcomp120.spec
+++ b/dlls/vcomp120/vcomp120.spec
@@ -76,13 +76,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next
diff --git a/dlls/vcomp140/vcomp140.spec b/dlls/vcomp140/vcomp140.spec
index bb8c0072aaf..cb2a21d2b2b 100644
--- a/dlls/vcomp140/vcomp140.spec
+++ b/dlls/vcomp140/vcomp140.spec
@@ -76,13 +76,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next
diff --git a/dlls/vcomp90/vcomp90.spec b/dlls/vcomp90/vcomp90.spec
index b139a54d57b..92f6a9dbd1f 100644
--- a/dlls/vcomp90/vcomp90.spec
+++ b/dlls/vcomp90/vcomp90.spec
@@ -75,13 +75,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next