From 96e0c994b232dcd1def407d006d488483252c152 Mon Sep 17 00:00:00 2001
From: Sebastian Lackner <sebastian@fds-team.de>
Date: Wed, 7 Sep 2016 10:56:01 +0200
Subject: [PATCH] vcomp: Implement _vcomp_reduction_{u,i}4 and add tests.

Signed-off-by: Sebastian Lackner <sebastian@fds-team.de>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
---
 dlls/vcomp/main.c           | 30 ++++++++++++
 dlls/vcomp/tests/vcomp.c    | 95 +++++++++++++++++++++++++++++++++++++
 dlls/vcomp/vcomp.spec       |  4 +-
 dlls/vcomp100/vcomp100.spec |  4 +-
 dlls/vcomp110/vcomp110.spec |  4 +-
 dlls/vcomp120/vcomp120.spec |  4 +-
 dlls/vcomp140/vcomp140.spec |  4 +-
 dlls/vcomp90/vcomp90.spec   |  4 +-
 8 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/dlls/vcomp/main.c b/dlls/vcomp/main.c
index 61f84be5a0e..53ad28a66a3 100644
--- a/dlls/vcomp/main.c
+++ b/dlls/vcomp/main.c
@@ -580,6 +580,36 @@ void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
     do old = *dest; while (interlocked_cmpxchg(dest, old ^ val, old) != old);
 }
 
+static void CDECL _vcomp_atomic_bool_and_i4(int *dest, int val)
+{
+    int old;
+    do old = *dest; while (interlocked_cmpxchg(dest, old && val, old) != old);
+}
+
+static void CDECL _vcomp_atomic_bool_or_i4(int *dest, int val)
+{
+    int old;
+    do old = *dest; while (interlocked_cmpxchg(dest, old ? old : (val != 0), old) != old);
+}
+
+void CDECL _vcomp_reduction_i4(unsigned int flags, int *dest, int val)
+{
+    static void (CDECL * const funcs[])(int *, int) =
+    {
+        _vcomp_atomic_add_i4,
+        _vcomp_atomic_add_i4,
+        _vcomp_atomic_mul_i4,
+        _vcomp_atomic_and_i4,
+        _vcomp_atomic_or_i4,
+        _vcomp_atomic_xor_i4,
+        _vcomp_atomic_bool_and_i4,
+        _vcomp_atomic_bool_or_i4,
+    };
+    unsigned int op = (flags >> 8) & 0xf;
+    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
+    funcs[op](dest, val);
+}
+
 void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
 {
     LONG64 old;
diff --git a/dlls/vcomp/tests/vcomp.c b/dlls/vcomp/tests/vcomp.c
index 5b4c10e46b6..dc84f988fdf 100644
--- a/dlls/vcomp/tests/vcomp.c
+++ b/dlls/vcomp/tests/vcomp.c
@@ -103,6 +103,8 @@ static int   (CDECL   *p_vcomp_get_thread_num)(void);
 static void  (CDECL   *p_vcomp_leave_critsect)(CRITICAL_SECTION *critsect);
 static int   (CDECL   *p_vcomp_master_begin)(void);
 static void  (CDECL   *p_vcomp_master_end)(void);
+static void  (CDECL   *p_vcomp_reduction_i4)(unsigned int flags, int *dest, int val);
+static void  (CDECL   *p_vcomp_reduction_u4)(unsigned int flags, unsigned int *dest, unsigned int val);
 static void  (CDECL   *p_vcomp_sections_init)(int n);
 static int   (CDECL   *p_vcomp_sections_next)(void);
 static void  (CDECL   *p_vcomp_set_num_threads)(int num_threads);
@@ -131,6 +133,14 @@ static void  (CDECL   *pomp_unset_nest_lock)(omp_nest_lock_t *lock);
 #define VCOMP_DYNAMIC_FLAGS_GUIDED      0x03
 #define VCOMP_DYNAMIC_FLAGS_INCREMENT   0x40
 
+#define VCOMP_REDUCTION_FLAGS_ADD       0x100
+#define VCOMP_REDUCTION_FLAGS_MUL       0x200
+#define VCOMP_REDUCTION_FLAGS_AND       0x300
+#define VCOMP_REDUCTION_FLAGS_OR        0x400
+#define VCOMP_REDUCTION_FLAGS_XOR       0x500
+#define VCOMP_REDUCTION_FLAGS_BOOL_AND  0x600
+#define VCOMP_REDUCTION_FLAGS_BOOL_OR   0x700
+
 #ifdef __i386__
 #define ARCH "x86"
 #elif defined(__x86_64__)
@@ -336,6 +346,8 @@ static BOOL init_vcomp(void)
     VCOMP_GET_PROC(_vcomp_leave_critsect);
     VCOMP_GET_PROC(_vcomp_master_begin);
     VCOMP_GET_PROC(_vcomp_master_end);
+    VCOMP_GET_PROC(_vcomp_reduction_i4);
+    VCOMP_GET_PROC(_vcomp_reduction_u4);
     VCOMP_GET_PROC(_vcomp_sections_init);
     VCOMP_GET_PROC(_vcomp_sections_next);
     VCOMP_GET_PROC(_vcomp_set_num_threads);
@@ -1871,6 +1883,88 @@ static void test_atomic_double(void)
     }
 }
 
+static void CDECL reduction_cb(int *a, int *b)
+{
+    p_vcomp_reduction_i4(VCOMP_REDUCTION_FLAGS_ADD, a, 1);
+    p_vcomp_reduction_i4(VCOMP_REDUCTION_FLAGS_ADD | 0xfffff0ff, b, 1);
+}
+
+static void test_reduction_integer32(void)
+{
+    struct
+    {
+        unsigned int flags;
+        int v1, v2, expected;
+    }
+    tests[] =
+    {
+        { 0x000,                            0x11223344,  0x77665544, -0x77777778 },
+        { VCOMP_REDUCTION_FLAGS_ADD,        0x11223344,  0x77665544, -0x77777778 },
+        { VCOMP_REDUCTION_FLAGS_MUL,        0x11223344,  0x77665544,  -0xecccdf0 },
+        { VCOMP_REDUCTION_FLAGS_MUL,        0x11223344, -0x77665544,   0xecccdf0 },
+        { VCOMP_REDUCTION_FLAGS_AND,        0x11223344,  0x77665544,  0x11221144 },
+        { VCOMP_REDUCTION_FLAGS_OR,         0x11223344,  0x77665544,  0x77667744 },
+        { VCOMP_REDUCTION_FLAGS_XOR,        0x11223344,  0x77665544,  0x66446600 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND,            0,           0,           0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND,            0,           2,           0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND,            1,           0,           0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND,            1,           2,           1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND,            2,           0,           0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_AND,            2,           2,           1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR,             0,           0,           0 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR,             0,           2,           1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR,             1,           0,           1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR,             1,           2,           1 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR,             2,           0,           2 },
+        { VCOMP_REDUCTION_FLAGS_BOOL_OR,             2,           2,           2 },
+        { 0x800,                                     0,           2,           1 },
+        { 0x900,                                     0,           2,           1 },
+        { 0xa00,                                     0,           2,           1 },
+        { 0xb00,                                     0,           2,           1 },
+        { 0xc00,                                     0,           2,           1 },
+        { 0xd00,                                     0,           2,           1 },
+        { 0xe00,                                     0,           2,           1 },
+        { 0xf00,                                     0,           2,           1 },
+    };
+    int max_threads = pomp_get_max_threads();
+    int a, b, i;
+
+    a = b = 42;
+    reduction_cb(&a, &b);
+    ok(a == 43, "expected a == 43, got %d\n", a);
+    ok(b == 43, "expected b == 43, got %d\n", b);
+
+    for (i = 1; i <= 4; i++)
+    {
+        pomp_set_num_threads(i);
+
+        a = b = 42;
+        p_vcomp_fork(TRUE, 2, reduction_cb, &a, &b);
+        ok(a == 42 + i, "expected a == %d, got %d\n", 42 + i, a);
+        ok(b == 42 + i, "expected b == %d, got %d\n", 42 + i, b);
+
+        a = b = 42;
+        p_vcomp_fork(FALSE, 2, reduction_cb, &a, &b);
+        ok(a == 43, "expected a == 43, got %d\n", a);
+        ok(b == 43, "expected b == 43, got %d\n", b);
+    }
+
+    pomp_set_num_threads(max_threads);
+
+    for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
+    {
+        int val = tests[i].v1;
+        p_vcomp_reduction_i4(tests[i].flags, &val, tests[i].v2);
+        ok(val == tests[i].expected, "test %d: expected val == %d, got %d\n", i, tests[i].expected, val);
+    }
+    for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
+    {
+        unsigned int val = tests[i].v1;
+        p_vcomp_reduction_u4(tests[i].flags, &val, tests[i].v2);
+        ok(val == tests[i].expected, "test %d: expected val == %u, got %u\n", i, tests[i].expected, val);
+    }
+}
+
 START_TEST(vcomp)
 {
     if (!init_vcomp())
@@ -1895,6 +1989,7 @@ START_TEST(vcomp)
     test_atomic_integer64();
     test_atomic_float();
     test_atomic_double();
+    test_reduction_integer32();
 
     release_vcomp();
 }
diff --git a/dlls/vcomp/vcomp.spec b/dlls/vcomp/vcomp.spec
index 91874fd9604..ddb35b89be9 100644
--- a/dlls/vcomp/vcomp.spec
+++ b/dlls/vcomp/vcomp.spec
@@ -75,13 +75,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long)
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) _vcomp_reduction_i4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long)
 @ cdecl _vcomp_sections_next()
diff --git a/dlls/vcomp100/vcomp100.spec b/dlls/vcomp100/vcomp100.spec
index b139a54d57b..92f6a9dbd1f 100644
--- a/dlls/vcomp100/vcomp100.spec
+++ b/dlls/vcomp100/vcomp100.spec
@@ -75,13 +75,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next
diff --git a/dlls/vcomp110/vcomp110.spec b/dlls/vcomp110/vcomp110.spec
index bb8c0072aaf..cb2a21d2b2b 100644
--- a/dlls/vcomp110/vcomp110.spec
+++ b/dlls/vcomp110/vcomp110.spec
@@ -76,13 +76,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next
diff --git a/dlls/vcomp120/vcomp120.spec b/dlls/vcomp120/vcomp120.spec
index bb8c0072aaf..cb2a21d2b2b 100644
--- a/dlls/vcomp120/vcomp120.spec
+++ b/dlls/vcomp120/vcomp120.spec
@@ -76,13 +76,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next
diff --git a/dlls/vcomp140/vcomp140.spec b/dlls/vcomp140/vcomp140.spec
index bb8c0072aaf..cb2a21d2b2b 100644
--- a/dlls/vcomp140/vcomp140.spec
+++ b/dlls/vcomp140/vcomp140.spec
@@ -76,13 +76,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next
diff --git a/dlls/vcomp90/vcomp90.spec b/dlls/vcomp90/vcomp90.spec
index b139a54d57b..92f6a9dbd1f 100644
--- a/dlls/vcomp90/vcomp90.spec
+++ b/dlls/vcomp90/vcomp90.spec
@@ -75,13 +75,13 @@
 @ stub _vcomp_ordered_loop_end
 @ stub _vcomp_reduction_i1
 @ stub _vcomp_reduction_i2
-@ stub _vcomp_reduction_i4
+@ cdecl _vcomp_reduction_i4(long ptr long) vcomp._vcomp_reduction_i4
 @ stub _vcomp_reduction_i8
 @ stub _vcomp_reduction_r4
 @ stub _vcomp_reduction_r8
 @ stub _vcomp_reduction_u1
 @ stub _vcomp_reduction_u2
-@ stub _vcomp_reduction_u4
+@ cdecl _vcomp_reduction_u4(long ptr long) vcomp._vcomp_reduction_u4
 @ stub _vcomp_reduction_u8
 @ cdecl _vcomp_sections_init(long) vcomp._vcomp_sections_init
 @ cdecl _vcomp_sections_next() vcomp._vcomp_sections_next