vcomp: Implement C2VectParallel().
Used by Ancient Cities. Signed-off-by: Paul Gofman <pgofman@codeweavers.com> Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
parent
5366f67ef0
commit
ab89486e96
|
@ -33,6 +33,8 @@
|
|||
|
||||
WINE_DEFAULT_DEBUG_CHANNEL(vcomp);
|
||||
|
||||
#define MAX_VECT_PARALLEL_CALLBACK_ARGS 128
|
||||
|
||||
typedef CRITICAL_SECTION *omp_lock_t;
|
||||
typedef CRITICAL_SECTION *omp_nest_lock_t;
|
||||
|
||||
|
@ -122,6 +124,14 @@ static void **ptr_from_va_list(__ms_va_list valist)
|
|||
return *(void ***)&valist;
|
||||
}
|
||||
|
||||
static void copy_va_list_data(void **args, __ms_va_list valist, int args_count)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < args_count; ++i)
|
||||
args[i] = va_arg(valist, void *);
|
||||
}
|
||||
|
||||
#if defined(__i386__)
|
||||
|
||||
extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
|
||||
|
@ -1665,6 +1675,154 @@ void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
|
|||
LeaveCriticalSection(critsect);
|
||||
}
|
||||
|
||||
static unsigned int get_step_count(int start, int end, int range_offset, int step)
|
||||
{
|
||||
int range = end - start + step - range_offset;
|
||||
|
||||
if (step < 0)
|
||||
return (unsigned)-range / -step;
|
||||
else
|
||||
return (unsigned)range / step;
|
||||
}
|
||||
|
||||
static void CDECL c2vectparallel_wrapper(int start, int end, int step, int end_included, BOOL dynamic_distribution,
|
||||
int volatile *dynamic_start, void *function, int nargs, __ms_va_list valist)
|
||||
{
|
||||
void *wrapper_args[MAX_VECT_PARALLEL_CALLBACK_ARGS];
|
||||
unsigned int step_count, steps_per_call, remainder;
|
||||
int thread_count = omp_get_num_threads();
|
||||
int curr_start, curr_end, range_offset;
|
||||
int thread = _vcomp_get_thread_num();
|
||||
int step_sign;
|
||||
|
||||
copy_va_list_data(&wrapper_args[2], valist, nargs - 2);
|
||||
|
||||
step_sign = step > 0 ? 1 : -1;
|
||||
range_offset = step_sign * !end_included;
|
||||
|
||||
if (dynamic_distribution)
|
||||
{
|
||||
int next_start, new_start, end_value;
|
||||
|
||||
start = *dynamic_start;
|
||||
end_value = end + !!end_included * step;
|
||||
while (start != end_value)
|
||||
{
|
||||
step_count = get_step_count(start, end, range_offset, step);
|
||||
|
||||
curr_end = start + (step_count + thread_count - 1) / thread_count * step
|
||||
+ range_offset;
|
||||
|
||||
if ((curr_end - end) * step_sign > 0)
|
||||
{
|
||||
next_start = end_value;
|
||||
curr_end = end;
|
||||
}
|
||||
else
|
||||
{
|
||||
next_start = curr_end - range_offset;
|
||||
curr_end -= step;
|
||||
}
|
||||
|
||||
if ((new_start = InterlockedCompareExchange(dynamic_start, next_start, start)) != start)
|
||||
{
|
||||
start = new_start;
|
||||
continue;
|
||||
}
|
||||
|
||||
wrapper_args[0] = (void *)(ULONG_PTR)start;
|
||||
wrapper_args[1] = (void *)(ULONG_PTR)curr_end;
|
||||
_vcomp_fork_call_wrapper(function, nargs, wrapper_args);
|
||||
start = *dynamic_start;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
step_count = get_step_count(start, end, range_offset, step);
|
||||
|
||||
/* According to the tests native vcomp still makes extra calls
|
||||
* with empty range from excessive threads under certain conditions
|
||||
* for unclear reason. */
|
||||
if (thread >= step_count && (end_included || (step != 1 && step != -1)))
|
||||
return;
|
||||
|
||||
steps_per_call = step_count / thread_count;
|
||||
remainder = step_count % thread_count;
|
||||
|
||||
if (thread < remainder)
|
||||
{
|
||||
curr_start = thread * (steps_per_call + 1);
|
||||
curr_end = curr_start + steps_per_call + 1;
|
||||
}
|
||||
else if (thread < step_count)
|
||||
{
|
||||
curr_start = remainder + steps_per_call * thread;
|
||||
curr_end = curr_start + steps_per_call;
|
||||
}
|
||||
else
|
||||
{
|
||||
curr_start = curr_end = 0;
|
||||
}
|
||||
|
||||
curr_start = start + curr_start * step;
|
||||
curr_end = start + (curr_end - 1) * step + range_offset;
|
||||
|
||||
wrapper_args[0] = (void *)(ULONG_PTR)curr_start;
|
||||
wrapper_args[1] = (void *)(ULONG_PTR)curr_end;
|
||||
_vcomp_fork_call_wrapper(function, nargs, wrapper_args);
|
||||
}
|
||||
|
||||
void CDECL C2VectParallel(int start, int end, int step, BOOL end_included, int thread_count,
|
||||
BOOL dynamic_distribution, void *function, int nargs, ...)
|
||||
{
|
||||
struct vcomp_thread_data *thread_data;
|
||||
int volatile dynamic_start;
|
||||
int prev_thread_count;
|
||||
__ms_va_list valist;
|
||||
|
||||
TRACE("start %d, end %d, step %d, end_included %d, thread_count %d, dynamic_distribution %#x,"
|
||||
" function %p, nargs %d.\n", start, end, step, end_included, thread_count,
|
||||
dynamic_distribution, function, nargs);
|
||||
|
||||
if (nargs > MAX_VECT_PARALLEL_CALLBACK_ARGS)
|
||||
{
|
||||
FIXME("Number of arguments %u exceeds supported maximum %u"
|
||||
" (not calling the loop code, expect problems).\n",
|
||||
nargs, MAX_VECT_PARALLEL_CALLBACK_ARGS);
|
||||
return;
|
||||
}
|
||||
|
||||
__ms_va_start(valist, nargs);
|
||||
|
||||
/* This expression can result in integer overflow. According to the tests,
|
||||
* native vcomp runs the function as a single thread both for empty range
|
||||
* and (end - start) not fitting the integer range. */
|
||||
if ((step > 0 && end < start) || (step < 0 && end > start)
|
||||
|| (end - start) / step < 2 || thread_count < 0)
|
||||
{
|
||||
void *wrapper_args[MAX_VECT_PARALLEL_CALLBACK_ARGS];
|
||||
|
||||
wrapper_args[0] = (void *)(ULONG_PTR)start;
|
||||
wrapper_args[1] = (void *)(ULONG_PTR)end;
|
||||
copy_va_list_data(&wrapper_args[2], valist, nargs - 2);
|
||||
_vcomp_fork_call_wrapper(function, nargs, wrapper_args);
|
||||
__ms_va_end(valist);
|
||||
return;
|
||||
}
|
||||
|
||||
thread_data = vcomp_init_thread_data();
|
||||
prev_thread_count = thread_data->fork_threads;
|
||||
thread_data->fork_threads = thread_count;
|
||||
|
||||
dynamic_start = start;
|
||||
|
||||
_vcomp_fork(TRUE, 9, c2vectparallel_wrapper, start, end, step, end_included, dynamic_distribution,
|
||||
&dynamic_start, function, nargs, valist);
|
||||
|
||||
thread_data->fork_threads = prev_thread_count;
|
||||
__ms_va_end(valist);
|
||||
}
|
||||
|
||||
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
|
||||
{
|
||||
TRACE("(%p, %d, %p)\n", instance, reason, reserved);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
@ stub C2VectParallel
|
||||
@ varargs C2VectParallel(long long long long long long ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i1(ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i2(ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i4(ptr long)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
@ stub C2VectParallel
|
||||
@ varargs C2VectParallel(long long long long long long ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i1(ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i2(ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i4(ptr long)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
@ stub C2VectParallel
|
||||
@ varargs C2VectParallel(long long long long long long ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i1(ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i2(ptr long)
|
||||
@ cdecl _vcomp_atomic_add_i4(ptr long)
|
||||
|
|
Loading…
Reference in New Issue