wined3d: Improve wined3d_popcount() implementation.
Signed-off-by: Matteo Bruni <mbruni@codeweavers.com>
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
parent
dba682c6f1
commit
961215b8b8
|
@ -16883,6 +16883,39 @@ $as_echo "#define HAVE___BUILTIN_CLZ 1" >>confdefs.h
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
|
||||||
|
$as_echo_n "checking for __builtin_popcount... " >&6; }
|
||||||
|
if ${ac_cv_have___builtin_popcount+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
return __builtin_popcount(1)
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
if ac_fn_c_try_link "$LINENO"; then :
|
||||||
|
ac_cv_have___builtin_popcount="yes"
|
||||||
|
else
|
||||||
|
ac_cv_have___builtin_popcount="no"
|
||||||
|
fi
|
||||||
|
rm -f core conftest.err conftest.$ac_objext \
|
||||||
|
conftest$ac_exeext conftest.$ac_ext
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have___builtin_popcount" >&5
|
||||||
|
$as_echo "$ac_cv_have___builtin_popcount" >&6; }
|
||||||
|
if test "$ac_cv_have___builtin_popcount" = "yes"
|
||||||
|
then
|
||||||
|
|
||||||
|
$as_echo "#define HAVE___BUILTIN_POPCOUNT 1" >>confdefs.h
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
case $host_cpu in
|
case $host_cpu in
|
||||||
*i[3456789]86*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we need to define __i386__" >&5
|
*i[3456789]86*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we need to define __i386__" >&5
|
||||||
|
|
|
@ -2551,6 +2551,15 @@ then
|
||||||
AC_DEFINE(HAVE___BUILTIN_CLZ, 1, [Define to 1 if you have the `__builtin_clz' built-in function.])
|
AC_DEFINE(HAVE___BUILTIN_CLZ, 1, [Define to 1 if you have the `__builtin_clz' built-in function.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
dnl Check for __builtin_popcount
|
||||||
|
AC_CACHE_CHECK([for __builtin_popcount], ac_cv_have___builtin_popcount,
|
||||||
|
AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[return __builtin_popcount(1)]])],
|
||||||
|
[ac_cv_have___builtin_popcount="yes"], [ac_cv_have___builtin_popcount="no"]))
|
||||||
|
if test "$ac_cv_have___builtin_popcount" = "yes"
|
||||||
|
then
|
||||||
|
AC_DEFINE(HAVE___BUILTIN_POPCOUNT, 1, [Define to 1 if you have the `__builtin_popcount' built-in function.])
|
||||||
|
fi
|
||||||
|
|
||||||
dnl *** check for the need to define platform-specific symbols
|
dnl *** check for the need to define platform-specific symbols
|
||||||
|
|
||||||
case $host_cpu in
|
case $host_cpu in
|
||||||
|
|
|
@ -798,7 +798,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader
|
||||||
DWORD highest_constf = 0, clip_limit;
|
DWORD highest_constf = 0, clip_limit;
|
||||||
|
|
||||||
max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info);
|
max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info);
|
||||||
max_constantsF -= count_bits(reg_maps->integer_constants);
|
max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
|
||||||
max_constantsF -= gl_info->reserved_arb_constants;
|
max_constantsF -= gl_info->reserved_arb_constants;
|
||||||
|
|
||||||
for (i = 0; i < shader->limits->constant_float; ++i)
|
for (i = 0; i < shader->limits->constant_float; ++i)
|
||||||
|
@ -819,7 +819,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask;
|
unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask;
|
||||||
clip_limit = min(count_bits(mask), 4);
|
clip_limit = min(wined3d_popcount(mask), 4);
|
||||||
}
|
}
|
||||||
*num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1);
|
*num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1);
|
||||||
max_constantsF -= *num_clipplanes;
|
max_constantsF -= *num_clipplanes;
|
||||||
|
|
|
@ -1685,12 +1685,12 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont
|
||||||
max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
|
max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
|
||||||
if (vs_args->clip_enabled)
|
if (vs_args->clip_enabled)
|
||||||
max_constantsF -= gl_info->limits.clipplanes;
|
max_constantsF -= gl_info->limits.clipplanes;
|
||||||
max_constantsF -= count_bits(reg_maps->integer_constants);
|
max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
|
||||||
/* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
|
/* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
|
||||||
* so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
|
* so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
|
||||||
* for now take this into account when calculating the number of available constants
|
* for now take this into account when calculating the number of available constants
|
||||||
*/
|
*/
|
||||||
max_constantsF -= count_bits(reg_maps->boolean_constants);
|
max_constantsF -= wined3d_popcount(reg_maps->boolean_constants);
|
||||||
/* Set by driver quirks in directx.c */
|
/* Set by driver quirks in directx.c */
|
||||||
max_constantsF -= gl_info->reserved_glsl_constants;
|
max_constantsF -= gl_info->reserved_glsl_constants;
|
||||||
|
|
||||||
|
|
|
@ -1138,7 +1138,7 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
|
||||||
}
|
}
|
||||||
else if (!input_signature->elements && reg_maps->input_registers)
|
else if (!input_signature->elements && reg_maps->input_registers)
|
||||||
{
|
{
|
||||||
unsigned int count = count_bits(reg_maps->input_registers);
|
unsigned int count = wined3d_popcount(reg_maps->input_registers);
|
||||||
struct wined3d_shader_signature_element *e;
|
struct wined3d_shader_signature_element *e;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
|
@ -1165,7 +1165,7 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st
|
||||||
}
|
}
|
||||||
else if (reg_maps->output_registers)
|
else if (reg_maps->output_registers)
|
||||||
{
|
{
|
||||||
unsigned int count = count_bits(reg_maps->output_registers);
|
unsigned int count = wined3d_popcount(reg_maps->output_registers);
|
||||||
struct wined3d_shader_signature_element *e;
|
struct wined3d_shader_signature_element *e;
|
||||||
|
|
||||||
if (!(output_signature->elements = HeapAlloc(GetProcessHeap(), 0, sizeof(*output_signature->elements) * count)))
|
if (!(output_signature->elements = HeapAlloc(GetProcessHeap(), 0, sizeof(*output_signature->elements) * count)))
|
||||||
|
|
|
@ -4048,17 +4048,6 @@ void get_fog_start_end(const struct wined3d_context *context, const struct wined
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the number of bits that are set in mask. */
unsigned int count_bits(unsigned int mask)
{
    unsigned int set_bits = 0;

    /* "mask & (mask - 1)" clears the lowest set bit, so the loop body runs
     * exactly once for every bit that is set. */
    while (mask)
    {
        mask = mask & (mask - 1);
        ++set_bits;
    }

    return set_bits;
}
|
|
||||||
|
|
||||||
/* Note: It's the caller's responsibility to ensure values can be expressed
|
/* Note: It's the caller's responsibility to ensure values can be expressed
|
||||||
* in the requested format. UNORM formats for example can only express values
|
* in the requested format. UNORM formats for example can only express values
|
||||||
* in the range 0.0f -> 1.0f. */
|
* in the range 0.0f -> 1.0f. */
|
||||||
|
|
|
@ -257,6 +257,17 @@ static inline float float_24_to_32(DWORD in)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return the number of bits set in x.
 *
 * When configure detected __builtin_popcount() (HAVE___BUILTIN_POPCOUNT) the
 * compiler builtin is used. Otherwise a loop-free parallel bit count is
 * performed; its masks and final shift are written for a 32-bit value. */
static inline unsigned int wined3d_popcount(unsigned int x)
{
#ifdef HAVE___BUILTIN_POPCOUNT
    return __builtin_popcount(x);
#else
    unsigned int pairs, nibbles, bytes;

    /* Every 2-bit field ends up holding the count of bits set in it. */
    pairs = x - ((x >> 1) & 0x55555555);
    /* Add neighbouring 2-bit counts together into 4-bit fields. */
    nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
    /* Add neighbouring 4-bit counts together into per-byte counts. */
    bytes = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;
    /* The multiplication sums the four byte counts into the top byte. */
    return (bytes * 0x01010101) >> 24;
#endif
}
|
||||||
|
|
||||||
#define ORM_BACKBUFFER 0
|
#define ORM_BACKBUFFER 0
|
||||||
#define ORM_FBO 1
|
#define ORM_FBO 1
|
||||||
|
|
||||||
|
@ -2930,7 +2941,6 @@ GLenum gl_primitive_type_from_d3d(enum wined3d_primitive_type primitive_type) DE
|
||||||
/* Math utils */
|
/* Math utils */
|
||||||
void multiply_matrix(struct wined3d_matrix *dest, const struct wined3d_matrix *src1,
|
void multiply_matrix(struct wined3d_matrix *dest, const struct wined3d_matrix *src1,
|
||||||
const struct wined3d_matrix *src2) DECLSPEC_HIDDEN;
|
const struct wined3d_matrix *src2) DECLSPEC_HIDDEN;
|
||||||
unsigned int count_bits(unsigned int mask) DECLSPEC_HIDDEN;
|
|
||||||
|
|
||||||
void wined3d_release_dc(HWND window, HDC dc) DECLSPEC_HIDDEN;
|
void wined3d_release_dc(HWND window, HDC dc) DECLSPEC_HIDDEN;
|
||||||
|
|
||||||
|
|
|
@ -1311,6 +1311,9 @@
|
||||||
/* Define to 1 if you have the `__builtin_clz' built-in function. */
|
/* Define to 1 if you have the `__builtin_clz' built-in function. */
|
||||||
#undef HAVE___BUILTIN_CLZ
|
#undef HAVE___BUILTIN_CLZ
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `__builtin_popcount' built-in function. */
|
||||||
|
#undef HAVE___BUILTIN_POPCOUNT
|
||||||
|
|
||||||
/* Define to 1 if you have the `__res_getservers' function. */
|
/* Define to 1 if you have the `__res_getservers' function. */
|
||||||
#undef HAVE___RES_GETSERVERS
|
#undef HAVE___RES_GETSERVERS
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue