diff --git a/configure b/configure index 6edecf65722..c26963d741b 100755 --- a/configure +++ b/configure @@ -16883,6 +16883,39 @@ $as_echo "#define HAVE___BUILTIN_CLZ 1" >>confdefs.h fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5 +$as_echo_n "checking for __builtin_popcount... " >&6; } +if ${ac_cv_have___builtin_popcount+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +return __builtin_popcount(1) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_have___builtin_popcount="yes" +else + ac_cv_have___builtin_popcount="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have___builtin_popcount" >&5 +$as_echo "$ac_cv_have___builtin_popcount" >&6; } +if test "$ac_cv_have___builtin_popcount" = "yes" +then + +$as_echo "#define HAVE___BUILTIN_POPCOUNT 1" >>confdefs.h + +fi + case $host_cpu in *i[3456789]86*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we need to define __i386__" >&5 diff --git a/configure.ac b/configure.ac index 853277f29cb..e434fbcb646 100644 --- a/configure.ac +++ b/configure.ac @@ -2551,6 +2551,15 @@ then AC_DEFINE(HAVE___BUILTIN_CLZ, 1, [Define to 1 if you have the `__builtin_clz' built-in function.]) fi +dnl Check for __builtin_popcount +AC_CACHE_CHECK([for __builtin_popcount], ac_cv_have___builtin_popcount, + AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[return __builtin_popcount(1)]])], + [ac_cv_have___builtin_popcount="yes"], [ac_cv_have___builtin_popcount="no"])) +if test "$ac_cv_have___builtin_popcount" = "yes" +then + AC_DEFINE(HAVE___BUILTIN_POPCOUNT, 1, [Define to 1 if you have the `__builtin_popcount' built-in function.]) +fi + dnl *** check for the need to define platform-specific symbols case $host_cpu in diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c 
index 5dfa92f4c81..a076cec21ca 100644 --- a/dlls/wined3d/arb_program_shader.c +++ b/dlls/wined3d/arb_program_shader.c @@ -798,7 +798,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader DWORD highest_constf = 0, clip_limit; max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info); - max_constantsF -= count_bits(reg_maps->integer_constants); + max_constantsF -= wined3d_popcount(reg_maps->integer_constants); max_constantsF -= gl_info->reserved_arb_constants; for (i = 0; i < shader->limits->constant_float; ++i) @@ -819,7 +819,7 @@ static void shader_generate_arb_declarations(const struct wined3d_shader *shader else { unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask; - clip_limit = min(count_bits(mask), 4); + clip_limit = min(wined3d_popcount(mask), 4); } *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1); max_constantsF -= *num_clipplanes; diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index ec3c2cb2f96..1eb7e6dba9a 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -1685,12 +1685,12 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont max_constantsF = gl_info->limits.glsl_vs_float_constants - 3; if (vs_args->clip_enabled) max_constantsF -= gl_info->limits.clipplanes; - max_constantsF -= count_bits(reg_maps->integer_constants); + max_constantsF -= wined3d_popcount(reg_maps->integer_constants); /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly, * so each scalar requires a full vec4. 
We could work around this by packing the booleans ourselves, but * for now take this into account when calculating the number of available constants */ - max_constantsF -= count_bits(reg_maps->boolean_constants); + max_constantsF -= wined3d_popcount(reg_maps->boolean_constants); /* Set by driver quirks in directx.c */ max_constantsF -= gl_info->reserved_glsl_constants; diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index 5730cef5445..140665e0229 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -1138,7 +1138,7 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st } else if (!input_signature->elements && reg_maps->input_registers) { - unsigned int count = count_bits(reg_maps->input_registers); + unsigned int count = wined3d_popcount(reg_maps->input_registers); struct wined3d_shader_signature_element *e; unsigned int i; @@ -1165,7 +1165,7 @@ static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const st } else if (reg_maps->output_registers) { - unsigned int count = count_bits(reg_maps->output_registers); + unsigned int count = wined3d_popcount(reg_maps->output_registers); struct wined3d_shader_signature_element *e; if (!(output_signature->elements = HeapAlloc(GetProcessHeap(), 0, sizeof(*output_signature->elements) * count))) diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 5eb1696f0f5..28a32163ced 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -4048,17 +4048,6 @@ void get_fog_start_end(const struct wined3d_context *context, const struct wined } } -/* This small helper function is used to convert a bitmask into the number of masked bits */ -unsigned int count_bits(unsigned int mask) -{ - unsigned int count; - for (count = 0; mask; ++count) - { - mask &= mask - 1; - } - return count; -} - /* Note: It's the caller's responsibility to ensure values can be expressed * in the requested format. 
UNORM formats for example can only express values * in the range 0.0f -> 1.0f. */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 5558c70236b..b23aedb9868 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -257,6 +257,17 @@ static inline float float_24_to_32(DWORD in) } } +static inline unsigned int wined3d_popcount(unsigned int x) /* Returns the number of bits set in x. */ +{ +#ifdef HAVE___BUILTIN_POPCOUNT /* Defined by configure when a program calling __builtin_popcount() links. */ + return __builtin_popcount(x); +#else /* No builtin: count the bits in parallel within the word. */ + x -= x >> 1 & 0x55555555; /* (x >> 1) & mask: every 2-bit field now holds the count of its own two bits. */ + x = (x & 0x33333333) + (x >> 2 & 0x33333333); /* Add adjacent 2-bit fields, giving a count per 4-bit field. */ + return ((x + (x >> 4)) & 0x0f0f0f0f) * 0x01010101 >> 24; /* Per-byte counts; the multiply sums all bytes into bits 24-31. NOTE(review): assumes unsigned int is 32 bits wide - confirm. */ +#endif +} + #define ORM_BACKBUFFER 0 #define ORM_FBO 1 @@ -2930,7 +2941,6 @@ GLenum gl_primitive_type_from_d3d(enum wined3d_primitive_type primitive_type) DE /* Math utils */ void multiply_matrix(struct wined3d_matrix *dest, const struct wined3d_matrix *src1, const struct wined3d_matrix *src2) DECLSPEC_HIDDEN; -unsigned int count_bits(unsigned int mask) DECLSPEC_HIDDEN; void wined3d_release_dc(HWND window, HDC dc) DECLSPEC_HIDDEN; diff --git a/include/config.h.in b/include/config.h.in index 805374ecee7..86318c5f185 100644 --- a/include/config.h.in +++ b/include/config.h.in @@ -1311,6 +1311,9 @@ /* Define to 1 if you have the `__builtin_clz' built-in function. */ #undef HAVE___BUILTIN_CLZ +/* Define to 1 if you have the `__builtin_popcount' built-in function. */ +#undef HAVE___BUILTIN_POPCOUNT + /* Define to 1 if you have the `__res_getservers' function. */ #undef HAVE___RES_GETSERVERS