From 6d98de6889d641fc05dc17988df57ac30517f56a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Bernon?= Date: Thu, 21 Oct 2021 10:44:30 +0200 Subject: [PATCH] ntdll: Use the unrolled memset from msvcrt. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although less frequently used, heap allocations are zeroed out with ntdll memset and they don't benefit from the msvcrt memset optimisation. Signed-off-by: Rémi Bernon Signed-off-by: Alexandre Julliard (cherry picked from commit a602bdd326cf09b9c77ecbd4cb59af3c32bf2ecd) Signed-off-by: Michael Stefaniuc --- dlls/ntdll/string.c | 66 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/dlls/ntdll/string.c b/dlls/ntdll/string.c index f1cea6caa83..cd4f3c6a8a2 100644 --- a/dlls/ntdll/string.c +++ b/dlls/ntdll/string.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "windef.h" #include "winbase.h" @@ -140,13 +141,72 @@ void * __cdecl memmove( void *dst, const void *src, size_t n ) } +static inline void memset_aligned_32( unsigned char *d, uint64_t v, size_t n ) +{ + unsigned char *end = d + n; + while (d < end) + { + *(uint64_t *)(d + 0) = v; + *(uint64_t *)(d + 8) = v; + *(uint64_t *)(d + 16) = v; + *(uint64_t *)(d + 24) = v; + d += 32; + } +} + /********************************************************************* * memset (NTDLL.@) */ -void * __cdecl memset( void *dst, int c, size_t n ) +void *__cdecl memset( void *dst, int c, size_t n ) { - volatile unsigned char *d = dst; /* avoid gcc optimizations */ - while (n--) *d++ = c; + typedef uint64_t DECLSPEC_ALIGN(1) unaligned_ui64; + typedef uint32_t DECLSPEC_ALIGN(1) unaligned_ui32; + typedef uint16_t DECLSPEC_ALIGN(1) unaligned_ui16; + + uint64_t v = 0x101010101010101ull * (unsigned char)c; + unsigned char *d = (unsigned char *)dst; + size_t a = 0x20 - ((uintptr_t)d & 0x1f); + + if (n >= 16) + { + *(unaligned_ui64 *)(d + 0) = v; + *(unaligned_ui64 *)(d + 8) 
= v; + *(unaligned_ui64 *)(d + n - 16) = v; + *(unaligned_ui64 *)(d + n - 8) = v; + if (n <= 32) return dst; + *(unaligned_ui64 *)(d + 16) = v; + *(unaligned_ui64 *)(d + 24) = v; + *(unaligned_ui64 *)(d + n - 32) = v; + *(unaligned_ui64 *)(d + n - 24) = v; + if (n <= 64) return dst; + + n = (n - a) & ~0x1f; + memset_aligned_32( d + a, v, n ); + return dst; + } + if (n >= 8) + { + *(unaligned_ui64 *)d = v; + *(unaligned_ui64 *)(d + n - 8) = v; + return dst; + } + if (n >= 4) + { + *(unaligned_ui32 *)d = v; + *(unaligned_ui32 *)(d + n - 4) = v; + return dst; + } + if (n >= 2) + { + *(unaligned_ui16 *)d = v; + *(unaligned_ui16 *)(d + n - 2) = v; + return dst; + } + if (n >= 1) + { + *(uint8_t *)d = v; + return dst; + } return dst; }