ntdll: Use the unrolled memset from msvcrt.
Although the ntdll memset is used less frequently than the msvcrt one, it is the function that zeroes out heap allocations, so those allocations did not benefit from the msvcrt memset optimisation.

Signed-off-by: Rémi Bernon <rbernon@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
parent
7c046c7afd
commit
a602bdd326
|
@ -25,6 +25,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "windef.h"
|
#include "windef.h"
|
||||||
#include "winbase.h"
|
#include "winbase.h"
|
||||||
|
@ -140,13 +141,72 @@ void * __cdecl memmove( void *dst, const void *src, size_t n )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Store the 64-bit pattern v over the range starting at d, 32 bytes
 * (four 64-bit stores) per iteration, until at least n bytes are written.
 *
 * The stores go through a plain uint64_t pointer, so d has to be suitably
 * aligned for that type; memset() passes a 32-byte aligned address.
 * Note that whole 32-byte groups are always written: if n is not a
 * multiple of 32 the last iteration stores past d + n.  With n == 0
 * nothing is written.
 */
static inline void memset_aligned_32( unsigned char *d, uint64_t v, size_t n )
{
    unsigned char *const limit = d + n;
    unsigned char *pos;

    for (pos = d; pos < limit; pos += 32)
    {
        uint64_t *words = (uint64_t *)pos;

        words[0] = v;
        words[1] = v;
        words[2] = v;
        words[3] = v;
    }
}
|
||||||
|
|
||||||
/*********************************************************************
|
/*********************************************************************
|
||||||
* memset (NTDLL.@)
|
* memset (NTDLL.@)
|
||||||
*/
|
*/
|
||||||
void * __cdecl memset( void *dst, int c, size_t n )
|
void *__cdecl memset( void *dst, int c, size_t n )
|
||||||
{
|
{
|
||||||
volatile unsigned char *d = dst; /* avoid gcc optimizations */
|
typedef uint64_t DECLSPEC_ALIGN(1) unaligned_ui64;
|
||||||
while (n--) *d++ = c;
|
typedef uint32_t DECLSPEC_ALIGN(1) unaligned_ui32;
|
||||||
|
typedef uint16_t DECLSPEC_ALIGN(1) unaligned_ui16;
|
||||||
|
|
||||||
|
uint64_t v = 0x101010101010101ull * (unsigned char)c;
|
||||||
|
unsigned char *d = (unsigned char *)dst;
|
||||||
|
size_t a = 0x20 - ((uintptr_t)d & 0x1f);
|
||||||
|
|
||||||
|
if (n >= 16)
|
||||||
|
{
|
||||||
|
*(unaligned_ui64 *)(d + 0) = v;
|
||||||
|
*(unaligned_ui64 *)(d + 8) = v;
|
||||||
|
*(unaligned_ui64 *)(d + n - 16) = v;
|
||||||
|
*(unaligned_ui64 *)(d + n - 8) = v;
|
||||||
|
if (n <= 32) return dst;
|
||||||
|
*(unaligned_ui64 *)(d + 16) = v;
|
||||||
|
*(unaligned_ui64 *)(d + 24) = v;
|
||||||
|
*(unaligned_ui64 *)(d + n - 32) = v;
|
||||||
|
*(unaligned_ui64 *)(d + n - 24) = v;
|
||||||
|
if (n <= 64) return dst;
|
||||||
|
|
||||||
|
n = (n - a) & ~0x1f;
|
||||||
|
memset_aligned_32( d + a, v, n );
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
if (n >= 8)
|
||||||
|
{
|
||||||
|
*(unaligned_ui64 *)d = v;
|
||||||
|
*(unaligned_ui64 *)(d + n - 8) = v;
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
if (n >= 4)
|
||||||
|
{
|
||||||
|
*(unaligned_ui32 *)d = v;
|
||||||
|
*(unaligned_ui32 *)(d + n - 4) = v;
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
if (n >= 2)
|
||||||
|
{
|
||||||
|
*(unaligned_ui16 *)d = v;
|
||||||
|
*(unaligned_ui16 *)(d + n - 2) = v;
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
if (n >= 1)
|
||||||
|
{
|
||||||
|
*(uint8_t *)d = v;
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue