ntdll: Store entire XMM context in x86_64 syscall frame.

Signed-off-by: Jacek Caban <jacek@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Jacek Caban 2021-02-15 21:57:42 +01:00 committed by Alexandre Julliard
parent 50903a1504
commit 8f50dde9cd
2 changed files with 30 additions and 46 deletions

View File

@ -266,10 +266,15 @@ struct apc_stack_layout
C_ASSERT( offsetof(struct apc_stack_layout, context) == 0x30 );
C_ASSERT( sizeof(struct apc_stack_layout) == 0x510 );
struct syscall_xsave
{
XMM_SAVE_AREA32 xsave;
};
C_ASSERT( sizeof(struct syscall_xsave) == 0x200 );
struct syscall_frame
{
ULONG64 xmm[10 * 2]; /* xmm6-xmm15 */
ULONG64 mxcsr;
ULONG64 r12;
ULONG64 r13;
ULONG64 r14;
@ -283,7 +288,7 @@ struct syscall_frame
};
/* Should match the offset in call_user_apc_dispatcher(). */
C_ASSERT( offsetof( struct syscall_frame, ret_addr ) == 0xf0);
C_ASSERT( offsetof( struct syscall_frame, ret_addr ) == 0x48);
struct amd64_thread_data
{
@ -316,6 +321,11 @@ void set_syscall_frame(void *frame)
amd64_thread_data()->syscall_frame = frame;
}
static struct syscall_xsave *get_syscall_xsave( struct syscall_frame *frame )
{
return (struct syscall_xsave *)((ULONG_PTR)((struct syscall_xsave *)frame - 1) & ~63);
}
/***********************************************************************
* Definitions for Dwarf unwind tables
*/
@ -1935,10 +1945,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context )
}
if (needed_flags & CONTEXT_FLOATING_POINT)
{
__asm__( "fxsave %0" : "=m" (context->u.FltSave) );
context->MxCsr = frame->mxcsr;
memset( &context->u.s.Xmm0, 0, 6 * sizeof(context->u.s.Xmm0) );
memcpy( &context->u.s.Xmm6, frame->xmm, 10 * sizeof(context->u.s.Xmm0) );
context->u.FltSave = get_syscall_xsave(frame)->xsave;
context->MxCsr = context->u.FltSave.MxCsr;
context->ContextFlags |= CONTEXT_FLOATING_POINT;
}
/* update the cached version of the debug registers */
@ -2079,7 +2087,7 @@ __ASM_GLOBAL_FUNC( call_user_apc_dispatcher,
"movq 0x98(%rcx),%rdx\n\t" /* context->Rsp */
"jmp 2f\n\t"
"1:\tmovq 0x328(%rbx),%rax\n\t" /* amd64_thread_data()->syscall_frame */
"leaq 0xf0(%rax),%rdx\n\t" /* &amd64_thread_data()->syscall_frame->ret_addr */
"leaq 0x48(%rax),%rdx\n\t" /* &amd64_thread_data()->syscall_frame->ret_addr */
"2:\tsubq $0x510,%rdx\n\t" /* sizeof(struct apc_stack_layout) */
"andq $~0xf,%rdx\n\t"
"addq $8,%rsp\n\t" /* pop return address */
@ -2124,27 +2132,15 @@ __ASM_GLOBAL_FUNC( call_user_apc_dispatcher,
__ASM_GLOBAL_FUNC( call_raise_user_exception_dispatcher,
"movq %gs:0x30,%rdx\n\t"
"movq 0x328(%rdx),%rax\n\t" /* amd64_thread_data()->syscall_frame */
"movdqu 0x0(%rax),%xmm6\n\t" /* frame->xmm[0..19] */
"movdqu 0x10(%rax),%xmm7\n\t"
"movdqu 0x20(%rax),%xmm8\n\t"
"movdqu 0x30(%rax),%xmm9\n\t"
"movdqu 0x40(%rax),%xmm10\n\t"
"movdqu 0x50(%rax),%xmm11\n\t"
"movdqu 0x60(%rax),%xmm12\n\t"
"movdqu 0x70(%rax),%xmm13\n\t"
"movdqu 0x80(%rax),%xmm14\n\t"
"movdqu 0x90(%rax),%xmm15\n\t"
"ldmxcsr 0xa0(%rax)\n\t" /* frame->mxcsr */
"movq 0xa8(%rax),%r12\n\t" /* frame->r12 */
"movq 0xb0(%rax),%r13\n\t" /* frame->r13 */
"movq 0xb8(%rax),%r14\n\t" /* frame->r14 */
"movq 0xc0(%rax),%r15\n\t" /* frame->r15 */
"movq 0xc8(%rax),%rdi\n\t" /* frame->rdi */
"movq 0xd0(%rax),%rsi\n\t" /* frame->rsi */
"movq 0xd8(%rax),%rbx\n\t" /* frame->rbx */
"movq 0xe0(%rax),%rbp\n\t" /* frame->rbp */
"leaq -0x200(%rax),%r8\n\t"
"andq $~63,%r8\n\t"
"fxrstor64 (%r8)\n\t"
"movq 0x20(%rax),%rdi\n\t" /* frame->rdi */
"movq 0x28(%rax),%rsi\n\t" /* frame->rsi */
"movq 0x30(%rax),%rbx\n\t" /* frame->rbx */
"movq 0x38(%rax),%rbp\n\t" /* frame->rbp */
"movq $0,0x328(%rdx)\n\t"
"leaq 0xf0(%rax),%rsp\n\t"
"leaq 0x48(%rax),%rsp\n\t"
"jmpq *%rcx" )
@ -2398,11 +2394,7 @@ static BOOL handle_syscall_fault( ucontext_t *sigcontext, EXCEPTION_RECORD *rec,
R15_sig(sigcontext) = frame->r15;
RSP_sig(sigcontext) = (ULONG_PTR)&frame->ret_addr;
RIP_sig(sigcontext) = frame->thunk_addr;
if (fpu)
{
fpu->MxCsr =frame->mxcsr;
memcpy( fpu->XmmRegisters + 6, frame->xmm, sizeof(frame->xmm) );
}
if (fpu) *fpu = get_syscall_xsave( frame )->xsave;
amd64_thread_data()->syscall_frame = NULL;
}
return TRUE;

View File

@ -1508,19 +1508,9 @@ void output_syscalls( DLLSPEC *spec )
output_cfi( ".cfi_rel_offset %%rbp,0" );
output( "\tmovq %%rsp,%%rbp\n" );
output_cfi( ".cfi_def_cfa_register %%rbp" );
output( "\tleaq -0xe0(%%rbp),%%rsp\n" );
output( "\tleaq -0x238(%%rbp),%%rsp\n" );
output( "\tandq $~63,%%rsp\n" );
output( "\tmovq %%gs:0x30,%%rcx\n" );
output( "\tmovdqu %%xmm6,-0xe0(%%rbp)\n" );
output( "\tmovdqu %%xmm7,-0xd0(%%rbp)\n" );
output( "\tmovdqu %%xmm8,-0xc0(%%rbp)\n" );
output( "\tmovdqu %%xmm9,-0xb0(%%rbp)\n" );
output( "\tmovdqu %%xmm10,-0xa0(%%rbp)\n" );
output( "\tmovdqu %%xmm11,-0x90(%%rbp)\n" );
output( "\tmovdqu %%xmm12,-0x80(%%rbp)\n" );
output( "\tmovdqu %%xmm13,-0x70(%%rbp)\n" );
output( "\tmovdqu %%xmm14,-0x60(%%rbp)\n" );
output( "\tmovdqu %%xmm15,-0x50(%%rbp)\n" );
output( "\tstmxcsr -0x40(%%rbp)\n" );
output( "\tmovq %%r12,-0x38(%%rbp)\n" );
output( "\tmovq %%r13,-0x30(%%rbp)\n" );
output( "\tmovq %%r14,-0x28(%%rbp)\n" );
@ -1531,10 +1521,12 @@ void output_syscalls( DLLSPEC *spec )
output_cfi( ".cfi_rel_offset %%rsi,-16" );
output( "\tmovq %%rbx,-0x08(%%rbp)\n" );
output_cfi( ".cfi_rel_offset %%rbx,-8" );
output( "\tfxsave64 (%%rsp)\n" );
/* Legends of Runeterra hooks the first system call return instruction, and
* depends on us returning to it. Adjust the return address accordingly. */
output( "\tsubq $0xb,0x8(%%rbp)\n" );
output( "\tmovq %%rsp,0x328(%%rcx)\n" ); /* amd64_thread_data()->syscall_frame */
output( "\tleaq -0x38(%%rbp),%%rbx\n" );
output( "\tmovq %%rbx,0x328(%%rcx)\n" ); /* amd64_thread_data()->syscall_frame */
output( "\tcmpq $%u,%%rax\n", count );
output( "\tjae 4f\n" );
output( "\tleaq .Lsyscall_args(%%rip),%%rcx\n" );