diff --git a/dlls/ntdll/unix/signal_x86_64.c b/dlls/ntdll/unix/signal_x86_64.c index 12e40853397..5da1a467b71 100644 --- a/dlls/ntdll/unix/signal_x86_64.c +++ b/dlls/ntdll/unix/signal_x86_64.c @@ -266,10 +266,15 @@ struct apc_stack_layout C_ASSERT( offsetof(struct apc_stack_layout, context) == 0x30 ); C_ASSERT( sizeof(struct apc_stack_layout) == 0x510 ); +struct syscall_xsave +{ + XMM_SAVE_AREA32 xsave; +}; + +C_ASSERT( sizeof(struct syscall_xsave) == 0x200 ); + struct syscall_frame { - ULONG64 xmm[10 * 2]; /* xmm6-xmm15 */ - ULONG64 mxcsr; ULONG64 r12; ULONG64 r13; ULONG64 r14; @@ -283,7 +288,7 @@ struct syscall_frame }; /* Should match the offset in call_user_apc_dispatcher(). */ -C_ASSERT( offsetof( struct syscall_frame, ret_addr ) == 0xf0); +C_ASSERT( offsetof( struct syscall_frame, ret_addr ) == 0x48); struct amd64_thread_data { @@ -316,6 +321,11 @@ void set_syscall_frame(void *frame) amd64_thread_data()->syscall_frame = frame; } +static struct syscall_xsave *get_syscall_xsave( struct syscall_frame *frame ) +{ + return (struct syscall_xsave *)((ULONG_PTR)((struct syscall_xsave *)frame - 1) & ~63); +} + /*********************************************************************** * Definitions for Dwarf unwind tables */ @@ -1935,10 +1945,8 @@ NTSTATUS WINAPI NtGetContextThread( HANDLE handle, CONTEXT *context ) } if (needed_flags & CONTEXT_FLOATING_POINT) { - __asm__( "fxsave %0" : "=m" (context->u.FltSave) ); - context->MxCsr = frame->mxcsr; - memset( &context->u.s.Xmm0, 0, 6 * sizeof(context->u.s.Xmm0) ); - memcpy( &context->u.s.Xmm6, frame->xmm, 10 * sizeof(context->u.s.Xmm0) ); + context->u.FltSave = get_syscall_xsave(frame)->xsave; + context->MxCsr = context->u.FltSave.MxCsr; context->ContextFlags |= CONTEXT_FLOATING_POINT; } /* update the cached version of the debug registers */ @@ -2079,7 +2087,7 @@ __ASM_GLOBAL_FUNC( call_user_apc_dispatcher, "movq 0x98(%rcx),%rdx\n\t" /* context->Rsp */ "jmp 2f\n\t" "1:\tmovq 0x328(%rbx),%rax\n\t" /* amd64_thread_data()->syscall_frame */ - "leaq 0xf0(%rax),%rdx\n\t" /* &amd64_thread_data()->syscall_frame->ret_addr */ + "leaq 0x48(%rax),%rdx\n\t" /* &amd64_thread_data()->syscall_frame->ret_addr */ "2:\tsubq $0x510,%rdx\n\t" /* sizeof(struct apc_stack_layout) */ "andq $~0xf,%rdx\n\t" "addq $8,%rsp\n\t" /* pop return address */ @@ -2124,27 +2132,15 @@ __ASM_GLOBAL_FUNC( call_user_apc_dispatcher, __ASM_GLOBAL_FUNC( call_raise_user_exception_dispatcher, "movq %gs:0x30,%rdx\n\t" "movq 0x328(%rdx),%rax\n\t" /* amd64_thread_data()->syscall_frame */ - "movdqu 0x0(%rax),%xmm6\n\t" /* frame->xmm[0..19] */ - "movdqu 0x10(%rax),%xmm7\n\t" - "movdqu 0x20(%rax),%xmm8\n\t" - "movdqu 0x30(%rax),%xmm9\n\t" - "movdqu 0x40(%rax),%xmm10\n\t" - "movdqu 0x50(%rax),%xmm11\n\t" - "movdqu 0x60(%rax),%xmm12\n\t" - "movdqu 0x70(%rax),%xmm13\n\t" - "movdqu 0x80(%rax),%xmm14\n\t" - "movdqu 0x90(%rax),%xmm15\n\t" - "ldmxcsr 0xa0(%rax)\n\t" /* frame->mxcsr */ - "movq 0xa8(%rax),%r12\n\t" /* frame->r12 */ - "movq 0xb0(%rax),%r13\n\t" /* frame->r13 */ - "movq 0xb8(%rax),%r14\n\t" /* frame->r14 */ - "movq 0xc0(%rax),%r15\n\t" /* frame->r15 */ - "movq 0xc8(%rax),%rdi\n\t" /* frame->rdi */ - "movq 0xd0(%rax),%rsi\n\t" /* frame->rsi */ - "movq 0xd8(%rax),%rbx\n\t" /* frame->rbx */ - "movq 0xe0(%rax),%rbp\n\t" /* frame->rbp */ + "leaq -0x200(%rax),%r8\n\t" + "andq $~63,%r8\n\t" + "fxrstor64 (%r8)\n\t" + "movq 0x20(%rax),%rdi\n\t" /* frame->rdi */ + "movq 0x28(%rax),%rsi\n\t" /* frame->rsi */ + "movq 0x30(%rax),%rbx\n\t" /* frame->rbx */ + "movq 0x38(%rax),%rbp\n\t" /* frame->rbp */ "movq $0,0x328(%rdx)\n\t" - "leaq 0xf0(%rax),%rsp\n\t" + "leaq 0x48(%rax),%rsp\n\t" "jmpq *%rcx" ) @@ -2398,11 +2394,7 @@ static BOOL handle_syscall_fault( ucontext_t *sigcontext, EXCEPTION_RECORD *rec, R15_sig(sigcontext) = frame->r15; RSP_sig(sigcontext) = (ULONG_PTR)&frame->ret_addr; RIP_sig(sigcontext) = frame->thunk_addr; - if (fpu) - { - fpu->MxCsr =frame->mxcsr; - memcpy( fpu->XmmRegisters + 6, frame->xmm, sizeof(frame->xmm) ); - } + if (fpu) *fpu = get_syscall_xsave( frame )->xsave; amd64_thread_data()->syscall_frame = NULL; } return TRUE; diff --git a/tools/winebuild/import.c b/tools/winebuild/import.c index bc2d8497a47..fda1fe90541 100644 --- a/tools/winebuild/import.c +++ b/tools/winebuild/import.c @@ -1508,19 +1508,9 @@ void output_syscalls( DLLSPEC *spec ) output_cfi( ".cfi_rel_offset %%rbp,0" ); output( "\tmovq %%rsp,%%rbp\n" ); output_cfi( ".cfi_def_cfa_register %%rbp" ); - output( "\tleaq -0xe0(%%rbp),%%rsp\n" ); + output( "\tleaq -0x238(%%rbp),%%rsp\n" ); + output( "\tandq $~63,%%rsp\n" ); output( "\tmovq %%gs:0x30,%%rcx\n" ); - output( "\tmovdqu %%xmm6,-0xe0(%%rbp)\n" ); - output( "\tmovdqu %%xmm7,-0xd0(%%rbp)\n" ); - output( "\tmovdqu %%xmm8,-0xc0(%%rbp)\n" ); - output( "\tmovdqu %%xmm9,-0xb0(%%rbp)\n" ); - output( "\tmovdqu %%xmm10,-0xa0(%%rbp)\n" ); - output( "\tmovdqu %%xmm11,-0x90(%%rbp)\n" ); - output( "\tmovdqu %%xmm12,-0x80(%%rbp)\n" ); - output( "\tmovdqu %%xmm13,-0x70(%%rbp)\n" ); - output( "\tmovdqu %%xmm14,-0x60(%%rbp)\n" ); - output( "\tmovdqu %%xmm15,-0x50(%%rbp)\n" ); - output( "\tstmxcsr -0x40(%%rbp)\n" ); output( "\tmovq %%r12,-0x38(%%rbp)\n" ); output( "\tmovq %%r13,-0x30(%%rbp)\n" ); output( "\tmovq %%r14,-0x28(%%rbp)\n" ); @@ -1531,10 +1521,12 @@ void output_syscalls( DLLSPEC *spec ) output_cfi( ".cfi_rel_offset %%rsi,-16" ); output( "\tmovq %%rbx,-0x08(%%rbp)\n" ); output_cfi( ".cfi_rel_offset %%rbx,-8" ); + output( "\tfxsave64 (%%rsp)\n" ); /* Legends of Runeterra hooks the first system call return instruction, and * depends on us returning to it. Adjust the return address accordingly. */ output( "\tsubq $0xb,0x8(%%rbp)\n" ); - output( "\tmovq %%rsp,0x328(%%rcx)\n" ); /* amd64_thread_data()->syscall_frame */ + output( "\tleaq -0x38(%%rbp),%%rbx\n" ); + output( "\tmovq %%rbx,0x328(%%rcx)\n" ); /* amd64_thread_data()->syscall_frame */ output( "\tcmpq $%u,%%rax\n", count ); output( "\tjae 4f\n" ); output( "\tleaq .Lsyscall_args(%%rip),%%rcx\n" );