Index: src/forwarding/apple-forwarding-x86_64.S ================================================================== --- src/forwarding/apple-forwarding-x86_64.S +++ src/forwarding/apple-forwarding-x86_64.S @@ -43,18 +43,18 @@ movq %rsi, -0x18(%rbp) movq %rdx, -0x20(%rbp) movq %rcx, -0x28(%rbp) movq %r8, -0x30(%rbp) movq %r9, -0x38(%rbp) - movdqa %xmm0, -0x50(%rbp) - movdqa %xmm1, -0x60(%rbp) - movdqa %xmm2, -0x70(%rbp) - movdqa %xmm3, -0x80(%rbp) - movdqa %xmm4, -0x90(%rbp) - movdqa %xmm5, -0xA0(%rbp) - movdqa %xmm6, -0xB0(%rbp) - movdqa %xmm7, -0xC0(%rbp) + movaps %xmm0, -0x50(%rbp) + movaps %xmm1, -0x60(%rbp) + movaps %xmm2, -0x70(%rbp) + movaps %xmm3, -0x80(%rbp) + movaps %xmm4, -0x90(%rbp) + movaps %xmm5, -0xA0(%rbp) + movaps %xmm6, -0xB0(%rbp) + movaps %xmm7, -0xC0(%rbp) call _object_getClass movq %rax, %rdi movq sel_forwardingTargetForSelector_(%rip), %rsi @@ -74,18 +74,18 @@ je 0f movq %rax, %rdi /* Restore all arguments, except %rdi */ - movdqa -0xC0(%rbp), %xmm7 - movdqa -0xB0(%rbp), %xmm6 - movdqa -0xA0(%rbp), %xmm5 - movdqa -0x90(%rbp), %xmm4 - movdqa -0x80(%rbp), %xmm3 - movdqa -0x70(%rbp), %xmm2 - movdqa -0x60(%rbp), %xmm1 - movdqa -0x50(%rbp), %xmm0 + movaps -0xC0(%rbp), %xmm7 + movaps -0xB0(%rbp), %xmm6 + movaps -0xA0(%rbp), %xmm5 + movaps -0x90(%rbp), %xmm4 + movaps -0x80(%rbp), %xmm3 + movaps -0x70(%rbp), %xmm2 + movaps -0x60(%rbp), %xmm1 + movaps -0x50(%rbp), %xmm0 movq -0x38(%rbp), %r9 movq -0x30(%rbp), %r8 movq -0x28(%rbp), %rcx movq -0x20(%rbp), %rdx movq -0x18(%rbp), %rsi @@ -116,18 +116,18 @@ movq %rsi, -0x18(%rbp) movq %rdx, -0x20(%rbp) movq %rcx, -0x28(%rbp) movq %r8, -0x30(%rbp) movq %r9, -0x38(%rbp) - movdqa %xmm0, -0x50(%rbp) - movdqa %xmm1, -0x60(%rbp) - movdqa %xmm2, -0x70(%rbp) - movdqa %xmm3, -0x80(%rbp) - movdqa %xmm4, -0x90(%rbp) - movdqa %xmm5, -0xA0(%rbp) - movdqa %xmm6, -0xB0(%rbp) - movdqa %xmm7, -0xC0(%rbp) + movaps %xmm0, -0x50(%rbp) + movaps %xmm1, -0x60(%rbp) + movaps %xmm2, -0x70(%rbp) + movaps %xmm3, -0x80(%rbp) + movaps %xmm4, -0x90(%rbp) + movaps %xmm5, -0xA0(%rbp) + movaps %xmm6, -0xB0(%rbp) + movaps %xmm7, -0xC0(%rbp) movq %rsi, %rdi call _object_getClass movq %rax, %rdi @@ -147,18 +147,18 @@ je 0f movq %rax, %rsi /* Restore all arguments, except %rsi */ - movdqa -0xC0(%rbp), %xmm7 - movdqa -0xB0(%rbp), %xmm6 - movdqa -0xA0(%rbp), %xmm5 - movdqa -0x90(%rbp), %xmm4 - movdqa -0x80(%rbp), %xmm3 - movdqa -0x70(%rbp), %xmm2 - movdqa -0x60(%rbp), %xmm1 - movdqa -0x50(%rbp), %xmm0 + movaps -0xC0(%rbp), %xmm7 + movaps -0xB0(%rbp), %xmm6 + movaps -0xA0(%rbp), %xmm5 + movaps -0x90(%rbp), %xmm4 + movaps -0x80(%rbp), %xmm3 + movaps -0x70(%rbp), %xmm2 + movaps -0x60(%rbp), %xmm1 + movaps -0x50(%rbp), %xmm0 movq -0x38(%rbp), %r9 movq -0x30(%rbp), %r8 movq -0x28(%rbp), %rcx movq -0x20(%rbp), %rdx movq -0x10(%rbp), %rdi Index: src/forwarding/forwarding-x86_64-elf.S ================================================================== --- src/forwarding/forwarding-x86_64-elf.S +++ src/forwarding/forwarding-x86_64-elf.S @@ -34,18 +34,18 @@ movq %rsi, -0x18(%rbp) movq %rdx, -0x20(%rbp) movq %rcx, -0x28(%rbp) movq %r8, -0x30(%rbp) movq %r9, -0x38(%rbp) - movdqa %xmm0, -0x50(%rbp) - movdqa %xmm1, -0x60(%rbp) - movdqa %xmm2, -0x70(%rbp) - movdqa %xmm3, -0x80(%rbp) - movdqa %xmm4, -0x90(%rbp) - movdqa %xmm5, -0xA0(%rbp) - movdqa %xmm6, -0xB0(%rbp) - movdqa %xmm7, -0xC0(%rbp) + movaps %xmm0, -0x50(%rbp) + movaps %xmm1, -0x60(%rbp) + movaps %xmm2, -0x70(%rbp) + movaps %xmm3, -0x80(%rbp) + movaps %xmm4, -0x90(%rbp) + movaps %xmm5, -0xA0(%rbp) + movaps %xmm6, -0xB0(%rbp) + movaps %xmm7, -0xC0(%rbp) call object_getClass@PLT movq %rax, %rdi leaq sel_forwardingTargetForSelector_(%rip), %rsi @@ -74,18 +74,18 @@ movq -0x18(%rbp), %rsi call objc_msg_lookup@PLT movq %rax, %r11 /* Restore all arguments */ - movdqa -0xC0(%rbp), %xmm7 - movdqa -0xB0(%rbp), %xmm6 - movdqa -0xA0(%rbp), %xmm5 - movdqa -0x90(%rbp), %xmm4 - movdqa -0x80(%rbp), %xmm3 - movdqa -0x70(%rbp), %xmm2 - movdqa -0x60(%rbp), %xmm1 - movdqa -0x50(%rbp), %xmm0 + movaps -0xC0(%rbp), %xmm7 + movaps -0xB0(%rbp), %xmm6 + movaps -0xA0(%rbp), %xmm5 + movaps -0x90(%rbp), %xmm4 + movaps -0x80(%rbp), %xmm3 + movaps -0x70(%rbp), %xmm2 + movaps -0x60(%rbp), %xmm1 + movaps -0x50(%rbp), %xmm0 movq -0x38(%rbp), %r9 movq -0x30(%rbp), %r8 movq -0x28(%rbp), %rcx movq -0x20(%rbp), %rdx movq -0x18(%rbp), %rsi @@ -119,18 +119,18 @@ movq %rsi, -0x18(%rbp) movq %rdx, -0x20(%rbp) movq %rcx, -0x28(%rbp) movq %r8, -0x30(%rbp) movq %r9, -0x38(%rbp) - movdqa %xmm0, -0x50(%rbp) - movdqa %xmm1, -0x60(%rbp) - movdqa %xmm2, -0x70(%rbp) - movdqa %xmm3, -0x80(%rbp) - movdqa %xmm4, -0x90(%rbp) - movdqa %xmm5, -0xA0(%rbp) - movdqa %xmm6, -0xB0(%rbp) - movdqa %xmm7, -0xC0(%rbp) + movaps %xmm0, -0x50(%rbp) + movaps %xmm1, -0x60(%rbp) + movaps %xmm2, -0x70(%rbp) + movaps %xmm3, -0x80(%rbp) + movaps %xmm4, -0x90(%rbp) + movaps %xmm5, -0xA0(%rbp) + movaps %xmm6, -0xB0(%rbp) + movaps %xmm7, -0xC0(%rbp) movq %rsi, %rdi call object_getClass@PLT movq %rax, %rdi @@ -160,18 +160,18 @@ movq -0x20(%rbp), %rsi call objc_msg_lookup_stret@PLT movq %rax, %r11 /* Restore all arguments */ - movdqa -0xC0(%rbp), %xmm7 - movdqa -0xB0(%rbp), %xmm6 - movdqa -0xA0(%rbp), %xmm5 - movdqa -0x90(%rbp), %xmm4 - movdqa -0x80(%rbp), %xmm3 - movdqa -0x70(%rbp), %xmm2 - movdqa -0x60(%rbp), %xmm1 - movdqa -0x50(%rbp), %xmm0 + movaps -0xC0(%rbp), %xmm7 + movaps -0xB0(%rbp), %xmm6 + movaps -0xA0(%rbp), %xmm5 + movaps -0x90(%rbp), %xmm4 + movaps -0x80(%rbp), %xmm3 + movaps -0x70(%rbp), %xmm2 + movaps -0x60(%rbp), %xmm1 + movaps -0x50(%rbp), %xmm0 movq -0x38(%rbp), %r9 movq -0x30(%rbp), %r8 movq -0x28(%rbp), %rcx movq -0x20(%rbp), %rdx movq -0x18(%rbp), %rsi Index: src/forwarding/forwarding-x86_64-macho.S ================================================================== --- src/forwarding/forwarding-x86_64-macho.S +++ src/forwarding/forwarding-x86_64-macho.S @@ -34,18 +34,18 @@ movq %rsi, -0x18(%rbp) movq %rdx, -0x20(%rbp) movq %rcx, -0x28(%rbp) movq %r8, -0x30(%rbp) movq %r9, -0x38(%rbp) - movdqa %xmm0, -0x50(%rbp) - movdqa %xmm1, -0x60(%rbp) - movdqa %xmm2, -0x70(%rbp) - movdqa %xmm3, -0x80(%rbp) - movdqa %xmm4, -0x90(%rbp) - movdqa %xmm5, -0xA0(%rbp) - movdqa %xmm6, -0xB0(%rbp) - movdqa %xmm7, -0xC0(%rbp) + movaps %xmm0, -0x50(%rbp) + movaps %xmm1, -0x60(%rbp) + movaps %xmm2, -0x70(%rbp) + movaps %xmm3, -0x80(%rbp) + movaps %xmm4, -0x90(%rbp) + movaps %xmm5, -0xA0(%rbp) + movaps %xmm6, -0xB0(%rbp) + movaps %xmm7, -0xC0(%rbp) call _object_getClass movq %rax, %rdi leaq sel_forwardingTargetForSelector_(%rip), %rsi @@ -74,18 +74,18 @@ movq -0x18(%rbp), %rsi call _objc_msg_lookup movq %rax, %r11 /* Restore all arguments */ - movdqa -0xC0(%rbp), %xmm7 - movdqa -0xB0(%rbp), %xmm6 - movdqa -0xA0(%rbp), %xmm5 - movdqa -0x90(%rbp), %xmm4 - movdqa -0x80(%rbp), %xmm3 - movdqa -0x70(%rbp), %xmm2 - movdqa -0x60(%rbp), %xmm1 - movdqa -0x50(%rbp), %xmm0 + movaps -0xC0(%rbp), %xmm7 + movaps -0xB0(%rbp), %xmm6 + movaps -0xA0(%rbp), %xmm5 + movaps -0x90(%rbp), %xmm4 + movaps -0x80(%rbp), %xmm3 + movaps -0x70(%rbp), %xmm2 + movaps -0x60(%rbp), %xmm1 + movaps -0x50(%rbp), %xmm0 movq -0x38(%rbp), %r9 movq -0x30(%rbp), %r8 movq -0x28(%rbp), %rcx movq -0x20(%rbp), %rdx movq -0x18(%rbp), %rsi @@ -117,18 +117,18 @@ movq %rsi, -0x18(%rbp) movq %rdx, -0x20(%rbp) movq %rcx, -0x28(%rbp) movq %r8, -0x30(%rbp) movq %r9, -0x38(%rbp) - movdqa %xmm0, -0x50(%rbp) - movdqa %xmm1, -0x60(%rbp) - movdqa %xmm2, -0x70(%rbp) - movdqa %xmm3, -0x80(%rbp) - movdqa %xmm4, -0x90(%rbp) - movdqa %xmm5, -0xA0(%rbp) - movdqa %xmm6, -0xB0(%rbp) - movdqa %xmm7, -0xC0(%rbp) + movaps %xmm0, -0x50(%rbp) + movaps %xmm1, -0x60(%rbp) + movaps %xmm2, -0x70(%rbp) + movaps %xmm3, -0x80(%rbp) + movaps %xmm4, -0x90(%rbp) + movaps %xmm5, -0xA0(%rbp) + movaps %xmm6, -0xB0(%rbp) + movaps %xmm7, -0xC0(%rbp) movq %rsi, %rdi call _object_getClass movq %rax, %rdi @@ -158,18 +158,18 @@ movq -0x20(%rbp), %rsi call _objc_msg_lookup_stret movq %rax, %r11 /* Restore all arguments */ - movdqa -0xC0(%rbp), %xmm7 - movdqa -0xB0(%rbp), %xmm6 - movdqa -0xA0(%rbp), %xmm5 - movdqa -0x90(%rbp), %xmm4 - movdqa -0x80(%rbp), %xmm3 - movdqa -0x70(%rbp), %xmm2 - movdqa -0x60(%rbp), %xmm1 - movdqa -0x50(%rbp), %xmm0 + movaps -0xC0(%rbp), %xmm7 + movaps -0xB0(%rbp), %xmm6 + movaps -0xA0(%rbp), %xmm5 + movaps -0x90(%rbp), %xmm4 + movaps -0x80(%rbp), %xmm3 + movaps -0x70(%rbp), %xmm2 + movaps -0x60(%rbp), %xmm1 + movaps -0x50(%rbp), %xmm0 movq -0x38(%rbp), %r9 movq -0x30(%rbp), %r8 movq -0x28(%rbp), %rcx movq -0x20(%rbp), %rdx movq -0x18(%rbp), %rsi Index: src/forwarding/forwarding-x86_64-win64.S ================================================================== --- src/forwarding/forwarding-x86_64-win64.S +++ src/forwarding/forwarding-x86_64-win64.S @@ -30,14 +30,14 @@ movq %rax, -0x28(%rbp) movq %rcx, -0x30(%rbp) movq %rdx, -0x38(%rbp) movq %r8, -0x40(%rbp) movq %r9, -0x48(%rbp) - movdqa %xmm0, -0x60(%rbp) - movdqa %xmm1, -0x70(%rbp) - movdqa %xmm2, -0x80(%rbp) - movdqa %xmm3, -0x90(%rbp) + movaps %xmm0, -0x60(%rbp) + movaps %xmm1, -0x70(%rbp) + movaps %xmm2, -0x80(%rbp) + movaps %xmm3, -0x90(%rbp) call object_getClass movq %rax, %rcx leaq sel_forwardingTargetForSelector_(%rip), %rdx @@ -66,14 +66,14 @@ movq -0x38(%rbp), %rdx call objc_msg_lookup movq %rax, %r11 /* Restore all arguments */ - movdqa -0x90(%rbp), %xmm3 - movdqa -0x80(%rbp), %xmm2 - movdqa -0x70(%rbp), %xmm1 - movdqa -0x60(%rbp), %xmm0 + movaps -0x90(%rbp), %xmm3 + movaps -0x80(%rbp), %xmm2 + movaps -0x70(%rbp), %xmm1 + movaps -0x60(%rbp), %xmm0 movq -0x48(%rbp), %r9 movq -0x40(%rbp), %r8 movq -0x38(%rbp), %rdx movq -0x30(%rbp), %rcx movq -0x28(%rbp), %rax @@ -101,14 +101,14 @@ movq %rax, -0x28(%rbp) movq %rcx, -0x30(%rbp) movq %rdx, -0x38(%rbp) movq %r8, -0x40(%rbp) movq %r9, -0x48(%rbp) - movdqa %xmm0, -0x60(%rbp) - movdqa %xmm1, -0x70(%rbp) - movdqa %xmm2, -0x80(%rbp) - movdqa %xmm3, -0x90(%rbp) + movaps %xmm0, -0x60(%rbp) + movaps %xmm1, -0x70(%rbp) + movaps %xmm2, -0x80(%rbp) + movaps %xmm3, -0x90(%rbp) movq %rdx, %rcx call object_getClass movq %rax, %rcx @@ -138,14 +138,14 @@ movq -0x40(%rbp), %rdx call objc_msg_lookup_stret movq %rax, %r11 /* Restore all arguments */ - movdqa -0x90(%rbp), %xmm3 - movdqa -0x80(%rbp), %xmm2 - movdqa -0x70(%rbp), %xmm1 - movdqa -0x60(%rbp), %xmm0 + movaps -0x90(%rbp), %xmm3 + movaps -0x80(%rbp), %xmm2 + movaps -0x70(%rbp), %xmm1 + movaps -0x60(%rbp), %xmm0 movq -0x48(%rbp), %r9 movq -0x40(%rbp), %r8 movq -0x38(%rbp), %rdx movq -0x30(%rbp), %rcx movq -0x28(%rbp), %rax Index: src/invocation/apple-call-x86_64.S ================================================================== --- src/invocation/apple-call-x86_64.S +++ src/invocation/apple-call-x86_64.S @@ -47,18 +47,18 @@ jmp Lfill_stack Lstack_filled: movb OFFSET_NUM_SSE_USED(%rdi), %al - movdqa OFFSET_SSE_INOUT+112(%rdi), %xmm7 - movdqa OFFSET_SSE_INOUT+96(%rdi), %xmm6 - movdqa OFFSET_SSE_INOUT+80(%rdi), %xmm5 - movdqa OFFSET_SSE_INOUT+64(%rdi), %xmm4 - movdqa OFFSET_SSE_INOUT+48(%rdi), %xmm3 - movdqa OFFSET_SSE_INOUT+32(%rdi), %xmm2 - movdqa OFFSET_SSE_INOUT+16(%rdi), %xmm1 - movdqa OFFSET_SSE_INOUT(%rdi), %xmm0 + movaps OFFSET_SSE_INOUT+112(%rdi), %xmm7 + movaps OFFSET_SSE_INOUT+96(%rdi), %xmm6 + movaps OFFSET_SSE_INOUT+80(%rdi), %xmm5 + movaps OFFSET_SSE_INOUT+64(%rdi), %xmm4 + movaps OFFSET_SSE_INOUT+48(%rdi), %xmm3 + movaps OFFSET_SSE_INOUT+32(%rdi), %xmm2 + movaps OFFSET_SSE_INOUT+16(%rdi), %xmm1 + movaps OFFSET_SSE_INOUT(%rdi), %xmm0 movq OFFSET_GPR_IN+40(%rdi), %r9 movq OFFSET_GPR_IN+32(%rdi), %r8 movq OFFSET_GPR_IN+24(%rdi), %rcx movq OFFSET_GPR_IN+16(%rdi), %rdx @@ -80,12 +80,12 @@ Lafter_send: movq -8(%rbp), %rdi movq %rax, OFFSET_GPR_OUT(%rdi) movq %rdx, OFFSET_GPR_OUT+8(%rdi) - movdqa %xmm0, OFFSET_SSE_INOUT(%rdi) - movdqa %xmm1, OFFSET_SSE_INOUT+16(%rdi) + movaps %xmm0, OFFSET_SSE_INOUT(%rdi) + movaps %xmm1, OFFSET_SSE_INOUT+16(%rdi) movb OFFSET_RETURN_TYPE(%rdi), %r11b cmpb $RETURN_TYPE_X87, %r11b je Lpop_long_double Index: src/invocation/call-x86_64-elf.S ================================================================== --- src/invocation/call-x86_64-elf.S +++ src/invocation/call-x86_64-elf.S @@ -61,18 +61,18 @@ jmp .fill_stack .stack_filled: movb OFFSET_NUM_SSE_USED(%rdi), %al - movdqa OFFSET_SSE_INOUT+112(%rdi), %xmm7 - movdqa OFFSET_SSE_INOUT+96(%rdi), %xmm6 - movdqa OFFSET_SSE_INOUT+80(%rdi), %xmm5 - movdqa OFFSET_SSE_INOUT+64(%rdi), %xmm4 - movdqa OFFSET_SSE_INOUT+48(%rdi), %xmm3 - movdqa OFFSET_SSE_INOUT+32(%rdi), %xmm2 - movdqa OFFSET_SSE_INOUT+16(%rdi), %xmm1 - movdqa OFFSET_SSE_INOUT(%rdi), %xmm0 + movaps OFFSET_SSE_INOUT+112(%rdi), %xmm7 + movaps OFFSET_SSE_INOUT+96(%rdi), %xmm6 + movaps OFFSET_SSE_INOUT+80(%rdi), %xmm5 + movaps OFFSET_SSE_INOUT+64(%rdi), %xmm4 + movaps OFFSET_SSE_INOUT+48(%rdi), %xmm3 + movaps OFFSET_SSE_INOUT+32(%rdi), %xmm2 + movaps OFFSET_SSE_INOUT+16(%rdi), %xmm1 + movaps OFFSET_SSE_INOUT(%rdi), %xmm0 movq OFFSET_GPR_IN+40(%rdi), %r9 movq OFFSET_GPR_IN+32(%rdi), %r8 movq OFFSET_GPR_IN+24(%rdi), %rcx movq OFFSET_GPR_IN+16(%rdi), %rdx @@ -91,12 +91,12 @@ .after_send: movq -8(%rbp), %rdi movq %rax, OFFSET_GPR_OUT(%rdi) movq %rdx, OFFSET_GPR_OUT+8(%rdi) - movdqa %xmm0, OFFSET_SSE_INOUT(%rdi) - movdqa %xmm1, OFFSET_SSE_INOUT+16(%rdi) + movaps %xmm0, OFFSET_SSE_INOUT(%rdi) + movaps %xmm1, OFFSET_SSE_INOUT+16(%rdi) movb OFFSET_RETURN_TYPE(%rdi), %r11b cmpb $RETURN_TYPE_X87, %r11b je .pop_long_double