Index: configure.ac ================================================================== --- configure.ac +++ configure.ac @@ -332,19 +332,10 @@ flag="-Xclang -fno-constant-cfstrings" OBJCFLAGS="$OBJCFLAGS $flag" OBJFW_OBJCFLAGS="$OBJFW_OBJCFLAGS $flag" ]) -case "$host_cpu" in -i*86|x86_64) - AX_CHECK_COMPILER_FLAGS([-masm=intel], [ - OBJCFLAGS="$OBJCFLAGS -masm=intel" - AC_DEFINE(HAVE_INTEL_SYNTAX, 1, [Whether asm syntax is Intel]) - ]) - ;; -esac - AX_CHECK_COMPILER_FLAGS([-Wsign-compare -Werror], [OBJCFLAGS="$OBJCFLAGS -Wsign-compare"]) AS_IF([test x"$with_nds" != x"yes"], [ AX_CHECK_COMPILER_FLAGS([-Wshadow -Werror], [OBJCFLAGS="$OBJCFLAGS -Wshadow"]) Index: src/OFMatrix4x4.m ================================================================== --- src/OFMatrix4x4.m +++ src/OFMatrix4x4.m @@ -27,30 +27,30 @@ { 0, 0, 1, 0 }, { 0, 0, 0, 1 } }; @implementation OFMatrix4x4 -#if (defined(OF_AMD64) || defined(OF_X86)) && defined(HAVE_INTEL_SYNTAX) +#if defined(OF_AMD64) || defined(OF_X86) static void multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { - float result[4][4] = {{ 0 }}; + float result[4][4]; for (uint_fast8_t i = 0; i < 4; i++) { for (uint_fast8_t j = 0; j < 4; j++) { __asm__ ( - "movd mm0, [%2]\n\t" - "punpckldq mm0, [%2 + 16]\n\t" - "pfmul mm0, [%1]\n\t" - "movd mm1, [%2 + 32]\n\t" - "punpckldq mm1, [%2 + 48]\n\t" - "pfmul mm1, [%1 + 8]\n\t" - "pfadd mm0, mm1\n\t" - "movq mm1, mm0\n\t" - "psrlq mm1, 32\n\t" - "pfadd mm0, mm1\n\t" - "movd %0, mm0" + "movd (%2), %%mm0\n\t" + "punpckldq 16(%2), %%mm0\n\t" + "pfmul (%1), %%mm0\n\t" + "movd 32(%2), %%mm1\n\t" + "punpckldq 48(%2), %%mm1\n\t" + "pfmul 8(%1), %%mm1\n\t" + "pfadd %%mm1, %%mm0\n\t" + "movq %%mm0, %%mm1\n\t" + "psrlq $32, %%mm1\n\t" + "pfadd %%mm1, %%mm0\n\t" + "movd %%mm0, %0" :: "m"(result[i][j]), "r"(&matrix->_values[i][0]), "r"(&self->_values[0][j]) : "mm0", "mm1", "memory" ); } @@ -65,52 +65,52 @@ transformedVector_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D vector) { OFVector4D result; __asm__ ( - "movq mm0, [%2]\n\t" - "movq mm1, [%2 + 8]\n" - "\n\t" - "movq mm2, mm0\n\t" - "movq mm3, mm1\n\t" - "pfmul mm2, [%1]\n\t" - "pfmul mm3, [%1 + 8]\n\t" - "pfadd mm2, mm3\n\t" - "movq mm3, mm2\n\t" - "psrlq mm3, 32\n\t" - "pfadd mm2, mm3\n" - "\n\t" - "movq mm3, mm0\n\t" - "movq mm4, mm1\n\t" - "pfmul mm3, [%1 + 16]\n\t" - "pfmul mm4, [%1 + 24]\n\t" - "pfadd mm3, mm4\n\t" - "movq mm4, mm3\n\t" - "psrlq mm4, 32\n\t" - "pfadd mm3, mm4\n" - "\n\t" - "punpckldq mm2, mm3\n\t" - "movq [%0], mm2\n" - "\n\t" - "movq mm2, mm0\n\t" - "movq mm3, mm1\n\t" - "pfmul mm2, [%1 + 32]\n\t" - "pfmul mm3, [%1 + 40]\n\t" - "pfadd mm2, mm3\n\t" - "movq mm3, mm2\n\t" - "psrlq mm3, 32\n\t" - "pfadd mm2, mm3\n" - "\n\t" - "pfmul mm0, [%1 + 48]\n\t" - "pfmul mm1, [%1 + 56]\n\t" - "pfadd mm0, mm1\n\t" - "movq mm1, mm0\n\t" - "psrlq mm1, 32\n\t" - "pfadd mm0, mm1\n" - "\n\t" - "punpckldq mm2, mm0\n\t" - "movq [%0 + 8], mm2\n" + "movq (%2), %%mm0\n\t" + "movq 8(%2), %%mm1\n" + "\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "pfmul (%1), %%mm2\n\t" + "pfmul 8(%1), %%mm3\n\t" + "pfadd %%mm3, %%mm2\n\t" + "movq %%mm2, %%mm3\n\t" + "psrlq $32, %%mm3\n\t" + "pfadd %%mm3, %%mm2\n" + "\n\t" + "movq %%mm0, %%mm3\n\t" + "movq %%mm1, %%mm4\n\t" + "pfmul 16(%1), %%mm3\n\t" + "pfmul 24(%1), %%mm4\n\t" + "pfadd %%mm4, %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "psrlq $32, %%mm4\n\t" + "pfadd %%mm4, %%mm3\n" + "\n\t" + "punpckldq %%mm3, %%mm2\n\t" + "movq %%mm2, (%0)\n" + "\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "pfmul 32(%1), %%mm2\n\t" + "pfmul 40(%1), %%mm3\n\t" + "pfadd %%mm3, %%mm2\n\t" + "movq %%mm2, %%mm3\n\t" + "psrlq $32, %%mm3\n\t" + "pfadd %%mm3, %%mm2\n" + "\n\t" + "pfmul 48(%1), %%mm0\n\t" + "pfmul 56(%1), %%mm1\n\t" + "pfadd %%mm1, %%mm0\n\t" + "movq %%mm0, %%mm1\n\t" + "psrlq $32, %%mm1\n\t" + "pfadd %%mm1, %%mm0\n" + "\n\t" + "punpckldq %%mm0, %%mm2\n\t" + "movq %%mm2, 8(%0)\n" "\n\t" "femms" :: "r"(&result), "r"(&self->_values), "r"(&vector) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); Index: src/OFSystemInfo.m ================================================================== --- src/OFSystemInfo.m +++ src/OFSystemInfo.m @@ -296,13 +296,13 @@ * as ebx is a special register in PIC code. Yes, GCC is indeed not * able to just push a register onto the stack before the __asm__ block * and to pop it afterwards. */ __asm__ ( - "xchg{l} { %%ebx, %%edi | edi, ebx }\n\t" + "xchgl %%ebx, %%edi\n\t" "cpuid\n\t" - "xchg{l} { %%edi, %%ebx | ebx, edi }" + "xchgl %%edi, %%ebx" : "=a"(regs.eax), "=D"(regs.ebx), "=c"(regs.ecx), "=d"(regs.edx) : "a"(eax), "c"(ecx) ); # else memset(®s, 0, sizeof(regs));