Overview
Comment: | Don't use -masm=intel
It's broken in older versions of Clang (e.g. Clang 12). |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
d9af65de971c5ae1e4544916cc11e604 |
User & Date: | js on 2023-10-31 20:25:57 |
Other Links: | manifest | tags |
Context
2023-10-31
| ||
20:27 | OFMatrix4x4: Partially unroll multiplication loop check-in: d53c87e7bb user: js tags: trunk | |
20:25 | Don't use -masm=intel check-in: d9af65de97 user: js tags: trunk | |
2023-10-30
| ||
23:58 | OFMatrix4x4: Use 3DNow! to transform vectors check-in: 1ac0583aae user: js tags: trunk | |
Changes
Modified configure.ac from [ac89f801b1] to [3d6a6f352e].
︙ | ︙ | |||
330 331 332 333 334 335 336 | AX_CHECK_COMPILER_FLAGS(-fno-common, [OBJCFLAGS="$OBJCFLAGS -fno-common"]) AX_CHECK_COMPILER_FLAGS(-Xclang -fno-constant-cfstrings, [ flag="-Xclang -fno-constant-cfstrings" OBJCFLAGS="$OBJCFLAGS $flag" OBJFW_OBJCFLAGS="$OBJFW_OBJCFLAGS $flag" ]) | < < < < < < < < < | 330 331 332 333 334 335 336 337 338 339 340 341 342 343 | AX_CHECK_COMPILER_FLAGS(-fno-common, [OBJCFLAGS="$OBJCFLAGS -fno-common"]) AX_CHECK_COMPILER_FLAGS(-Xclang -fno-constant-cfstrings, [ flag="-Xclang -fno-constant-cfstrings" OBJCFLAGS="$OBJCFLAGS $flag" OBJFW_OBJCFLAGS="$OBJFW_OBJCFLAGS $flag" ]) AX_CHECK_COMPILER_FLAGS([-Wsign-compare -Werror], [OBJCFLAGS="$OBJCFLAGS -Wsign-compare"]) AS_IF([test x"$with_nds" != x"yes"], [ AX_CHECK_COMPILER_FLAGS([-Wshadow -Werror], [OBJCFLAGS="$OBJCFLAGS -Wshadow"]) ]) AX_CHECK_COMPILER_FLAGS([-Wshorten-64-to-32 -Werror], |
︙ | ︙ |
Modified src/OFMatrix4x4.m from [9f0079818c] to [c5f86994f5].
︙ | ︙ | |||
25 26 27 28 29 30 31 | { 1, 0, 0, 0 }, { 0, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 } }; @implementation OFMatrix4x4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | { 1, 0, 0, 0 }, { 0, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 } }; @implementation OFMatrix4x4 #if defined(OF_AMD64) || defined(OF_X86) static void multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { float result[4][4]; for (uint_fast8_t i = 0; i < 4; i++) { for (uint_fast8_t j = 0; j < 4; j++) { __asm__ ( "movd (%2), %%mm0\n\t" "punpckldq 16(%2), %%mm0\n\t" "pfmul (%1), %%mm0\n\t" "movd 32(%2), %%mm1\n\t" "punpckldq 48(%2), %%mm1\n\t" "pfmul 8(%1), %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movq %%mm0, %%mm1\n\t" "psrlq $32, %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movd %%mm0, %0" :: "m"(result[i][j]), "r"(&matrix->_values[i][0]), "r"(&self->_values[0][j]) : "mm0", "mm1", "memory" ); } } __asm__ ("femms"); memcpy(self->_values, result, sizeof(result)); } static OFVector4D transformedVector_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D vector) { OFVector4D result; __asm__ ( "movq (%2), %%mm0\n\t" "movq 8(%2), %%mm1\n" "\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t" "pfmul (%1), %%mm2\n\t" "pfmul 8(%1), %%mm3\n\t" "pfadd %%mm3, %%mm2\n\t" "movq %%mm2, %%mm3\n\t" "psrlq $32, %%mm3\n\t" "pfadd %%mm3, %%mm2\n" "\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" "pfmul 16(%1), %%mm3\n\t" "pfmul 24(%1), %%mm4\n\t" "pfadd %%mm4, %%mm3\n\t" "movq %%mm3, %%mm4\n\t" "psrlq $32, %%mm4\n\t" "pfadd %%mm4, %%mm3\n" "\n\t" "punpckldq %%mm3, %%mm2\n\t" "movq %%mm2, (%0)\n" "\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t" "pfmul 32(%1), 
%%mm2\n\t" "pfmul 40(%1), %%mm3\n\t" "pfadd %%mm3, %%mm2\n\t" "movq %%mm2, %%mm3\n\t" "psrlq $32, %%mm3\n\t" "pfadd %%mm3, %%mm2\n" "\n\t" "pfmul 48(%1), %%mm0\n\t" "pfmul 56(%1), %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movq %%mm0, %%mm1\n\t" "psrlq $32, %%mm1\n\t" "pfadd %%mm1, %%mm0\n" "\n\t" "punpckldq %%mm0, %%mm2\n\t" "movq %%mm2, 8(%0)\n" "\n\t" "femms" :: "r"(&result), "r"(&self->_values), "r"(&vector) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); return result; |
︙ | ︙ |
Modified src/OFSystemInfo.m from [3c4e0fe6ea] to [1b9d36f2fa].
︙ | ︙ | |||
294 295 296 297 298 299 300 | /* * This workaround is required by older GCC versions when using -fPIC, * as ebx is a special register in PIC code. Yes, GCC is indeed not * able to just push a register onto the stack before the __asm__ block * and to pop it afterwards. */ __asm__ ( | | | | 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | /* * This workaround is required by older GCC versions when using -fPIC, * as ebx is a special register in PIC code. Yes, GCC is indeed not * able to just push a register onto the stack before the __asm__ block * and to pop it afterwards. */ __asm__ ( "xchgl %%ebx, %%edi\n\t" "cpuid\n\t" "xchgl %%edi, %%ebx" : "=a"(regs.eax), "=D"(regs.ebx), "=c"(regs.ecx), "=d"(regs.edx) : "a"(eax), "c"(ecx) ); # else memset(&regs, 0, sizeof(regs)); # endif |
︙ | ︙ |