Overview
Comment: | OFMatrix4x4: Use 3DNow! for multiplication |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
5b213166eed003c3b0ab5d8f30ea9bc5 |
User & Date: | js on 2023-10-30 23:31:27 |
Other Links: | manifest | tags |
Context
2023-10-30
| ||
23:58 | OFMatrix4x4: Use 3DNow! to transform vectors check-in: 1ac0583aae user: js tags: trunk | |
23:31 | OFMatrix4x4: Use 3DNow! for multiplication check-in: 5b213166ee user: js tags: trunk | |
2023-10-29
| ||
12:03 | OFMatrix4x4: Convert multiplication to loop check-in: cf4d6a3dfa user: js tags: trunk | |
Changes
Modified configure.ac from [e4de599d6e] to [ac89f801b1].
︙ | ︙ | |||
329 330 331 332 333 334 335 336 337 338 339 340 341 342 | AX_CHECK_COMPILER_FLAGS(-pipe, [OBJCFLAGS="$OBJCFLAGS -pipe"]) AX_CHECK_COMPILER_FLAGS(-fno-common, [OBJCFLAGS="$OBJCFLAGS -fno-common"]) AX_CHECK_COMPILER_FLAGS(-Xclang -fno-constant-cfstrings, [ flag="-Xclang -fno-constant-cfstrings" OBJCFLAGS="$OBJCFLAGS $flag" OBJFW_OBJCFLAGS="$OBJFW_OBJCFLAGS $flag" ]) AX_CHECK_COMPILER_FLAGS([-Wsign-compare -Werror], [OBJCFLAGS="$OBJCFLAGS -Wsign-compare"]) AS_IF([test x"$with_nds" != x"yes"], [ AX_CHECK_COMPILER_FLAGS([-Wshadow -Werror], [OBJCFLAGS="$OBJCFLAGS -Wshadow"]) ]) AX_CHECK_COMPILER_FLAGS([-Wshorten-64-to-32 -Werror], | > > > > > > > > > > | 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 | AX_CHECK_COMPILER_FLAGS(-pipe, [OBJCFLAGS="$OBJCFLAGS -pipe"]) AX_CHECK_COMPILER_FLAGS(-fno-common, [OBJCFLAGS="$OBJCFLAGS -fno-common"]) AX_CHECK_COMPILER_FLAGS(-Xclang -fno-constant-cfstrings, [ flag="-Xclang -fno-constant-cfstrings" OBJCFLAGS="$OBJCFLAGS $flag" OBJFW_OBJCFLAGS="$OBJFW_OBJCFLAGS $flag" ]) case "$host_cpu" in i*86|x86_64) AX_CHECK_COMPILER_FLAGS([-masm=intel], [ OBJCFLAGS="$OBJCFLAGS -masm=intel" AC_DEFINE(HAVE_INTEL_SYNTAX, 1, [Whether asm syntax is Intel]) ]) ;; esac AX_CHECK_COMPILER_FLAGS([-Wsign-compare -Werror], [OBJCFLAGS="$OBJCFLAGS -Wsign-compare"]) AS_IF([test x"$with_nds" != x"yes"], [ AX_CHECK_COMPILER_FLAGS([-Wshadow -Werror], [OBJCFLAGS="$OBJCFLAGS -Wshadow"]) ]) AX_CHECK_COMPILER_FLAGS([-Wshorten-64-to-32 -Werror], |
︙ | ︙ |
Modified src/OFMatrix4x4.m from [ccf7b44cda] to [5d349ae57d].
︙ | ︙ | |||
12 13 14 15 16 17 18 | * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this * file. */ #include "config.h" #import "OFMatrix4x4.h" | | > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this * file. */ #include "config.h" #import "OFMatrix4x4.h" #import "OFString.h" #import "OFSystemInfo.h" #import "OFOnce.h" static const float identityValues[4][4] = { { 1, 0, 0, 0 }, { 0, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 } }; @implementation OFMatrix4x4 #if (defined(OF_AMD64) || defined(OF_X86)) && defined(HAVE_INTEL_SYNTAX) static void multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { float result[4][4] = {{ 0 }}; for (uint_fast8_t i = 0; i < 4; i++) { for (uint_fast8_t j = 0; j < 4; j++) { __asm__ ( "movd mm0, [%2]\n\t" "punpckldq mm0, [%2 + 16]\n\t" "pfmul mm0, [%1]\n\t" "movd mm1, [%2 + 32]\n\t" "punpckldq mm1, [%2 + 48]\n\t" "pfmul mm1, [%1 + 8]\n\t" "pfadd mm0, mm1\n\t" "movq mm1, mm0\n\t" "psrlq mm1, 32\n\t" "pfadd mm0, mm1\n\t" "movd %0, mm0" :: "m"(result[i][j]), "r"(&matrix->_values[i][0]), "r"(&self->_values[0][j]) : "mm0", "mm1", "memory" ); } } __asm__ ("femms"); memcpy(self->_values, result, sizeof(result)); } + (void)initialize { if (self != [OFMatrix4x4 class]) return; if ([OFSystemInfo supports3DNow]) { const SEL selector = @selector(multiplyWithMatrix:); const char *typeEncoding = method_getTypeEncoding( class_getInstanceMethod(self, selector)); class_replaceMethod(self, selector, (IMP)multiplyWithMatrix_3DNow, typeEncoding); } } #endif + (OFMatrix4x4 *)identityMatrix { return [[[OFMatrix4x4 alloc] initWithValues: identityValues] autorelease]; } + 
(instancetype)matrixWithValues: (const float [4][4])values |
︙ | ︙ |
Modified src/OFSystemInfo.m from [1b9d36f2fa] to [3c4e0fe6ea].
︙ | ︙ | |||
294 295 296 297 298 299 300 | /* * This workaround is required by older GCC versions when using -fPIC, * as ebx is a special register in PIC code. Yes, GCC is indeed not * able to just push a register onto the stack before the __asm__ block * and to pop it afterwards. */ __asm__ ( | | | | 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | /* * This workaround is required by older GCC versions when using -fPIC, * as ebx is a special register in PIC code. Yes, GCC is indeed not * able to just push a register onto the stack before the __asm__ block * and to pop it afterwards. */ __asm__ ( "xchg{l} { %%ebx, %%edi | edi, ebx }\n\t" "cpuid\n\t" "xchg{l} { %%edi, %%ebx | ebx, edi }" : "=a"(regs.eax), "=D"(regs.ebx), "=c"(regs.ecx), "=d"(regs.edx) : "a"(eax), "c"(ecx) ); # else memset(&regs, 0, sizeof(regs)); # endif |
︙ | ︙ |