Differences From Artifact [ccf7b44cda]:
- File
src/OFMatrix4x4.m
— part of check-in
[cf4d6a3dfa]
at
2023-10-29 12:03:10
on branch trunk
— OFMatrix4x4: Convert multiplication to loop
This should make vectorization easier. (user: js, size: 3699) [annotate] [blame] [check-ins using]
To Artifact [5d349ae57d]:
- File src/OFMatrix4x4.m — part of check-in [5b213166ee] at 2023-10-30 23:31:27 on branch trunk — OFMatrix4x4: Use 3DNow! for multiplication (user: js, size: 4918) [annotate] [blame] [check-ins using]
︙ | ︙ | |||
* LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
* file.
*/

#include "config.h"

#import "OFMatrix4x4.h"
#import "OFString.h"
#import "OFSystemInfo.h"
#import "OFOnce.h"

/* Values of the 4x4 identity matrix, used by +identityMatrix. */
static const float identityValues[4][4] = {
	{ 1, 0, 0, 0 },
	{ 0, 1, 0, 0 },
	{ 0, 0, 1, 0 },
	{ 0, 0, 0, 1 }
};

@implementation OFMatrix4x4
#if (defined(OF_AMD64) || defined(OF_X86)) && defined(HAVE_INTEL_SYNTAX)
/*
 * 3DNow! implementation of -[OFMatrix4x4 multiplyWithMatrix:].
 *
 * Has the shape of an IMP (self, _cmd, argument) so that +initialize can
 * install it with class_replaceMethod().
 *
 * For every i and j it computes
 *
 *   result[i][j] = matrix[i][0] * self[0][j] + matrix[i][1] * self[1][j] +
 *                  matrix[i][2] * self[2][j] + matrix[i][3] * self[3][j]
 *
 * into a temporary and only afterwards copies the temporary into
 * self->_values, so the inputs are never overwritten while they are still
 * being read.
 *
 * NOTE(review): the fixed offsets 16 / 32 / 48 assume that _values is a
 * contiguous float[4][4] (one row = 4 floats = 16 bytes) - confirm against the
 * declaration in OFMatrix4x4.h.
 */
static void
multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix)
{
	float result[4][4] = {{ 0 }};

	for (uint_fast8_t i = 0; i < 4; i++) {
		for (uint_fast8_t j = 0; j < 4; j++) {
			/*
			 * %0 is written by the final movd, so it has to be
			 * declared as an output ("=m"): input-only operands
			 * must not be modified by the asm.
			 *
			 * The "memory" clobber is kept because the matrix
			 * elements are read through the pointers in %1 and %2
			 * rather than through declared memory operands.
			 *
			 * __volatile__ keeps the statement ordered before the
			 * femms below now that it has an output operand.
			 */
			__asm__ __volatile__ (
			    /* mm0 = { self[0][j], self[1][j] } */
			    "movd	mm0, [%2]\n\t"
			    "punpckldq mm0, [%2 + 16]\n\t"
			    /* mm0 *= { matrix[i][0], matrix[i][1] } */
			    "pfmul	mm0, [%1]\n\t"
			    /* mm1 = { self[2][j], self[3][j] } */
			    "movd	mm1, [%2 + 32]\n\t"
			    "punpckldq mm1, [%2 + 48]\n\t"
			    /* mm1 *= { matrix[i][2], matrix[i][3] } */
			    "pfmul	mm1, [%1 + 8]\n\t"
			    /* Add the two pairs, then low + high half. */
			    "pfadd	mm0, mm1\n\t"
			    "movq	mm1, mm0\n\t"
			    "psrlq	mm1, 32\n\t"
			    "pfadd	mm0, mm1\n\t"
			    /* Store the low 32 bits as result[i][j]. */
			    "movd	%0, mm0"
			    : "=m"(result[i][j])
			    : "r"(&matrix->_values[i][0]),
			      "r"(&self->_values[0][j])
			    : "mm0", "mm1", "memory"
			);
		}
	}

	/* Leave MMX/3DNow! state so that x87 code can run afterwards. */
	__asm__ __volatile__ ("femms");

	memcpy(self->_values, result, sizeof(result));
}

/*
 * Replaces the implementation of -multiplyWithMatrix: with the 3DNow! version
 * if OFSystemInfo reports 3DNow! support. Does nothing when invoked for a
 * subclass.
 */
+ (void)initialize
{
	if (self != [OFMatrix4x4 class])
		return;

	if ([OFSystemInfo supports3DNow]) {
		const SEL selector = @selector(multiplyWithMatrix:);
		/* Reuse the type encoding of the method being replaced. */
		const char *typeEncoding = method_getTypeEncoding(
		    class_getInstanceMethod(self, selector));

		class_replaceMethod(self, selector,
		    (IMP)multiplyWithMatrix_3DNow, typeEncoding);
	}
}
#endif

/* Returns a new, autoreleased matrix initialized with identityValues. */
+ (OFMatrix4x4 *)identityMatrix
{
	return [[[OFMatrix4x4 alloc]
	    initWithValues: identityValues] autorelease];
}

+
(instancetype)matrixWithValues: (const float [4][4])values |
︙ | ︙ |