12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
* LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
* file.
*/
#include "config.h"
#import "OFMatrix4x4.h"
#import "OFOnce.h"
#import "OFString.h"
#import "OFString.h"
#import "OFSystemInfo.h"
#import "OFOnce.h"
static const float identityValues[4][4] = {
{ 1, 0, 0, 0 },
{ 0, 1, 0, 0 },
{ 0, 0, 1, 0 },
{ 0, 0, 0, 1 }
};
@implementation OFMatrix4x4
#if (defined(OF_AMD64) || defined(OF_X86)) && defined(HAVE_INTEL_SYNTAX)
static void
multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix)
{
float result[4][4] = {{ 0 }};
for (uint_fast8_t i = 0; i < 4; i++) {
for (uint_fast8_t j = 0; j < 4; j++) {
__asm__ (
"movd mm0, [%2]\n\t"
"punpckldq mm0, [%2 + 16]\n\t"
"pfmul mm0, [%1]\n\t"
"movd mm1, [%2 + 32]\n\t"
"punpckldq mm1, [%2 + 48]\n\t"
"pfmul mm1, [%1 + 8]\n\t"
"pfadd mm0, mm1\n\t"
"movq mm1, mm0\n\t"
"psrlq mm1, 32\n\t"
"pfadd mm0, mm1\n\t"
"movd %0, mm0"
:: "m"(result[i][j]), "r"(&matrix->_values[i][0]),
"r"(&self->_values[0][j])
: "mm0", "mm1", "memory"
);
}
}
__asm__ ("femms");
memcpy(self->_values, result, sizeof(result));
}
+ (void)initialize
{
if (self != [OFMatrix4x4 class])
return;
if ([OFSystemInfo supports3DNow]) {
const SEL selector = @selector(multiplyWithMatrix:);
const char *typeEncoding = method_getTypeEncoding(
class_getInstanceMethod(self, selector));
class_replaceMethod(self, selector,
(IMP)multiplyWithMatrix_3DNow, typeEncoding);
}
}
#endif
+ (OFMatrix4x4 *)identityMatrix
{
return [[[OFMatrix4x4 alloc]
initWithValues: identityValues] autorelease];
}
+ (instancetype)matrixWithValues: (const float [4][4])values
|