38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
-
+
-
+
-
+
-
+
-
+
|
multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd,
OFMatrix4x4 *matrix)
{
float result[4][4];
for (uint_fast8_t i = 0; i < 4; i++) {
for (uint_fast8_t j = 0; j < 4; j++) {
__asm__ (
__asm__ __volatile__ (
"movd (%2), %%mm0\n\t"
"punpckldq 16(%2), %%mm0\n\t"
"pfmul (%1), %%mm0\n\t"
"movd 32(%2), %%mm1\n\t"
"punpckldq 48(%2), %%mm1\n\t"
"pfmul 8(%1), %%mm1\n\t"
"pfadd %%mm1, %%mm0\n\t"
"pswapd %%mm0, %%mm1\n\t"
"pfadd %%mm1, %%mm0\n\t"
"movd %%mm0, %0"
:: "m"(result[i][j]), "r"(&matrix->_values[i][0]),
"r"(&self->_values[0][j])
: "mm0", "mm1", "memory"
);
}
}
__asm__ ("femms");
__asm__ __volatile__ ("femms");
memcpy(self->_values, result, sizeof(result));
}
/*
 * 3DNow! implementation of the matrix multiplication, using only the base
 * 3DNow! instruction set (no pswapd).
 *
 * For every element it computes
 *
 *   result[i][j] = sum over k of matrix->_values[i][k] * self->_values[k][j]
 *
 * i.e. `matrix` multiplied by `self`, and then stores the product back into
 * `self`. The product is built in a temporary so that `self` is only
 * overwritten once all of its old values have been read.
 *
 * NOTE(review): the 16 byte offsets used below assume `_values` is a
 * float[4][4] (one row = 4 floats) - confirm against the class declaration.
 *
 * `_cmd` is unused.
 */
static void
multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix)
{
	float result[4][4];

	for (uint_fast8_t i = 0; i < 4; i++) {
		for (uint_fast8_t j = 0; j < 4; j++) {
			__asm__ __volatile__ (
			    /* mm0 = { self[0][j], self[1][j] } * matrix[i][0..1] */
			    "movd (%2), %%mm0\n\t"
			    "punpckldq 16(%2), %%mm0\n\t"
			    "pfmul (%1), %%mm0\n\t"
			    /* mm1 = { self[2][j], self[3][j] } * matrix[i][2..3] */
			    "movd 32(%2), %%mm1\n\t"
			    "punpckldq 48(%2), %%mm1\n\t"
			    "pfmul 8(%1), %%mm1\n\t"
			    "pfadd %%mm1, %%mm0\n\t"
			    /*
			     * Horizontal add: move the upper float of mm0
			     * into the lower half of mm1 and add it.
			     */
			    "movq %%mm0, %%mm1\n\t"
			    "psrlq $32, %%mm1\n\t"
			    "pfadd %%mm1, %%mm0\n\t"
			    "movd %%mm0, %0"
			    /*
			     * result[i][j] is written by the asm, so it has to
			     * be an output operand. Operand numbers stay the
			     * same (%0 = result, %1 = matrix row, %2 = column
			     * of self).
			     */
			    : "=m"(result[i][j])
			    : "r"(&matrix->_values[i][0]),
			      "r"(&self->_values[0][j])
			    /*
			     * "memory" is needed because the matrices are read
			     * through the pointers in %1 and %2.
			     */
			    : "mm0", "mm1", "memory"
			);
		}
	}

	/* Leave MMX/3DNow! state so that the x87 FPU can be used again. */
	__asm__ __volatile__ ("femms");

	memcpy(self->_values, result, sizeof(result));
}
static void
transformVectors_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors,
size_t count)
{
__asm__ (
__asm__ __volatile__ (
"0:\n\t"
"test %0, %0\n\t"
"jz 0f\n"
"\n\t"
"movq (%1), %%mm0\n\t"
"movq 8(%1), %%mm1\n"
"\n\t"
|
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
-
+
|
);
}
static void
transformVectors_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors,
size_t count)
{
__asm__ (
__asm__ __volatile__ (
"0:\n\t"
"test %0, %0\n\t"
"jz 0f\n"
"\n\t"
"movq (%1), %%mm0\n\t"
"movq 8(%1), %%mm1\n"
"\n\t"
|