ObjFW  Diff

Differences From Artifact [9f0079818c]:

To Artifact [c5f86994f5]:


25
26
27
28
29
30
31
32

33
34
35
36

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51











52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71


72
73
74
75
76
77
78
79
80








81
82
83
84
85
86
87
88
89








90
91
92


93
94
95
96
97
98
99
100
101








102
103
104
105
106
107
108






109
110
111


112
113
114
115
116
117
118
25
26
27
28
29
30
31

32
33
34
35

36
37
38
39
40











41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69


70
71
72








73
74
75
76
77
78
79
80
81








82
83
84
85
86
87
88
89
90


91
92
93








94
95
96
97
98
99
100
101
102






103
104
105
106
107
108
109


110
111
112
113
114
115
116
117
118







-
+



-
+




-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+


















-
-
+
+

-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
-
+
+

-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+

-
-
-
-
-
-
+
+
+
+
+
+

-
-
+
+







	{ 1, 0, 0, 0 },
	{ 0, 1, 0, 0 },
	{ 0, 0, 1, 0 },
	{ 0, 0, 0, 1 }
};

@implementation OFMatrix4x4
#if (defined(OF_AMD64) || defined(OF_X86)) && defined(HAVE_INTEL_SYNTAX)
#if defined(OF_AMD64) || defined(OF_X86)
/*
 * 3DNow! implementation of in-place matrix multiplication.
 *
 * Computes result[i][j] as the dot product of row i of `matrix` and column j
 * of the receiver, then copies the 4x4 result over self->_values, i.e.
 * self = matrix * self.
 *
 * The inline assembly is written in AT&T syntax. The 16/32/48 byte offsets
 * applied to &self->_values[0][j] step down one column, which assumes each
 * row of _values is four contiguous floats (same layout as `result`, which is
 * memcpy'd over it below).
 *
 * @param self The matrix that is multiplied and overwritten
 * @param _cmd Unused; present so the function matches a method signature
 * @param matrix The left-hand operand of the multiplication
 */
static void
multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix)
{
	float result[4][4];

	for (uint_fast8_t i = 0; i < 4; i++) {
		for (uint_fast8_t j = 0; j < 4; j++) {
			/*
			 * result[i][j] is written by the final movd, so it is
			 * declared as an output operand. __volatile__ keeps
			 * this statement ordered before the femms below now
			 * that it is no longer implicitly volatile.
			 */
			__asm__ __volatile__ (
			    /* mm0 = { self[0][j], self[1][j] } * matrix[i][0..1] */
			    "movd	(%2), %%mm0\n\t"
			    "punpckldq	16(%2), %%mm0\n\t"
			    "pfmul	(%1), %%mm0\n\t"
			    /* mm1 = { self[2][j], self[3][j] } * matrix[i][2..3] */
			    "movd	32(%2), %%mm1\n\t"
			    "punpckldq	48(%2), %%mm1\n\t"
			    "pfmul	8(%1), %%mm1\n\t"
			    /* Sum the four products into the low half of mm0 */
			    "pfadd	%%mm1, %%mm0\n\t"
			    "movq	%%mm0, %%mm1\n\t"
			    "psrlq	$32, %%mm1\n\t"
			    "pfadd	%%mm1, %%mm0\n\t"
			    "movd	%%mm0, %0"
			    : "=m"(result[i][j])
			    : "r"(&matrix->_values[i][0]),
			      "r"(&self->_values[0][j])
			    : "mm0", "mm1", "memory"
			);
		}
	}

	/* Leave MMX/3DNow! state so the x87 FPU is usable again. */
	__asm__ ("femms");

	memcpy(self->_values, result, sizeof(result));
}

static OFVector4D
transformedVector_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D vector)
{
	OFVector4D result;

	__asm__ (
	    "movq	mm0, [%2]\n\t"
	    "movq	mm1, [%2 + 8]\n"
	    "movq	(%2), %%mm0\n\t"
	    "movq	8(%2), %%mm1\n"
	    "\n\t"
	    "movq	mm2, mm0\n\t"
	    "movq	mm3, mm1\n\t"
	    "pfmul	mm2, [%1]\n\t"
	    "pfmul	mm3, [%1 + 8]\n\t"
	    "pfadd	mm2, mm3\n\t"
	    "movq	mm3, mm2\n\t"
	    "psrlq	mm3, 32\n\t"
	    "pfadd	mm2, mm3\n"
	    "movq	%%mm0, %%mm2\n\t"
	    "movq	%%mm1, %%mm3\n\t"
	    "pfmul	(%1), %%mm2\n\t"
	    "pfmul	8(%1), %%mm3\n\t"
	    "pfadd	%%mm3, %%mm2\n\t"
	    "movq	%%mm2, %%mm3\n\t"
	    "psrlq	$32, %%mm3\n\t"
	    "pfadd	%%mm3, %%mm2\n"
	    "\n\t"
	    "movq	mm3, mm0\n\t"
	    "movq	mm4, mm1\n\t"
	    "pfmul	mm3, [%1 + 16]\n\t"
	    "pfmul	mm4, [%1 + 24]\n\t"
	    "pfadd	mm3, mm4\n\t"
	    "movq	mm4, mm3\n\t"
	    "psrlq	mm4, 32\n\t"
	    "pfadd	mm3, mm4\n"
	    "movq	%%mm0, %%mm3\n\t"
	    "movq	%%mm1, %%mm4\n\t"
	    "pfmul	16(%1), %%mm3\n\t"
	    "pfmul	24(%1), %%mm4\n\t"
	    "pfadd	%%mm4, %%mm3\n\t"
	    "movq	%%mm3, %%mm4\n\t"
	    "psrlq	$32, %%mm4\n\t"
	    "pfadd	%%mm4, %%mm3\n"
	    "\n\t"
	    "punpckldq	mm2, mm3\n\t"
	    "movq	[%0], mm2\n"
	    "punpckldq	%%mm3, %%mm2\n\t"
	    "movq	%%mm2, (%0)\n"
	    "\n\t"
	    "movq	mm2, mm0\n\t"
	    "movq	mm3, mm1\n\t"
	    "pfmul	mm2, [%1 + 32]\n\t"
	    "pfmul	mm3, [%1 + 40]\n\t"
	    "pfadd	mm2, mm3\n\t"
	    "movq	mm3, mm2\n\t"
	    "psrlq	mm3, 32\n\t"
	    "pfadd	mm2, mm3\n"
	    "movq	%%mm0, %%mm2\n\t"
	    "movq	%%mm1, %%mm3\n\t"
	    "pfmul	32(%1), %%mm2\n\t"
	    "pfmul	40(%1), %%mm3\n\t"
	    "pfadd	%%mm3, %%mm2\n\t"
	    "movq	%%mm2, %%mm3\n\t"
	    "psrlq	$32, %%mm3\n\t"
	    "pfadd	%%mm3, %%mm2\n"
	    "\n\t"
	    "pfmul	mm0, [%1 + 48]\n\t"
	    "pfmul	mm1, [%1 + 56]\n\t"
	    "pfadd	mm0, mm1\n\t"
	    "movq	mm1, mm0\n\t"
	    "psrlq	mm1, 32\n\t"
	    "pfadd	mm0, mm1\n"
	    "pfmul	48(%1), %%mm0\n\t"
	    "pfmul	56(%1), %%mm1\n\t"
	    "pfadd	%%mm1, %%mm0\n\t"
	    "movq	%%mm0, %%mm1\n\t"
	    "psrlq	$32, %%mm1\n\t"
	    "pfadd	%%mm1, %%mm0\n"
	    "\n\t"
	    "punpckldq	mm2, mm0\n\t"
	    "movq	[%0 + 8], mm2\n"
	    "punpckldq	%%mm0, %%mm2\n\t"
	    "movq	%%mm2, 8(%0)\n"
	    "\n\t"
	    "femms"
	    :: "r"(&result), "r"(&self->_values), "r"(&vector)
	    : "mm0", "mm1", "mm2", "mm3", "mm4", "memory"
	);

	return result;