ObjFW  Diff

Differences From Artifact [66a4a58f6e]:

To Artifact [b31d5e9464]:


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

101





102
103







104








105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
 * 3DNow! implementation of 4x4 matrix multiplication.
 *
 * Computes result[i][j] = sum over k of left[i][k] * right[k][j], where left
 * is matrix->_values and right is self->_values, into a local buffer and then
 * copies that buffer over self->_values. _cmd is not used by the body.
 *
 * NOTE(review): the byte offsets in the assembly (4 per element, 16 per row)
 * assume _values is a float[4][4] of 4-byte floats - confirm against the
 * class declaration.
 */
static void
multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix)
{
	float (*left)[4] = matrix->_values, (*right)[4] = self->_values;
	/*
	 * The product goes into a temporary because the destination
	 * (self->_values) is also read as the right-hand operand.
	 */
	float result[4][4], (*resultPtr)[4] = result;

	/*
	 * Operands: %0 = resultPtr, %1 = left, %2 = right. All three are
	 * read-write because the loop advances them.
	 * CL counts the inner iterations (element j within a row), CH counts
	 * the outer iterations (row i).
	 */
	__asm__ __volatile__ (
	    /* Clear both counters (CL and CH) at once. */
	    "xorw	%%cx, %%cx\n"
	    "\n\t"
	    "0:\n\t"
	    /* MM0 = { right[0][j], right[1][j] } * { left[i][0], left[i][1] } */
	    "movd	(%2), %%mm0\n\t"
	    "punpckldq	16(%2), %%mm0\n\t"
	    "pfmul	(%1), %%mm0\n\t"
	    /* MM1 = { right[2][j], right[3][j] } * { left[i][2], left[i][3] } */
	    "movd	32(%2), %%mm1\n\t"
	    "punpckldq  48(%2), %%mm1\n\t"
	    "pfmul	8(%1), %%mm1\n\t"
	    /*
	     * Horizontal add: the first PFACC folds each register's pair into
	     * MM0, the second leaves the sum of all four products in the low
	     * half of MM0.
	     */
	    "pfacc	%%mm1, %%mm0\n\t"
	    "pfacc	%%mm0, %%mm0\n\t"
	    /* Store the low 32 bits as result[i][j]. */
	    "movd	%%mm0, (%0)\n"
	    "\n\t"

	    /* Next result element. */
	    "add	$4, %0\n\t"





	    /* Next column of right. */
	    "add	$4, %2\n\t"
	    "incb	%%cl\n\t"







	    /* Repeat while fewer than 4 elements of this row are done. */
	    "cmpb	$4, %%cl\n\t"








	    "jb		0b\n"
	    "\n\t"
	    /*
	     * Row finished: move left to its next row, rewind right to
	     * column 0 and reset the inner counter. %0 already points at the
	     * next result row after four 4-byte steps.
	     */
	    "add	$16, %1\n\t"
	    "sub	$16, %2\n\t"
	    "xorb	%%cl, %%cl\n\t"
	    "incb	%%ch\n\t"
	    "cmpb	$4, %%ch\n\t"
	    "jb		0b\n"
	    "\n\t"
	    /* Leave MMX/3DNow! state before returning to regular code. */
	    "femms"
	    : "+r"(resultPtr), "+r"(left), "+r"(right)
	    :: "cx", "mm0", "mm1", "memory"
	);

	/* Publish the finished product into the receiver. */
	memcpy(self->_values, result, 16 * sizeof(float));
}

static void
transformVectors_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors,







|










|
|
>
|
>
>
>
>
>
|
|
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
|

|
|
<
|
<
|



|







81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

130

131
132
133
134
135
136
137
138
139
140
141
142
/*
 * 3DNow! implementation of 4x4 matrix multiplication.
 *
 * Computes result[i][j] = sum over k of left[i][k] * right[k][j], where left
 * is matrix->_values and right is self->_values, into a local buffer and then
 * copies that buffer over self->_values. _cmd is not used by the body.
 *
 * The four elements of a result row are computed by four unrolled copies of
 * the same instruction sequence; only the rows are looped over.
 *
 * NOTE(review): the byte offsets in the assembly (4 per element, 16 per row)
 * assume _values is a float[4][4] of 4-byte floats - confirm against the
 * class declaration.
 */
static void
multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix)
{
	float (*left)[4] = matrix->_values, (*right)[4] = self->_values;
	/*
	 * The product goes into a temporary because the destination
	 * (self->_values) is also read as the right-hand operand.
	 */
	float result[4][4], (*resultPtr)[4] = result;

	/*
	 * Operands: %0 = resultPtr, %1 = left, %2 = right.
	 * %0 and %1 are advanced by one row per iteration.
	 * NOTE(review): %2 is declared "+r" but no instruction below writes
	 * it; a plain input constraint looks sufficient - confirm.
	 */
	__asm__ __volatile__ (
	    /* ECX = number of rows still to compute. */
	    "movl	$4, %%ecx\n\t"
	    "\n\t"
	    "0:\n\t"
	    /*
	     * Column 0: MM0 = { right[0][0], right[1][0] } * left[i][0..1],
	     * MM1 = { right[2][0], right[3][0] } * left[i][2..3]; the two
	     * PFACCs sum the four products into the low half of MM0, which is
	     * stored as result[i][0].
	     */
	    "movd	(%2), %%mm0\n\t"
	    "punpckldq	16(%2), %%mm0\n\t"
	    "pfmul	(%1), %%mm0\n\t"
	    "movd	32(%2), %%mm1\n\t"
	    "punpckldq  48(%2), %%mm1\n\t"
	    "pfmul	8(%1), %%mm1\n\t"
	    "pfacc	%%mm1, %%mm0\n\t"
	    "pfacc	%%mm0, %%mm0\n\t"
	    "movd	%%mm0, (%0)\n\t"
	    /* Column 1: same sequence, all right/result offsets + 4. */
	    "movd	4(%2), %%mm0\n\t"
	    "punpckldq	20(%2), %%mm0\n\t"
	    "pfmul	(%1), %%mm0\n\t"
	    "movd	36(%2), %%mm1\n\t"
	    "punpckldq  52(%2), %%mm1\n\t"
	    "pfmul	8(%1), %%mm1\n\t"
	    "pfacc	%%mm1, %%mm0\n\t"
	    "pfacc	%%mm0, %%mm0\n\t"
	    "movd	%%mm0, 4(%0)\n\t"
	    /* Column 2: same sequence, all right/result offsets + 8. */
	    "movd	8(%2), %%mm0\n\t"
	    "punpckldq	24(%2), %%mm0\n\t"
	    "pfmul	(%1), %%mm0\n\t"
	    "movd	40(%2), %%mm1\n\t"
	    "punpckldq  56(%2), %%mm1\n\t"
	    "pfmul	8(%1), %%mm1\n\t"
	    "pfacc	%%mm1, %%mm0\n\t"
	    "pfacc	%%mm0, %%mm0\n\t"
	    "movd	%%mm0, 8(%0)\n\t"
	    /* Column 3: same sequence, all right/result offsets + 12. */
	    "movd	12(%2), %%mm0\n\t"
	    "punpckldq	28(%2), %%mm0\n\t"
	    "pfmul	(%1), %%mm0\n\t"
	    "movd	44(%2), %%mm1\n\t"
	    "punpckldq  60(%2), %%mm1\n\t"
	    "pfmul	8(%1), %%mm1\n\t"
	    "pfacc	%%mm1, %%mm0\n\t"
	    "pfacc	%%mm0, %%mm0\n\t"
	    "movd	%%mm0, 12(%0)\n"
	    "\n\t"
	    /* Advance to the next row of the result and of left. */
	    "add	$16, %0\n\t"
	    "add	$16, %1\n\t"

	    /* Loop until all four rows are done. */
	    "decl	%%ecx\n\t"

	    "jnz	0b\n"
	    "\n\t"
	    /* Leave MMX/3DNow! state before returning to regular code. */
	    "femms"
	    : "+r"(resultPtr), "+r"(left), "+r"(right)
	    :: "ecx", "mm0", "mm1", "memory"
	);

	/* Publish the finished product into the receiver. */
	memcpy(self->_values, result, 16 * sizeof(float));
}

static void
transformVectors_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors,