ObjFW  Check-in [9ba7594f7b]

Overview
Comment: OFMatrix4x4: Fix missing vector reload in SSE
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 9ba7594f7b30df9b258251e1e0cb553c3abe3057738bcca0d2369eb6d58d7236
User & Date: js on 2023-11-06 20:11:51
Other Links: manifest | tags
Context
2023-11-06
20:17
OFMatrix4x4: Use an extra SSE register on AMD64 check-in: 5edf0d083d user: js tags: trunk
20:11
OFMatrix4x4: Fix missing vector reload in SSE check-in: 9ba7594f7b user: js tags: trunk
00:59
OFMatrix4x4: SSE1 for -[transformVectors:count:] check-in: cf955413ab user: js tags: trunk
Changes

Modified src/OFMatrix4x4.m from [ac56c8b767] to [b53637018a].

42
43
44
45
46
47
48
49
50
51
52


53
54
55
56
57
58
59

	__asm__ __volatile__ (
	    "test	%0, %0\n\t"
	    "jz		0f\n"
	    "\n\t"
	    "movaps	(%2), %%xmm0\n\t"
	    "movaps	16(%2), %%xmm1\n\t"
	    "movaps	32(%2), %%xmm2\n\t"
	    "movaps	(%1), %%xmm3\n"
	    "\n\t"
	    "0:\n\t"


	    "movaps	%%xmm0, %%xmm4\n\t"
	    "mulps	%%xmm3, %%xmm4\n\t"
	    "movaps	%%xmm4, (%3)\n\t"
	    "addss	4(%3), %%xmm4\n\t"
	    "addss	8(%3), %%xmm4\n\t"
	    "addss	12(%3), %%xmm4\n"
	    "\n\t"







|
<


>
>







42
43
44
45
46
47
48
49

50
51
52
53
54
55
56
57
58
59
60

	__asm__ __volatile__ (
	    "test	%0, %0\n\t"
	    "jz		0f\n"
	    "\n\t"
	    "movaps	(%2), %%xmm0\n\t"
	    "movaps	16(%2), %%xmm1\n\t"
	    "movaps	32(%2), %%xmm2\n"

	    "\n\t"
	    "0:\n\t"
	    "movaps	(%1), %%xmm3\n"
	    "\n\t"
	    "movaps	%%xmm0, %%xmm4\n\t"
	    "mulps	%%xmm3, %%xmm4\n\t"
	    "movaps	%%xmm4, (%3)\n\t"
	    "addss	4(%3), %%xmm4\n\t"
	    "addss	8(%3), %%xmm4\n\t"
	    "addss	12(%3), %%xmm4\n"
	    "\n\t"

Modified tests/OFMatrix4x4Tests.m from [eefd82d2bd] to [e8ca1aa928].

55
56
57
58
59
60
61

62
63
64
65
66
67
68
69
	    [OFMatrix4x4 matrixWithValues: (const float [4][4]){
		{ 1, 0, 0, 0 },
		{ 0, 1, 0, 0 },
		{ 0, 0, 1, 0 },
		{ 0, 0, 0, 1 }
	    }]])


	TEST(@"-[copy]", (matrix2 = [matrix copy]) && [matrix2 isEqual: matrix])

	TEST(@"-[multiplyWithMatrix:] #1",
	    R([matrix2 multiplyWithMatrix: [OFMatrix4x4 identityMatrix]]) &&
	    [matrix2 isEqual: matrix])

	matrix2 = [OFMatrix4x4 matrixWithValues: (const float [4][4]){
		{  100,  200,  300,  400 },







>
|







55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
	    [OFMatrix4x4 matrixWithValues: (const float [4][4]){
		{ 1, 0, 0, 0 },
		{ 0, 1, 0, 0 },
		{ 0, 0, 1, 0 },
		{ 0, 0, 0, 1 }
	    }]])

	TEST(@"-[copy]", (matrix2 = [[matrix copy] autorelease]) &&
	    [matrix2 isEqual: matrix])

	TEST(@"-[multiplyWithMatrix:] #1",
	    R([matrix2 multiplyWithMatrix: [OFMatrix4x4 identityMatrix]]) &&
	    [matrix2 isEqual: matrix])

	matrix2 = [OFMatrix4x4 matrixWithValues: (const float [4][4]){
		{  100,  200,  300,  400 },