ObjFW  Check-in [5b213166ee]

Overview
Comment: OFMatrix4x4: Use 3DNow! for multiplication
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 5b213166eed003c3b0ab5d8f30ea9bc510f4e0b403385131fdb5516e0f6d4f81
User & Date: js on 2023-10-30 23:31:27
Other Links: manifest | tags
Context
2023-10-30
23:58
OFMatrix4x4: Use 3DNow! to transform vectors check-in: 1ac0583aae user: js tags: trunk
23:31
OFMatrix4x4: Use 3DNow! for multiplication check-in: 5b213166ee user: js tags: trunk
2023-10-29
12:03
OFMatrix4x4: Convert multiplication to loop check-in: cf4d6a3dfa user: js tags: trunk
Changes

Modified configure.ac from [e4de599d6e] to [ac89f801b1].

329
330
331
332
333
334
335










336
337
338
339
340
341
342
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352







+
+
+
+
+
+
+
+
+
+







# Probe optional compiler flags and append each one to OBJCFLAGS only when
# the compiler accepts it. The -fno-constant-cfstrings flag is additionally
# appended to OBJFW_OBJCFLAGS.
AX_CHECK_COMPILER_FLAGS(-pipe, [OBJCFLAGS="$OBJCFLAGS -pipe"])
AX_CHECK_COMPILER_FLAGS(-fno-common, [OBJCFLAGS="$OBJCFLAGS -fno-common"])
AX_CHECK_COMPILER_FLAGS(-Xclang -fno-constant-cfstrings, [
	flag="-Xclang -fno-constant-cfstrings"
	OBJCFLAGS="$OBJCFLAGS $flag"
	OBJFW_OBJCFLAGS="$OBJFW_OBJCFLAGS $flag"
])

# On x86 and x86_64 hosts, compile with Intel assembler syntax when the
# compiler supports -masm=intel, and define HAVE_INTEL_SYNTAX so that code
# can test whether inline assembly is written in Intel syntax.
case "$host_cpu" in
i*86|x86_64)
	AX_CHECK_COMPILER_FLAGS([-masm=intel], [
		OBJCFLAGS="$OBJCFLAGS -masm=intel"
		AC_DEFINE(HAVE_INTEL_SYNTAX, 1, [Whether asm syntax is Intel])
	])
	;;
esac

# The warning flags below are probed together with -Werror, but only the
# warning flag itself is added to OBJCFLAGS. Presumably -Werror is part of
# the probe so that a flag the compiler merely warns about is rejected; this
# should be confirmed. The -Wshadow check is skipped when with_nds is yes.
AX_CHECK_COMPILER_FLAGS([-Wsign-compare -Werror],
	[OBJCFLAGS="$OBJCFLAGS -Wsign-compare"])
AS_IF([test x"$with_nds" != x"yes"], [
	AX_CHECK_COMPILER_FLAGS([-Wshadow -Werror],
		[OBJCFLAGS="$OBJCFLAGS -Wshadow"])
])
AX_CHECK_COMPILER_FLAGS([-Wshorten-64-to-32 -Werror],

Modified src/OFMatrix4x4.m from [ccf7b44cda] to [5d349ae57d].

12
13
14
15
16
17
18
19
20




21
22
23
24
25
26
27
28
29















































30
31
32
33
34
35
36
12
13
14
15
16
17
18


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85







-
-
+
+
+
+









+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







 * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
 * file.
 */

#include "config.h"

#import "OFMatrix4x4.h"
#import "OFOnce.h"
#import "OFString.h"
#import "OFString.h"
#import "OFSystemInfo.h"

#import "OFOnce.h"

/*
 * Values of the 4x4 identity matrix: ones on the main diagonal and zeros
 * everywhere else. Passed to -initWithValues: by +identityMatrix.
 */
static const float identityValues[4][4] = {
	{ 1.0f, 0.0f, 0.0f, 0.0f },
	{ 0.0f, 1.0f, 0.0f, 0.0f },
	{ 0.0f, 0.0f, 1.0f, 0.0f },
	{ 0.0f, 0.0f, 0.0f, 1.0f }
};

@implementation OFMatrix4x4
#if (defined(OF_AMD64) || defined(OF_X86)) && defined(HAVE_INTEL_SYNTAX)
/*
 * 3DNow! implementation of -[OFMatrix4x4 multiplyWithMatrix:].
 *
 * For every i and j it computes
 *
 *   result[i][j] = sum over k of matrix->_values[i][k] * self->_values[k][j]
 *
 * and finally copies the result over self->_values. The function has the
 * shape of an IMP (self, _cmd, argument) so that +initialize can install it
 * with class_replaceMethod().
 *
 * The asm is written in Intel syntax, hence the HAVE_INTEL_SYNTAX guard
 * around this function.
 */
static void
multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix)
{
	/* No initializer needed: every element is written by the asm below. */
	float result[4][4];

	for (uint_fast8_t i = 0; i < 4; i++) {
		for (uint_fast8_t j = 0; j < 4; j++) {
			/*
			 * %0 is written by the final movd, so it has to be an
			 * output operand ("=m"), not an input. The asm is
			 * explicitly volatile so that it keeps its order
			 * relative to the femms below, which depends on the
			 * MMX state in a way the compiler cannot see.
			 *
			 * The offsets 16/32/48 step through column j one row
			 * at a time (assumes _values is float[4][4], i.e. 16
			 * bytes per row).
			 */
			__asm__ __volatile__ (
			    /* mm0 = { self[0][j], self[1][j] } */
			    "movd	mm0, [%2]\n\t"
			    "punpckldq	mm0, [%2 + 16]\n\t"
			    /* mm0 *= { matrix[i][0], matrix[i][1] } */
			    "pfmul	mm0, [%1]\n\t"
			    /* mm1 = { self[2][j], self[3][j] } */
			    "movd	mm1, [%2 + 32]\n\t"
			    "punpckldq	mm1, [%2 + 48]\n\t"
			    /* mm1 *= { matrix[i][2], matrix[i][3] } */
			    "pfmul	mm1, [%1 + 8]\n\t"
			    /* Sum the four products into the low float. */
			    "pfadd	mm0, mm1\n\t"
			    "movq	mm1, mm0\n\t"
			    "psrlq	mm1, 32\n\t"
			    "pfadd	mm0, mm1\n\t"
			    "movd	%0, mm0"
			    : "=m"(result[i][j])
			    : "r"(&matrix->_values[i][0]),
			      "r"(&self->_values[0][j])
			    : "mm0", "mm1", "memory"
			);
		}
	}

	/* Leave MMX/3DNow! state so that x87 code can run again. */
	__asm__ __volatile__ ("femms");

	memcpy(self->_values, result, sizeof(result));
}

/*
 * Class initializer: when invoked for OFMatrix4x4 itself (not a subclass)
 * and OFSystemInfo reports 3DNow! support, replaces the implementation of
 * -multiplyWithMatrix: with multiplyWithMatrix_3DNow, reusing the type
 * encoding of the existing method.
 */
+ (void)initialize
{
	SEL selector;
	const char *typeEncoding;

	if (self != [OFMatrix4x4 class] || ![OFSystemInfo supports3DNow])
		return;

	selector = @selector(multiplyWithMatrix:);
	typeEncoding = method_getTypeEncoding(
	    class_getInstanceMethod(self, selector));

	class_replaceMethod(self, selector, (IMP)multiplyWithMatrix_3DNow,
	    typeEncoding);
}
#endif

/*
 * Returns a new, autoreleased OFMatrix4x4 initialized with identityValues.
 */
+ (OFMatrix4x4 *)identityMatrix
{
	OFMatrix4x4 *identity =
	    [[OFMatrix4x4 alloc] initWithValues: identityValues];

	return [identity autorelease];
}

+ (instancetype)matrixWithValues: (const float [4][4])values

Modified src/OFSystemInfo.m from [1b9d36f2fa] to [3c4e0fe6ea].

294
295
296
297
298
299
300
301

302
303

304
305
306
307
308
309
310
294
295
296
297
298
299
300

301
302

303
304
305
306
307
308
309
310







-
+

-
+







	/*
	 * This workaround is required by older GCC versions when using -fPIC,
	 * as ebx is a special register in PIC code. Yes, GCC is indeed not
	 * able to just push a register onto the stack before the __asm__ block
	 * and to pop it afterwards.
	 */
	__asm__ (
	    "xchgl	%%ebx, %%edi\n\t"
	    "xchg{l}	{ %%ebx, %%edi | edi, ebx }\n\t"
	    "cpuid\n\t"
	    "xchgl	%%edi, %%ebx"
	    "xchg{l}	{ %%edi, %%ebx | ebx, edi }"
	    : "=a"(regs.eax), "=D"(regs.ebx), "=c"(regs.ecx), "=d"(regs.edx)
	    : "a"(eax), "c"(ecx)
	);
# else
	memset(&regs, 0, sizeof(regs));
# endif