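Comment: | OFMatrix4x4: SSE4.1 for -[transformVectors:count:]
This requires the vectors to be 16-byte aligned. In order to achieve this, [… comment truncated here; per the diff below, OFVector4D and the _values array of OFMatrix4x4 are declared with OF_ALIGN(16)] |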
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
7f304f573b03eccbb08908fdebe6ec34 |
User & Date: | js on 2023-11-03 00:16:18 |
Other Links: | manifest | tags |
2023-11-03
| ||
01:07 | OFMatrix4x4: Remove SSE4.1 due to Clang bugs check-in: 7e1dbda4b4 user: js tags: trunk | |
00:16 | OFMatrix4x4: SSE4.1 for -[transformVectors:count:] check-in: 7f304f573b user: js tags: trunk | |
2023-11-02
| ||
23:00 | OFMatrix4x4: Minor cleanups check-in: b5c3a36731 user: js tags: trunk | |
Modified src/OFMatrix4x4.h from [5f3106a61c] to [12afeb3f7a].
︙ | ︙ | |||
19 20 21 22 23 24 25 | /** * @brief A 4x4 matrix of floats. */ OF_SUBCLASSING_RESTRICTED @interface OFMatrix4x4: OFObject <OFCopying> { | | | 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | /** * @brief A 4x4 matrix of floats. */ OF_SUBCLASSING_RESTRICTED @interface OFMatrix4x4: OFObject <OFCopying> { float _values[4][4] OF_ALIGN(16); } #ifdef OF_HAVE_CLASS_PROPERTIES @property (readonly, class) OFMatrix4x4 *identityMatrix; #endif /** |
︙ | ︙ |
Modified src/OFMatrix4x4.m from [740fcdd16f] to [46f1ac4383].
︙ | ︙ | |||
26 27 28 29 30 31 32 33 34 35 36 37 38 39 | { 0, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 } }; @implementation OFMatrix4x4 #if (defined(OF_AMD64) || defined(OF_X86)) && defined(__GNUC__) # ifndef __clang__ # pragma GCC push_options # pragma GCC target("3dnow,3dnowa") # endif static void multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | { 0, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 } }; @implementation OFMatrix4x4 #if (defined(OF_AMD64) || defined(OF_X86)) && defined(__GNUC__) # ifndef __clang__ # pragma GCC push_options # pragma GCC target("sse4.1") # endif static void transformVectors_SSE41(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors, size_t count) { __asm__ __volatile__ ( "test %0, %0\n\t" "jz 0f\n" "\n\t" "movaps (%2), %%xmm0\n\t" "movaps 16(%2), %%xmm1\n\t" "movaps 32(%2), %%xmm2\n\t" "movaps 48(%2), %%xmm3\n" "\n\t" "0:\n\t" "movaps (%1), %%xmm4\n\t" "movaps %%xmm4, %%xmm5\n\t" "dpps $0xFF, %%xmm0, %%xmm4\n\t" "movaps %%xmm5, %%xmm6\n\t" "dpps $0xFF, %%xmm1, %%xmm5\n\t" "movaps %%xmm6, %%xmm7\n\t" "dpps $0xFF, %%xmm2, %%xmm6\n\t" "dpps $0xFF, %%xmm3, %%xmm7\n\t" "insertps $0x10, %%xmm5, %%xmm4\n\t" "insertps $0x20, %%xmm6, %%xmm4\n\t" "insertps $0x30, %%xmm7, %%xmm4\n\t" "movaps %%xmm4, (%1)\n" "\n\t" "add $16, %1\n\t" "dec %0\n\t" "jnz 0b\n" : "+r"(count), "+r"(vectors) : "r"(&self->_values) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); } # ifndef __clang__ # pragma GCC pop_options # endif # ifndef __clang__ # pragma GCC push_options # pragma GCC target("3dnow,3dnowa") # endif static void multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) |
︙ | ︙ | |||
262 263 264 265 266 267 268 | return; # define REPLACE(selector, func) \ typeEncoding = method_getTypeEncoding( \ class_getInstanceMethod(self, selector)); \ class_replaceMethod(self, selector, (IMP)func, typeEncoding); | | > > > | 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 | return; # define REPLACE(selector, func) \ typeEncoding = method_getTypeEncoding( \ class_getInstanceMethod(self, selector)); \ class_replaceMethod(self, selector, (IMP)func, typeEncoding); if ([OFSystemInfo supportsSSE41]) { REPLACE(@selector(transformVectors:count:), transformVectors_SSE41) } else if ([OFSystemInfo supports3DNow]) { if ([OFSystemInfo supportsEnhanced3DNow]) { REPLACE(@selector(multiplyWithMatrix:), multiplyWithMatrix_enhanced3DNow) REPLACE(@selector(transformVectors:count:), transformVectors_enhanced3DNow) } else { REPLACE(@selector(multiplyWithMatrix:), |
︙ | ︙ |
Modified src/OFObject.h from [6d396e3db6] to [ea05d80823].
︙ | ︙ | |||
345 346 347 348 349 350 351 | } /** * @struct OFVector4D OFObject.h ObjFW/OFObject.h * * @brief A vector in 4D space. */ | | | 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 | } /** * @struct OFVector4D OFObject.h ObjFW/OFObject.h * * @brief A vector in 4D space. */ typedef struct OF_BOXABLE OF_ALIGN(16) OFVector4D { /** The x coordinate of the vector */ float x; /** The y coordinate of the vector */ float y; /** The z coordinate of the vector */ float z; /** The w coordinate of the vector */ |
︙ | ︙ |
Modified src/macros.h from [d9f6837658] to [c4d1f32dd0].
︙ | ︙ | |||
97 98 99 100 101 102 103 104 105 106 107 108 109 | # define OF_INLINE inline # define OF_LIKELY(cond) (cond) # define OF_UNLIKELY(cond) (cond) # define OF_CONST_FUNC # define OF_NO_RETURN_FUNC # define OF_WEAK_REF(sym) #endif #if __STDC_VERSION__ >= 201112L # define OF_ALIGNOF(type) _Alignof(type) # define OF_ALIGNAS(type) _Alignas(type) #else # define OF_ALIGNOF(type) __alignof__(type) | > > > > | | 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | # define OF_INLINE inline # define OF_LIKELY(cond) (cond) # define OF_UNLIKELY(cond) (cond) # define OF_CONST_FUNC # define OF_NO_RETURN_FUNC # define OF_WEAK_REF(sym) #endif #ifdef __GNUC__ # define OF_ALIGN(alignment) __attribute__((__aligned__(alignment))) #endif #if __STDC_VERSION__ >= 201112L # define OF_ALIGNOF(type) _Alignof(type) # define OF_ALIGNAS(type) _Alignas(type) #else # define OF_ALIGNOF(type) __alignof__(type) # define OF_ALIGNAS(type) OF_ALIGN(OF_ALIGNOF(type)) #endif #if __STDC_VERSION__ >= 201112L && defined(OF_HAVE_MAX_ALIGN_T) # define OF_BIGGEST_ALIGNMENT _Alignof(max_align_t) #else # ifdef __BIGGEST_ALIGNMENT__ # define OF_BIGGEST_ALIGNMENT __BIGGEST_ALIGNMENT__ |
︙ | ︙ |