Overview
Comment: | OFMatrix4x4: Move __asm__ out of loop |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
0d671245d4bb5953939c96cc4ffe77d9 |
User & Date: | js on 2023-11-02 22:48:20 |
Other Links: | manifest | tags |
Context
2023-11-02
| ||
23:00 | OFMatrix4x4: Minor cleanups check-in: b5c3a36731 user: js tags: trunk | |
22:48 | OFMatrix4x4: Move __asm__ out of loop check-in: 0d671245d4 user: js tags: trunk | |
2023-11-01
| ||
21:22 | OFMatrix4x4: Use __asm__ __volatile__ check-in: f949f7775b user: js tags: trunk | |
Changes
Modified src/OFMatrix4x4.m from [b42a0641ea] to [a12ee064b0].
︙ | ︙ | |||
34 35 36 37 38 39 40 | # pragma GCC push_options # pragma GCC target("3dnow") # endif static void multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { | > | < < | > > > | | | | | | | | | | > > > > > > > > > > > > > > > | < | | < < < < > | < < | > > > | | | | | | | | | | | > > > > > > > > > > > > > > > | < | | < < < < | 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | # pragma GCC push_options # pragma GCC target("3dnow") # endif static void multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { float *left = &matrix->_values[0][0], *right = &self->_values[0][0]; float result[4][4], *resultPtr = &result[0][0]; __asm__ __volatile__ ( "xorw %%cx, %%cx\n" "\n\t" "0:\n\t" "movd (%2), %%mm0\n\t" "punpckldq 16(%2), %%mm0\n\t" "pfmul (%1), %%mm0\n\t" "movd 32(%2), %%mm1\n\t" "punpckldq 48(%2), %%mm1\n\t" "pfmul 8(%1), %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "pswapd %%mm0, %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movd %%mm0, (%0)\n" "\n\t" "add $4, %0\n\t" "add $4, %2\n\t" "incb %%cl\n\t" "cmpb $4, %%cl\n\t" "jb 0b\n" "\n\t" "add $16, %1\n\t" "sub $16, %2\n\t" "xorb %%cl, %%cl\n\t" "incb %%ch\n\t" "cmpb $4, %%ch\n\t" "jb 0b\n" "\n\t" "femms" : "+r"(resultPtr), "+r"(left), "+r"(right) :: "cx", "mm0", "mm1", "memory" ); memcpy(self->_values, result, sizeof(result)); } static void multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { float *left = &matrix->_values[0][0], *right = &self->_values[0][0]; float result[4][4], *resultPtr = &result[0][0]; __asm__ __volatile__ ( "xorw %%cx, %%cx\n" "\n\t" "0:\n\t" "movd (%2), %%mm0\n\t" "punpckldq 16(%2), %%mm0\n\t" "pfmul (%1), %%mm0\n\t" "movd 32(%2), %%mm1\n\t" "punpckldq 48(%2), %%mm1\n\t" "pfmul 8(%1), %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movq %%mm0, %%mm1\n\t" "psrlq $32, %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movd %%mm0, (%0)\n" "\n\t" "add $4, %0\n\t" "add $4, %2\n\t" "incb %%cl\n\t" "cmpb $4, %%cl\n\t" "jb 0b\n" "\n\t" "add $16, %1\n\t" "sub $16, %2\n\t" "xorb %%cl, %%cl\n\t" "incb %%ch\n\t" "cmpb $4, %%ch\n\t" "jb 0b\n" "\n\t" "femms" : "+r"(resultPtr), "+r"(left), "+r"(right) :: "cx", "mm0", "mm1", "memory" ); memcpy(self->_values, result, sizeof(result)); } static void transformVectors_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors, size_t count) |
︙ | ︙ | |||
213 214 215 216 217 218 219 | "0:\n\t" "femms" : "+r"(count), "+r"(vectors) : "r"(&self->_values) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); } | < | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | "0:\n\t" "femms" : "+r"(count), "+r"(vectors) : "r"(&self->_values) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); } # ifndef __clang__ # pragma GCC pop_options # endif + (void)initialize { const char *typeEncoding; |
︙ | ︙ |