Differences From Artifact [b42a0641ea]:
- File src/OFMatrix4x4.m — part of check-in [f949f7775b] at 2023-11-01 21:22:10 on branch trunk — OFMatrix4x4: Use __asm__ __volatile__
The outputs aren't consumed as they are used as inputs that can be
modified. (user: js, size: 9582) [annotate] [blame] [check-ins using]
To Artifact [a12ee064b0]:
- File src/OFMatrix4x4.m — part of check-in [0d671245d4] at 2023-11-02 22:48:20 on branch trunk — OFMatrix4x4: Move __asm__ out of loop (user: js, size: 10093) [annotate] [blame] [check-ins using]
︙ | ︙ | |||
34 35 36 37 38 39 40 | # pragma GCC push_options # pragma GCC target("3dnow") # endif static void multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { | > | < < | > > > | | | | | | | | | | > > > > > > > > > > > > > > > | < | | < < < < > | < < | > > > | | | | | | | | | | | > > > > > > > > > > > > > > > | < | | < < < < | 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | # pragma GCC push_options # pragma GCC target("3dnow") # endif /* Replaces the values of self with the product of matrix (left operand) and self (right operand), computed with 3DNow! instructions. Each result element is the sum of four products: four consecutive floats read through the left pointer times four floats read through the right pointer at 16-byte steps (presumably one row of left times one column of right, assuming _values is a 4x4 float array like the local result buffer). The product is written to the local result buffer and copied into self->_values only after the assembly has finished, since self->_values is read as the right operand throughout. All three pointers are read-write (+r) operands because the assembly advances them. Differs from multiplyWithMatrix_3DNow only in using pswapd for the horizontal add. _cmd is unused. */ static void multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { float *left = &matrix->_values[0][0], *right = &self->_values[0][0]; float result[4][4], *resultPtr = &result[0][0]; __asm__ __volatile__ ( "xorw %%cx, %%cx\n" /* cl is the inner loop counter, ch the outer loop counter; both start at 0 */ "\n\t" "0:\n\t" /* mm0 = floats at right+0 and right+16, multiplied by the two floats at left+0 */ "movd (%2), %%mm0\n\t" "punpckldq 16(%2), %%mm0\n\t" "pfmul (%1), %%mm0\n\t" /* mm1 = floats at right+32 and right+48, multiplied by the two floats at left+8 */ "movd 32(%2), %%mm1\n\t" "punpckldq 48(%2), %%mm1\n\t" "pfmul 8(%1), %%mm1\n\t" /* add both product pairs, then add the swapped halves so the low half holds the sum of all four products, and store that float */ "pfadd %%mm1, %%mm0\n\t" "pswapd %%mm0, %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movd %%mm0, (%0)\n" "\n\t" /* advance result and right by one float; run the inner loop 4 times */ "add $4, %0\n\t" "add $4, %2\n\t" "incb %%cl\n\t" "cmpb $4, %%cl\n\t" "jb 0b\n" "\n\t" /* advance left by 16 bytes, rewind right by the 16 bytes added above, reset cl; run the outer loop 4 times */ "add $16, %1\n\t" "sub $16, %2\n\t" "xorb %%cl, %%cl\n\t" "incb %%ch\n\t" "cmpb $4, %%ch\n\t" "jb 0b\n" "\n\t" /* leave the MMX/3DNow! state before returning to C code */ "femms" : "+r"(resultPtr), "+r"(left), "+r"(right) :: "cx", "mm0", "mm1", "memory" ); memcpy(self->_values, result, sizeof(result)); } /* Same algorithm, operands and clobbers as multiplyWithMatrix_enhanced3DNow: multiplies matrix (left operand) by self (right operand) into a local result buffer and then copies it into self->_values. The only difference is the horizontal add, which copies mm0 to mm1 with movq and shifts it right by 32 bits with psrlq instead of using pswapd. _cmd is unused. */ static void multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { float *left = &matrix->_values[0][0], *right = &self->_values[0][0]; float result[4][4], *resultPtr = &result[0][0]; __asm__ __volatile__ ( "xorw %%cx, %%cx\n" "\n\t" "0:\n\t" "movd (%2), %%mm0\n\t" "punpckldq 16(%2), %%mm0\n\t" "pfmul (%1), %%mm0\n\t" "movd 32(%2), %%mm1\n\t" "punpckldq 48(%2), %%mm1\n\t" "pfmul 8(%1), 
%%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movq %%mm0, %%mm1\n\t" "psrlq $32, %%mm1\n\t" "pfadd %%mm1, %%mm0\n\t" "movd %%mm0, (%0)\n" "\n\t" "add $4, %0\n\t" "add $4, %2\n\t" "incb %%cl\n\t" "cmpb $4, %%cl\n\t" "jb 0b\n" "\n\t" "add $16, %1\n\t" "sub $16, %2\n\t" "xorb %%cl, %%cl\n\t" "incb %%ch\n\t" "cmpb $4, %%ch\n\t" "jb 0b\n" "\n\t" "femms" : "+r"(resultPtr), "+r"(left), "+r"(right) :: "cx", "mm0", "mm1", "memory" ); memcpy(self->_values, result, sizeof(result)); } static void transformVectors_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors, size_t count) |
︙ | ︙ | |||
213 214 215 216 217 218 219 | "0:\n\t" "femms" : "+r"(count), "+r"(vectors) : "r"(&self->_values) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); } | < | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | "0:\n\t" "femms" : "+r"(count), "+r"(vectors) : "r"(&self->_values) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); } # ifndef __clang__ # pragma GCC pop_options # endif + (void)initialize { const char *typeEncoding; |
︙ | ︙ |