Overview
Comment: | Work around Clang not aligning ivars correctly |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: | 055e14fc75b603d54098c112fa727a58 |
User & Date: | js on 2023-11-04 13:06:52 |
Other Links: | manifest | tags |
Context
2023-11-04
| ||
13:10 | OFMatrix4x4: Restore SSE4.1 code check-in: 0eb97e4612 user: js tags: trunk | |
13:06 | Work around Clang not aligning ivars correctly check-in: 055e14fc75 user: js tags: trunk | |
2023-11-03
| ||
01:07 | OFMatrix4x4: Remove SSE4.1 due to Clang bugs check-in: 7e1dbda4b4 user: js tags: trunk | |
Changes
Modified src/OFMatrix4x4.h from [12afeb3f7a] to [ecc8ac5d95].
︙ | ︙ | |||
19 20 21 22 23 24 25 | /** * @brief A 4x4 matrix of floats. */ OF_SUBCLASSING_RESTRICTED @interface OFMatrix4x4: OFObject <OFCopying> { | | | 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | /** * @brief A 4x4 matrix of floats. */ OF_SUBCLASSING_RESTRICTED @interface OFMatrix4x4: OFObject <OFCopying> { float (*_values)[4]; } #ifdef OF_HAVE_CLASS_PROPERTIES @property (readonly, class) OFMatrix4x4 *identityMatrix; #endif /** |
︙ | ︙ |
Modified src/OFMatrix4x4.m from [740fcdd16f] to [1cf8596c69].
︙ | ︙ | |||
34 35 36 37 38 39 40 | # pragma GCC push_options # pragma GCC target("3dnow,3dnowa") # endif static void multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { | | | | 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | # pragma GCC push_options # pragma GCC target("3dnow,3dnowa") # endif static void multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { float (*left)[4] = matrix->_values, (*right)[4] = self->_values; float result[4][4], (*resultPtr)[4] = result; __asm__ __volatile__ ( "xorw %%cx, %%cx\n" "\n\t" "0:\n\t" "movd (%2), %%mm0\n\t" "punpckldq 16(%2), %%mm0\n\t" |
︙ | ︙ | |||
70 71 72 73 74 75 76 | "jb 0b\n" "\n\t" "femms" : "+r"(resultPtr), "+r"(left), "+r"(right) :: "cx", "mm0", "mm1", "memory" ); | | | 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | "jb 0b\n" "\n\t" "femms" : "+r"(resultPtr), "+r"(left), "+r"(right) :: "cx", "mm0", "mm1", "memory" ); memcpy(self->_values, result, 16 * sizeof(float)); } static void transformVectors_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors, size_t count) { __asm__ __volatile__ ( |
︙ | ︙ | |||
128 129 130 131 132 133 134 | "add $16, %1\n\t" "dec %0\n\t" "jnz 0b\n" "\n\t" "0:\n\t" "femms" : "+r"(count), "+r"(vectors) | | | | | 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | "add $16, %1\n\t" "dec %0\n\t" "jnz 0b\n" "\n\t" "0:\n\t" "femms" : "+r"(count), "+r"(vectors) : "r"(self->_values) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); } # ifndef __clang__ # pragma GCC pop_options # endif # ifndef __clang__ # pragma GCC push_options # pragma GCC target("3dnow") # endif static void multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix) { float (*left)[4] = matrix->_values, (*right)[4] = self->_values; float result[4][4], (*resultPtr)[4] = result; __asm__ __volatile__ ( "xorw %%cx, %%cx\n" "\n\t" "0:\n\t" "movd (%2), %%mm0\n\t" "punpckldq 16(%2), %%mm0\n\t" |
︙ | ︙ | |||
180 181 182 183 184 185 186 | "jb 0b\n" "\n\t" "femms" : "+r"(resultPtr), "+r"(left), "+r"(right) :: "cx", "mm0", "mm1", "memory" ); | | | 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | "jb 0b\n" "\n\t" "femms" : "+r"(resultPtr), "+r"(left), "+r"(right) :: "cx", "mm0", "mm1", "memory" ); memcpy(self->_values, result, 16 * sizeof(float)); } static void transformVectors_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors, size_t count) { __asm__ __volatile__ ( |
︙ | ︙ | |||
242 243 244 245 246 247 248 | "add $16, %1\n\t" "dec %0\n\t" "jnz 0b\n" "\n\t" "0:\n\t" "femms" : "+r"(count), "+r"(vectors) | | | 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 | "add $16, %1\n\t" "dec %0\n\t" "jnz 0b\n" "\n\t" "0:\n\t" "femms" : "+r"(count), "+r"(vectors) : "r"(self->_values) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); } # ifndef __clang__ # pragma GCC pop_options # endif |
︙ | ︙ | |||
279 280 281 282 283 284 285 286 287 288 289 290 291 292 | transformVectors_3DNow) } } # undef REPLACE } #endif + (OFMatrix4x4 *)identityMatrix { return [[[OFMatrix4x4 alloc] initWithValues: identityValues] autorelease]; } | > > > > > > > > > > > > | 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 | transformVectors_3DNow) } } # undef REPLACE } #endif + (instancetype)alloc { OFMatrix4x4 *instance; float (*values)[4]; instance = OFAllocObject(self, 16 * sizeof(float), 16, (void **)&values); instance->_values = values; return instance; } + (OFMatrix4x4 *)identityMatrix { return [[[OFMatrix4x4 alloc] initWithValues: identityValues] autorelease]; } |
︙ | ︙ | |||
300 301 302 303 304 305 306 | OF_INVALID_INIT_METHOD } - (instancetype)initWithValues: (const float [4][4])values { self = [super init]; | | | < | | 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | OF_INVALID_INIT_METHOD } - (instancetype)initWithValues: (const float [4][4])values { self = [super init]; memcpy(_values, values, 16 * sizeof(float)); return self; } - (float (*)[4])values { return _values; } - (instancetype)copy { return [[OFMatrix4x4 alloc] initWithValues: _values]; } - (bool)isEqual: (OFMatrix4x4 *)matrix { if (![matrix isKindOfClass: [OFMatrix4x4 class]]) return false; return (memcmp(_values, matrix->_values, 16 * sizeof(float)) == 0); } - (unsigned long)hash { unsigned long hash; OFHashInit(&hash); |
︙ | ︙ | |||
351 352 353 354 355 356 357 | for (uint_fast8_t j = 0; j < 4; j++) result[i][j] = matrix->_values[i][0] * _values[0][j] + matrix->_values[i][1] * _values[1][j] + matrix->_values[i][2] * _values[2][j] + matrix->_values[i][3] * _values[3][j]; | | | 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 | for (uint_fast8_t j = 0; j < 4; j++) result[i][j] = matrix->_values[i][0] * _values[0][j] + matrix->_values[i][1] * _values[1][j] + matrix->_values[i][2] * _values[2][j] + matrix->_values[i][3] * _values[3][j]; memcpy(_values, result, 16 * sizeof(float)); } - (void)translateWithVector: (OFVector3D)vector { OFMatrix4x4 *translation = [[OFMatrix4x4 alloc] initWithValues: (const float [4][4]){ { 1, 0, 0, vector.x }, |
︙ | ︙ |