Differences From Artifact [c4d1f32dd0]:
- File
src/macros.h
— part of check-in
[7f304f573b]
at
2023-11-03 00:16:18
on branch trunk
— OFMatrix4x4: SSE4.1 for -[transformVectors:count:]
This requires the vectors to be 16-byte aligned. In order to achieve
this, the OFVector4D type is changed to have an alignment of 16 bytes.
However, this does *not* break ABI because the only method actually
requiring 16-byte alignment is -[transformVectors:count:], which was not
in ObjFW 1.0. Hence binaries compiled for ObjFW 1.0 have no 16-byte
alignment for OFVector4D, but also cannot ever call into any code that
needs it. (-[transformedVector:] calls into -[transformVectors:count:],
but creates a properly aligned copy that it passes.) (user: js, size: 19412) [annotate] [blame] [check-ins using]
To Artifact [779db83f80]:
- File src/macros.h — part of check-in [1b22456db6] at 2023-11-09 21:09:29 on branch trunk — Use named operands for __asm__ (user: js, size: 19444) [annotate] [blame] [check-ins using]
︙ | ︙ | |||
494 495 496 497 498 499 500 | OFByteSwap16NonConst(uint16_t i) { #if defined(OF_HAVE_BUILTIN_BSWAP16) return __builtin_bswap16(i); #elif (defined(OF_AMD64) || defined(OF_X86)) && defined(__GNUC__) __asm__ ( "xchg{b} { %h0, %b0 | %b0, %h0 }" | | | | | > | | | | | | > | | | | | | | 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 | OFByteSwap16NonConst(uint16_t i) { #if defined(OF_HAVE_BUILTIN_BSWAP16) return __builtin_bswap16(i); #elif (defined(OF_AMD64) || defined(OF_X86)) && defined(__GNUC__) __asm__ ( "xchg{b} { %h0, %b0 | %b0, %h0 }" : "=Q" (i) : "0" (i) ); #elif defined(OF_POWERPC) && defined(__GNUC__) __asm__ ( "lhbrx %0, 0, %1" : "=r" (i) : "r" (&i), "m" (i) ); #elif defined(OF_ARMV6) && defined(__GNUC__) __asm__ ( "rev16 %0, %0" : "=r" (i) : "0" (i) ); #else i = (i & UINT16_C(0xFF00)) >> 8 | (i & UINT16_C(0x00FF)) << 8; #endif return i; } static OF_INLINE uint32_t OF_CONST_FUNC OFByteSwap32NonConst(uint32_t i) { #if defined(OF_HAVE_BUILTIN_BSWAP32) return __builtin_bswap32(i); #elif (defined(OF_AMD64) || defined(OF_X86)) && defined(__GNUC__) __asm__ ( "bswap %0" : "=q" (i) : "0" (i) ); #elif defined(OF_POWERPC) && defined(__GNUC__) __asm__ ( "lwbrx %0, 0, %1" : "=r" (i) : "r" (&i), "m" (i) ); #elif defined(OF_ARMV6) && defined(__GNUC__) __asm__ ( "rev %0, %0" : "=r" (i) : "0" (i) ); #else i = (i & UINT32_C(0xFF000000)) >> 24 | (i & UINT32_C(0x00FF0000)) >> 8 | (i & UINT32_C(0x0000FF00)) << 8 | (i & UINT32_C(0x000000FF)) << 24; #endif return i; } static OF_INLINE uint64_t OF_CONST_FUNC OFByteSwap64NonConst(uint64_t i) { #if defined(OF_HAVE_BUILTIN_BSWAP64) return __builtin_bswap64(i); #elif defined(OF_AMD64) && defined(__GNUC__) __asm__ ( "bswap %0" : "=r" (i) : 
"0" (i) ); #elif defined(OF_X86) && defined(__GNUC__) __asm__ ( "bswap {%%}eax\n\t" "bswap {%%}edx\n\t" "xchg{l} { %%eax, %%edx | edx, eax }" : "=A" (i) : "0" (i) ); #else i = (uint64_t)OFByteSwap32NonConst( (uint32_t)(i & UINT32_C(0xFFFFFFFF))) << 32 | OFByteSwap32NonConst((uint32_t)(i >> 32)); #endif return i; |
︙ | ︙ |