Index: configure.ac ================================================================== --- configure.ac +++ configure.ac @@ -234,38 +234,35 @@ AC_DEFINE(NEED_OBJC_SYNC_INIT, 1, [Whether objc_sync_init needs to be called])]) atomic_ops="none" - AC_MSG_CHECKING(whether __sync_* works) - AC_TRY_LINK([#include <stdint.h>], [ - int32_t i, j; - if (__sync_add_and_fetch(&i, 1)) - j = __sync_sub_and_fetch(&i, 1); - while (!__sync_bool_compare_and_swap(&i, 0, 1)); + AC_MSG_CHECKING(whether we have an atomic ops assembly implementation) + AC_EGREP_CPP(yes, [ + #if defined(__GNUC__) && (defined(__i386__) || \ + defined(__amd64__) || defined(__x86_64__)) + yes + #endif ], [ AC_MSG_RESULT(yes) - atomic_ops="gcc builtins" - AC_DEFINE(OF_HAVE_GCC_ATOMIC_OPS, 1, - [Whether gcc atomic operations are available]) - ], [ - old_OBJCFLAGS="$OBJCFLAGS" - OBJCFLAGS="$OBJCFLAGS -march=i486" + atomic_ops="assembly implementation" + ], [AC_MSG_RESULT(no)]) + + if test x"$atomic_ops" = x"none"; then + AC_MSG_CHECKING(whether __sync_* works) AC_TRY_LINK([#include <stdint.h>], [ int32_t i, j; if (__sync_add_and_fetch(&i, 1)) j = __sync_sub_and_fetch(&i, 1); while (!__sync_bool_compare_and_swap(&i, 0, 1)); ], [ - AC_MSG_RESULT([yes, with -march=i486]) - atomic_ops="gcc builtins (with -march=i486)" + AC_MSG_RESULT(yes) + atomic_ops="gcc builtins" AC_DEFINE(OF_HAVE_GCC_ATOMIC_OPS, 1, [Whether gcc atomic operations are available]) - AC_SUBST(ATOMIC_OBJCFLAGS, "-march=i486") - ], [ - AC_MSG_RESULT(no) - OBJCFLAGS="$old_OBJCFLAGS"])]) + ], [AC_MSG_RESULT(no)]) + fi if test x"$atomic_ops" = x"none"; then AC_CHECK_HEADER(libkern/OSAtomic.h, [ atomic_ops="libkern/OSAtomic.h" AC_DEFINE(OF_HAVE_LIBKERN_OSATOMIC_H, 1, Index: src/atomic.h ================================================================== --- src/atomic.h +++ src/atomic.h @@ -9,12 +9,12 @@ * the packaging of this file. 
*/ #import "macros.h" -#if defined(OF_THREADS) && !defined(OF_HAVE_GCC_ATOMIC_OPS) && \ - !defined(OF_HAVE_LIBKERN_OSATOMIC_H) +#if defined(OF_THREADS) && !defined(OF_X86_ASM) && !defined(OF_AMD64_ASM) && \ + !defined(OF_HAVE_GCC_ATOMIC_OPS) && !defined(OF_HAVE_LIBKERN_OSATOMIC_H) # error No atomic operations available! #endif #ifdef OF_HAVE_LIBKERN_OSATOMIC_H # include <libkern/OSAtomic.h> @@ -23,10 +23,19 @@ static OF_INLINE int32_t of_atomic_add_32(volatile int32_t *p, int32_t i) { #if !defined(OF_THREADS) return (*p += i); +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + int32_t r = *p + i; + __asm__ volatile ( + "lock\n\t" + "addl %0, (%1)" + : + : "r"(i), "r"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) return __sync_add_and_fetch(p, i); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicAdd32Barrier(i, p); #endif @@ -35,10 +44,19 @@ static OF_INLINE int32_t of_atomic_sub_32(volatile int32_t *p, int32_t i) { #if !defined(OF_THREADS) return (*p -= i); +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + int32_t r = *p - i; + __asm__ volatile ( + "lock\n\t" + "subl %0, (%1)" + : + : "r"(i), "r"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) return __sync_sub_and_fetch(p, i); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicAdd32Barrier(-i, p); #endif @@ -47,10 +65,19 @@ static OF_INLINE int32_t of_atomic_inc_32(volatile int32_t *p) { #if !defined(OF_THREADS) return ++*p; +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + int32_t r = *p + 1; + __asm__ volatile ( + "lock\n\t" + "incl (%0)" + : + : "r"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) return __sync_add_and_fetch(p, 1); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicIncrement32Barrier(p); #endif @@ -59,10 +86,19 @@ static OF_INLINE int32_t of_atomic_dec_32(volatile int32_t *p) { #if !defined(OF_THREADS) return --*p; +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + int32_t r = *p - 1; + __asm__ volatile ( + "lock\n\t" + 
"decl (%0)" + : + : "r"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) return __sync_sub_and_fetch(p, 1); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicDecrement32Barrier(p); #endif @@ -71,10 +107,19 @@ static OF_INLINE uint32_t of_atomic_or_32(volatile uint32_t *p, uint32_t i) { #if !defined(OF_THREADS) return (*p |= i); +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + uint32_t r = *p | i; + __asm__ volatile ( + "lock\n\t" + "orl %0, (%1)" + : + : "r"(i), "r"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) return __sync_or_and_fetch(p, i); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicOr32Barrier(i, p); #endif @@ -83,10 +128,19 @@ static OF_INLINE uint32_t of_atomic_and_32(volatile uint32_t *p, uint32_t i) { #if !defined(OF_THREADS) return (*p &= i); +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + uint32_t r = *p & i; + __asm__ volatile ( + "lock\n\t" + "andl %0, (%1)" + : + : "r"(i), "r"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) return __sync_and_and_fetch(p, i); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicAnd32Barrier(i, p); #endif @@ -95,10 +149,19 @@ static OF_INLINE uint32_t of_atomic_xor_32(volatile uint32_t *p, uint32_t i) { #if !defined(OF_THREADS) return (*p ^= i); +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + uint32_t r = *p ^ i; + __asm__ volatile ( + "lock\n\t" + "xorl %0, (%1)" + : + : "r"(i), "r"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) return __sync_xor_and_fetch(p, i); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicXor32Barrier(i, p); #endif @@ -112,10 +175,22 @@ *p = n; return YES; } return NO; +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + uint32_t r; + __asm__ volatile ( + "lock; cmpxchg %2, (%3)\n\t" + "lahf\n\t" + "andb $64, %%ah\n\t" + "shrb $6, %%ah\n\t" + "movzx %%ah, %0\n\t" + : "=a"(r) + : "a"(o), "r"(n), "r"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) 
return __sync_bool_compare_and_swap(p, o, n); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicCompareAndSwap32Barrier(o, n, p); #endif @@ -129,11 +204,23 @@ *p = n; return YES; } return NO; +#elif defined(OF_X86_ASM) || defined(OF_AMD64_ASM) + uint32_t r; + __asm__ volatile ( + "lock; cmpxchg %2, (%3)\n\t" + "lahf\n\t" + "andb $64, %%ah\n\t" + "shrb $6, %%ah\n\t" + "movzx %%ah, %0\n\t" + : "=a"(r) + : "a"(o), "q"(n), "q"(p), "m"(*p) + ); + return r; #elif defined(OF_HAVE_GCC_ATOMIC_OPS) return __sync_bool_compare_and_swap(p, o, n); #elif defined(OF_HAVE_LIBKERN_OSATOMIC_H) return OSAtomicCompareAndSwapPtrBarrier(o, n, p); #endif } Index: src/macros.h ================================================================== --- src/macros.h +++ src/macros.h @@ -84,15 +84,27 @@ static OF_INLINE uint16_t OF_BSWAP16_NONCONST(uint16_t i) { #if defined(OF_X86_ASM) || defined(OF_AMD64_ASM) - __asm__ ("xchgb %h0, %b0" : "=Q"(i) : "0"(i)); + __asm__ ( + "xchgb %h0, %b0" + : "=Q"(i) + : "0"(i) + ); #elif defined(OF_PPC_ASM) - __asm__ ("lhbrx %0, 0, %1" : "=r"(i) : "r"(&i), "m"(i)); + __asm__ ( + "lhbrx %0, 0, %1" + : "=r"(i) + : "r"(&i), "m"(i) + ); #elif defined(OF_ARM_ASM) - __asm__ ("rev16 %0, %0" : "=r"(i) : "0"(i)); + __asm__ ( + "rev16 %0, %0" + : "=r"(i) + : "0"(i) + ); #else i = (i & UINT16_C(0xFF00)) >> 8 | (i & UINT16_C(0x00FF)) << 8; #endif return i; @@ -100,15 +112,27 @@ static OF_INLINE uint32_t OF_BSWAP32_NONCONST(uint32_t i) { #if defined(OF_X86_ASM) || defined(OF_AMD64_ASM) - __asm__ ("bswap %0" : "=q"(i) : "0"(i)); + __asm__ ( + "bswap %0" + : "=q"(i) + : "0"(i) + ); #elif defined(OF_PPC_ASM) - __asm__ ("lwbrx %0, 0, %1" : "=r"(i) : "r"(&i), "m"(i)); + __asm__ ( + "lwbrx %0, 0, %1" + : "=r"(i) + : "r"(&i), "m"(i) + ); #elif defined(OF_ARM_ASM) - __asm__ ("rev %0, %0" : "=r"(i) : "0"(i)); + __asm__ ( + "rev %0, %0" + : "=r"(i) + : "0"(i) + ); #else i = (i & UINT32_C(0xFF000000)) >> 24 | (i & UINT32_C(0x00FF0000)) >> 8 | (i & UINT32_C(0x0000FF00)) << 8 | (i 
& UINT32_C(0x000000FF)) << 24; @@ -118,15 +142,23 @@ static OF_INLINE uint64_t OF_BSWAP64_NONCONST(uint64_t i) { #if defined(OF_AMD64_ASM) - __asm__ ("bswap %0" : "=r"(i) : "0"(i)); + __asm__ ( + "bswap %0" + : "=r"(i) + : "0"(i) + ); #elif defined(OF_X86_ASM) - __asm__ ("bswap %%eax\n\t" - "bswap %%edx\n\t" - "xchgl %%eax, %%edx" : "=A"(i) : "0"(i)); + __asm__ ( + "bswap %%eax\n\t" + "bswap %%edx\n\t" + "xchgl %%eax, %%edx" + : "=A"(i) + : "0"(i) + ); #else i = (uint64_t)OF_BSWAP32_NONCONST(i & 0xFFFFFFFF) << 32 | OF_BSWAP32_NONCONST(i >> 32); #endif return i;