@@ -1,7 +1,7 @@ /* - * Copyright (c) 2008-2022 Jonathan Schleifer + * Copyright (c) 2008-2024 Jonathan Schleifer * * All rights reserved. * * This file is part of ObjFW. It may be distributed under the terms of the * Q Public License 1.0, which can be found in the file LICENSE.QPL included in @@ -13,225 +13,179 @@ * file. */ OF_ASSUME_NONNULL_BEGIN -static OF_INLINE int -OFAtomicIntAdd(volatile int *_Nonnull p, int i) -{ - if (sizeof(int) == 4) - __asm__ __volatile__ ( - "lock\n\t" - "xaddl %0, %2\n\t" - "addl %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) - ); -#ifdef OF_X86_64 - else if (sizeof(int) == 8) - __asm__ __volatile__ ( - "lock\n\t" - "xaddq %0, %2\n\t" - "addq %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) - ); -#endif - else - abort(); - - return i; -} - static OF_INLINE int32_t OFAtomicInt32Add(volatile int32_t *_Nonnull p, int32_t i) { __asm__ __volatile__ ( "lock\n\t" - "xaddl %0, %2\n\t" - "addl %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) + "xadd{l} { %0, %2 | %2, %0 }\n\t" + "add{l} { %1, %0 | %0, %1 }" + : "+&r" (i) + : "r" (i), + "m" (*p) ); + + return i; +} + +static OF_INLINE int +OFAtomicIntAdd(volatile int *_Nonnull p, int i) +{ + if (sizeof(int) == 4) + return OFAtomicInt32Add(p, i); +#ifdef OF_AMD64 + else if (sizeof(int) == 8) + __asm__ __volatile__ ( + "lock\n\t" + "xadd{q} { %0, %2 | %2, %0 }\n\t" + "add{q} { %1, %0 | %0, %1 }" + : "+&r" (i) + : "r" (i), + "m" (*p) + ); +#endif + else + abort(); return i; } static OF_INLINE void *_Nullable OFAtomicPointerAdd(void *volatile _Nullable *_Nonnull p, intptr_t i) { -#if defined(OF_X86_64) +#if defined(OF_AMD64) __asm__ __volatile__ ( "lock\n\t" - "xaddq %0, %2\n\t" - "addq %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) + "xadd{q} { %0, %2 | %2, %0 }\n\t" + "add{q} { %1, %0 | %0, %1 }" + : "+&r" (i) + : "r" (i), + "m" (*p) ); return (void *)i; #elif defined(OF_X86) __asm__ __volatile__ ( "lock\n\t" - "xaddl %0, %2\n\t" - "addl %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) + "xadd{l} { %0, %2 | %2, %0 }\n\t" + "add{l} { %1, %0 | %0, %1 }" + : "+&r" (i) + : "r" (i), + "m" (*p) ); return (void *)i; #endif } + +static OF_INLINE int32_t +OFAtomicInt32Subtract(volatile int32_t *_Nonnull p, int32_t i) +{ + __asm__ __volatile__ ( + "neg{l} %0\n\t" + "lock\n\t" + "xadd{l} { %0, %2 | %2, %0 }\n\t" + "sub{l} { %1, %0 | %0, %1 }" + : "+&r" (i) + : "r" (i), + "m" (*p) + ); + + return i; +} static OF_INLINE int OFAtomicIntSubtract(volatile int *_Nonnull p, int i) { if (sizeof(int) == 4) - __asm__ __volatile__ ( - "negl %0\n\t" - "lock\n\t" - "xaddl %0, %2\n\t" - "subl %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) - ); -#ifdef OF_X86_64 + return OFAtomicInt32Subtract(p, i); +#ifdef OF_AMD64 else if (sizeof(int) == 8) __asm__ __volatile__ ( - "negq %0\n\t" + "neg{q} %0\n\t" "lock\n\t" - "xaddq %0, %2\n\t" - "subq %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) + "xadd{q} { %0, %2 | %2, %0 }\n\t" + "sub{q} { %1, %0 | %0, %1 }" + : "+&r" (i) + : "r" (i), + "m" (*p) ); #endif else abort(); return i; } -static OF_INLINE int32_t -OFAtomicInt32Subtract(volatile int32_t *_Nonnull p, int32_t i) -{ - __asm__ __volatile__ ( - "negl %0\n\t" - "lock\n\t" - "xaddl %0, %2\n\t" - "subl %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) - ); - - return i; -} - static OF_INLINE void *_Nullable OFAtomicPointerSubtract(void *volatile _Nullable *_Nonnull p, intptr_t i) { -#if defined(OF_X86_64) +#if defined(OF_AMD64) __asm__ __volatile__ ( - "negq %0\n\t" + "neg{q} %0\n\t" "lock\n\t" - "xaddq %0, %2\n\t" - "subq %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) + "xadd{q} { %0, %2 | %2, %0 }\n\t" + "sub{q} { 
%1, %0 | %0, %1 }" + : "+&r" (i) + : "r" (i), + "m" (*p) ); return (void *)i; #elif defined(OF_X86) __asm__ __volatile__ ( - "negl %0\n\t" + "neg{l} %0\n\t" "lock\n\t" - "xaddl %0, %2\n\t" - "subl %1, %0" - : "+&r"(i) - : "r"(i), "m"(*p) + "xadd{l} { %0, %2 | %2, %0 }\n\t" + "sub{l} { %1, %0 | %0, %1 }" + : "+&r" (i) + : "r" (i), + "m" (*p) ); return (void *)i; #endif } + +static OF_INLINE int32_t +OFAtomicInt32Increase(volatile int32_t *_Nonnull p) +{ + int32_t i; + + __asm__ __volatile__ ( + "xor{l} %0, %0\n\t" + "inc{l} %0\n\t" + "lock\n\t" + "xadd{l} { %0, %1 | %1, %0 }\n\t" + "inc{l} %0" + : "=&r" (i) + : "m" (*p) + ); + + return i; +} static OF_INLINE int OFAtomicIntIncrease(volatile int *_Nonnull p) { int i; if (sizeof(int) == 4) - __asm__ __volatile__ ( - "xorl %0, %0\n\t" - "incl %0\n\t" - "lock\n\t" - "xaddl %0, %1\n\t" - "incl %0" - : "=&r"(i) - : "m"(*p) - ); -#ifdef OF_X86_64 - else if (sizeof(int) == 8) - __asm__ __volatile__ ( - "xorq %0, %0\n\t" - "incq %0\n\t" - "lock\n\t" - "xaddq %0, %1\n\t" - "incq %0" - : "=&r"(i) - : "m"(*p) - ); -#endif - else - abort(); - - return i; -} - -static OF_INLINE int32_t -OFAtomicInt32Increase(volatile int32_t *_Nonnull p) -{ - int32_t i; - - __asm__ __volatile__ ( - "xorl %0, %0\n\t" - "incl %0\n\t" - "lock\n\t" - "xaddl %0, %1\n\t" - "incl %0" - : "=&r"(i) - : "m"(*p) - ); - - return i; -} - -static OF_INLINE int -OFAtomicIntDecrease(volatile int *_Nonnull p) -{ - int i; - - if (sizeof(int) == 4) - __asm__ __volatile__ ( - "xorl %0, %0\n\t" - "decl %0\n\t" - "lock\n\t" - "xaddl %0, %1\n\t" - "decl %0" - : "=&r"(i) - : "m"(*p) - ); -#ifdef OF_X86_64 - else if (sizeof(int) == 8) - __asm__ __volatile__ ( - "xorq %0, %0\n\t" - "decq %0\n\t" - "lock\n\t" - "xaddq %0, %1\n\t" - "decq %0" - : "=&r"(i) - : "m"(*p) + return OFAtomicInt32Increase(p); +#ifdef OF_AMD64 + else if (sizeof(int) == 8) + __asm__ __volatile__ ( + "xor{q} %0, %0\n\t" + "inc{q} %0\n\t" + "lock\n\t" + "xadd{q} { %0, %1 | %1, %0 }\n\t" + "inc{q} %0" + : "=&r" (i) + : "m" (*p) ); #endif else abort(); @@ -242,51 +196,39 @@ OFAtomicInt32Decrease(volatile int32_t *_Nonnull p) { int32_t i; __asm__ __volatile__ ( - "xorl %0, %0\n\t" - "decl %0\n\t" + "xor{l} %0, %0\n\t" + "dec{l} %0\n\t" "lock\n\t" - "xaddl %0, %1\n\t" - "decl %0" - : "=&r"(i) - : "m"(*p) + "xadd{l} { %0, %1 | %1, %0 }\n\t" + "dec{l} %0" + : "=&r" (i) + : "m" (*p) ); return i; } -static OF_INLINE unsigned int -OFAtomicIntOr(volatile unsigned int *_Nonnull p, unsigned int i) +static OF_INLINE int +OFAtomicIntDecrease(volatile int *_Nonnull p) { + int i; + if (sizeof(int) == 4) - __asm__ __volatile__ ( - "0:\n\t" - "movl %2, %0\n\t" - "movl %0, %%eax\n\t" - "orl %1, %0\n\t" - "lock\n\t" - "cmpxchg %0, %2\n\t" - "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) - : "eax", "cc" - ); -#ifdef OF_X86_64 + return OFAtomicInt32Decrease(p); +#ifdef OF_AMD64 else if (sizeof(int) == 8) __asm__ __volatile__ ( - "0:\n\t" - "movq %2, %0\n\t" - "movq %0, %%rax\n\t" - "orq %1, %0\n\t" + "xor{q} %0, %0\n\t" + "dec{q} %0\n\t" "lock\n\t" - "cmpxchg %0, %2\n\t" - "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) - : "rax", "cc" + "xadd{q} { %0, %1 | %1, %0 }\n\t" + "dec{q} %0" + : "=&r" (i) + : "m" (*p) ); #endif else abort(); @@ -296,52 +238,43 @@ static OF_INLINE uint32_t OFAtomicInt32Or(volatile uint32_t *_Nonnull p, uint32_t i) { __asm__ __volatile__ ( "0:\n\t" - "movl %2, %0\n\t" - "movl %0, %%eax\n\t" - "orl %1, %0\n\t" + "mov{l} { %2, %0 | %0, %2 }\n\t" + "mov{l} { %0, %%eax | eax, %0 }\n\t" + "or{l} { %1, %0 | %0, %1 }\n\t" "lock\n\t" - 
"cmpxchg %0, %2\n\t" + "cmpxchg{l} { %0, %2 | %2, %0 }\n\t" "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "eax", "cc" ); return i; } static OF_INLINE unsigned int -OFAtomicIntAnd(volatile unsigned int *_Nonnull p, unsigned int i) +OFAtomicIntOr(volatile unsigned int *_Nonnull p, unsigned int i) { if (sizeof(int) == 4) - __asm__ __volatile__ ( - "0:\n\t" - "movl %2, %0\n\t" - "movl %0, %%eax\n\t" - "andl %1, %0\n\t" - "lock\n\t" - "cmpxchg %0, %2\n\t" - "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) - : "eax", "cc" - ); -#ifdef OF_X86_64 + return OFAtomicInt32Or(p, i); +#ifdef OF_AMD64 else if (sizeof(int) == 8) __asm__ __volatile__ ( "0:\n\t" - "movq %2, %0\n\t" - "movq %0, %%rax\n\t" - "andq %1, %0\n\t" + "mov{q} { %2, %0 | %0, %2 }\n\t" + "mov{q} { %0, %%rax | rax, %0 }\n\t" + "or{q} { %1, %0 | %0, %1 }\n\t" "lock\n\t" - "cmpxchg %0, %2\n\t" + "cmpxchg{q} { %0, %2 | %2, %0 }\n\t" "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "rax", "cc" ); #endif else abort(); @@ -352,52 +285,43 @@ static OF_INLINE uint32_t OFAtomicInt32And(volatile uint32_t *_Nonnull p, uint32_t i) { __asm__ __volatile__ ( "0:\n\t" - "movl %2, %0\n\t" - "movl %0, %%eax\n\t" - "andl %1, %0\n\t" + "mov{l} { %2, %0 | %0, %2 }\n\t" + "mov{l} { %0, %%eax | eax, %0 }\n\t" + "and{l} { %1, %0 | %0, %1 }\n\t" "lock\n\t" - "cmpxchg %0, %2\n\t" + "cmpxchg{l} { %0, %2 | %2, %0 }\n\t" "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "eax", "cc" ); return i; } static OF_INLINE unsigned int -OFAtomicIntXor(volatile unsigned int *_Nonnull p, unsigned int i) +OFAtomicIntAnd(volatile unsigned int *_Nonnull p, unsigned int i) { if (sizeof(int) == 4) - __asm__ __volatile__ ( - "0:\n\t" - "movl %2, %0\n\t" - "movl %0, %%eax\n\t" - "xorl %1, %0\n\t" - "lock\n\t" - "cmpxchg %0, %2\n\t" - "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) - : "eax", "cc" - ); -#ifdef OF_X86_64 + return OFAtomicInt32And(p, i); +#ifdef OF_AMD64 else if (sizeof(int) == 8) __asm__ __volatile__ ( "0:\n\t" - "movq %2, %0\n\t" - "movq %0, %%rax\n\t" - "xorq %1, %0\n\t" + "mov{q} { %2, %0 | %0, %2 }\n\t" + "mov{q} { %0, %%rax | rax, %0 }\n\t" + "and{q} { %1, %0 | %0, %1 }\n\t" "lock\n\t" - "cmpxchg %0, %2\n\t" + "cmpxchg{q} { %0, %2 | %2, %0 }\n\t" "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "rax", "cc" ); #endif else abort(); @@ -408,54 +332,86 @@ static OF_INLINE uint32_t OFAtomicInt32Xor(volatile uint32_t *_Nonnull p, uint32_t i) { __asm__ __volatile__ ( "0:\n\t" - "movl %2, %0\n\t" - "movl %0, %%eax\n\t" - "xorl %1, %0\n\t" + "mov{l} { %2, %0 | %0, %2 }\n\t" + "mov{l} { %0, %%eax | eax, %0 }\n\t" + "xor{l} { %1, %0 | %0, %1 }\n\t" "lock\n\t" - "cmpxchgl %0, %2\n\t" + "cmpxchg{l} { %0, %2 | %2, %0 }\n\t" "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "eax", "cc" ); return i; } -static OF_INLINE bool -OFAtomicIntCompareAndSwap(volatile int *_Nonnull p, int o, int n) -{ - int r; - - __asm__ __volatile__ ( - "lock\n\t" - "cmpxchg %2, %3\n\t" - "sete %b0\n\t" - "movzbl %b0, %0" - : "=&d"(r), "+a"(o) /* use d instead of r to avoid a gcc bug */ - : "r"(n), "m"(*p) - : "cc" - ); - - return r; +static OF_INLINE unsigned int +OFAtomicIntXor(volatile unsigned int *_Nonnull p, unsigned int i) +{ + if (sizeof(int) == 4) + return OFAtomicInt32Xor(p, i); +#ifdef OF_AMD64 + else if (sizeof(int) == 8) + __asm__ __volatile__ ( + "0:\n\t" + "mov{q} { %2, %0 | %0, %2 }\n\t" + "mov{q} { %0, %%rax | rax, %0 }\n\t" + "xor{q} 
{ %1, %0 | %0, %1 }\n\t" + "lock\n\t" + "cmpxchg{q} { %0, %2 | %2, %0 }\n\t" + "jne 0b" + : "=&r" (i) + : "r" (i), + "m" (*p) + : "rax", "cc" + ); +#endif + else + abort(); + + return i; } static OF_INLINE bool OFAtomicInt32CompareAndSwap(volatile int32_t *_Nonnull p, int32_t o, int32_t n) { int r; __asm__ __volatile__ ( "lock\n\t" - "cmpxchg %2, %3\n\t" + "cmpxchg{l} { %2, %3 | %3, %2 }\n\t" + "sete %b0\n\t" + "movz{bl|x} { %b0, %0 | %0, %b0 }" + : "=&d" (r), /* use d instead of r to avoid a gcc bug */ + "+a" (o) + : "r" (n), + "m" (*p) + : "cc" + ); + + return r; +} + +static OF_INLINE bool +OFAtomicIntCompareAndSwap(volatile int *_Nonnull p, int o, int n) +{ + int r; + + __asm__ __volatile__ ( + "lock\n\t" + "cmpxchg { %2, %3 | %3, %2 }\n\t" "sete %b0\n\t" - "movzbl %b0, %0" - : "=&d"(r), "+a"(o) /* use d instead of r to avoid a gcc bug */ - : "r"(n), "m"(*p) + "movz{bl|x} { %b0, %0 | %0, %b0 }" + : "=&d" (r), /* use d instead of r to avoid a gcc bug */ + "+a" (o) + : "r" (n), + "m" (*p) : "cc" ); return r; } @@ -466,27 +422,35 @@ { int r; __asm__ __volatile__ ( "lock\n\t" - "cmpxchg %2, %3\n\t" + "cmpxchg { %2, %3 | %3, %2 }\n\t" "sete %b0\n\t" - "movzbl %b0, %0" - : "=&d"(r), "+a"(o) /* use d instead of r to avoid a gcc bug */ - : "r"(n), "m"(*p) + "movz{bl|x} { %b0, %0 | %0, %b0 }" + : "=&d" (r), /* use d instead of r to avoid a gcc bug */ + "+a" (o) + : "r" (n), + "m" (*p) : "cc" ); return r; } static OF_INLINE void OFMemoryBarrier(void) { +#ifdef OF_AMD64 + __asm__ __volatile__ ( + "lock or{q} { $0, (%%rsp) | [rsp], 0 }" ::: "memory", "cc" + ); +#else __asm__ __volatile__ ( - "mfence" ::: "memory" + "lock or{l} { $0, (%%esp) | [esp], 0 }" ::: "memory", "cc" ); +#endif } static OF_INLINE void OFAcquireMemoryBarrier(void) {