Index: src/OFMatrix4x4.m
==================================================================
--- src/OFMatrix4x4.m
+++ src/OFMatrix4x4.m
@@ -39,65 +39,67 @@
     size_t count)
 {
	OF_ALIGN(16) float tmp[4];
 
	__asm__ __volatile__ (
-	    "test	%0, %0\n\t"
+	    "test	%[count], %[count]\n\t"
	    "jz	0f\n"
	    "\n\t"
-	    "movaps	(%2), %%xmm0\n\t"
-	    "movaps	16(%2), %%xmm1\n\t"
-	    "movaps	32(%2), %%xmm2\n\t"
+	    "movaps	(%[matrix]), %%xmm0\n\t"
+	    "movaps	16(%[matrix]), %%xmm1\n\t"
+	    "movaps	32(%[matrix]), %%xmm2\n\t"
 # ifdef OF_AMD64
-	    "movaps	48(%2), %%xmm8\n"
+	    "movaps	48(%[matrix]), %%xmm8\n"
 # endif
	    "\n\t"
	    "0:\n\t"
-	    "movaps	(%1), %%xmm3\n"
+	    "movaps	(%[vectors]), %%xmm3\n"
	    "\n\t"
	    "movaps	%%xmm0, %%xmm4\n\t"
	    "mulps	%%xmm3, %%xmm4\n\t"
-	    "movaps	%%xmm4, (%3)\n\t"
-	    "addss	4(%3), %%xmm4\n\t"
-	    "addss	8(%3), %%xmm4\n\t"
-	    "addss	12(%3), %%xmm4\n"
+	    "movaps	%%xmm4, (%[tmp])\n\t"
+	    "addss	4(%[tmp]), %%xmm4\n\t"
+	    "addss	8(%[tmp]), %%xmm4\n\t"
+	    "addss	12(%[tmp]), %%xmm4\n"
	    "\n\t"
	    "movaps	%%xmm1, %%xmm5\n\t"
	    "mulps	%%xmm3, %%xmm5\n\t"
-	    "movaps	%%xmm5, (%3)\n\t"
-	    "addss	4(%3), %%xmm5\n\t"
-	    "addss	8(%3), %%xmm5\n\t"
-	    "addss	12(%3), %%xmm5\n"
+	    "movaps	%%xmm5, (%[tmp])\n\t"
+	    "addss	4(%[tmp]), %%xmm5\n\t"
+	    "addss	8(%[tmp]), %%xmm5\n\t"
+	    "addss	12(%[tmp]), %%xmm5\n"
	    "\n\t"
	    "movaps	%%xmm2, %%xmm6\n\t"
	    "mulps	%%xmm3, %%xmm6\n\t"
-	    "movaps	%%xmm6, (%3)\n\t"
-	    "addss	4(%3), %%xmm6\n\t"
-	    "addss	8(%3), %%xmm6\n\t"
-	    "addss	12(%3), %%xmm6\n"
+	    "movaps	%%xmm6, (%[tmp])\n\t"
+	    "addss	4(%[tmp]), %%xmm6\n\t"
+	    "addss	8(%[tmp]), %%xmm6\n\t"
+	    "addss	12(%[tmp]), %%xmm6\n"
	    "\n\t"
 # ifdef OF_AMD64
	    "movaps	%%xmm8, %%xmm7\n\t"
 # else
-	    "movaps	48(%2), %%xmm7\n\t"
+	    "movaps	48(%[matrix]), %%xmm7\n\t"
 # endif
	    "mulps	%%xmm3, %%xmm7\n\t"
-	    "movaps	%%xmm7, (%3)\n\t"
-	    "addss	4(%3), %%xmm7\n\t"
-	    "addss	8(%3), %%xmm7\n\t"
-	    "addss	12(%3), %%xmm7\n"
-	    "\n\t"
-	    "movss	%%xmm4, (%1)\n\t"
-	    "movss	%%xmm5, 4(%1)\n\t"
-	    "movss	%%xmm6, 8(%1)\n\t"
-	    "movss	%%xmm7, 12(%1)\n"
-	    "\n\t"
-	    "add	$16, %1\n\t"
-	    "dec	%0\n\t"
+	    "movaps	%%xmm7, (%[tmp])\n\t"
+	    "addss	4(%[tmp]), %%xmm7\n\t"
+	    "addss	8(%[tmp]), %%xmm7\n\t"
+	    "addss	12(%[tmp]), %%xmm7\n"
+	    "\n\t"
+	    "movss	%%xmm4, (%[vectors])\n\t"
+	    "movss	%%xmm5, 4(%[vectors])\n\t"
+	    "movss	%%xmm6, 8(%[vectors])\n\t"
+	    "movss	%%xmm7, 12(%[vectors])\n"
+	    "\n\t"
+	    "add	$16, %[vectors]\n\t"
+	    "dec	%[count]\n\t"
	    "jnz	0b\n"
-	    : "+r"(count), "+r"(vectors)
-	    : "r"(self->_values), "r"(&tmp)
+	    : [count] "+r" (count),
+	      [vectors] "+r" (vectors)
+	    : [matrix] "r" (self->_values),
+	      [tmp] "r" (&tmp)
	    : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
 # ifdef OF_AMD64
	      "xmm8",
 # endif
	      "memory"
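
Note: the hunk above converts to GCC/Clang named asm operands, where %[name]
in the template refers to an operand declared as [name] "constraint" (expr).
A minimal standalone sketch of that syntax, assuming x86-64; the add_u32()
helper is hypothetical, not ObjFW code:

#include <stdint.h>

static inline uint32_t
add_u32(uint32_t a, uint32_t b)
{
	/* %[a] and %[b] bind to the operands named in the lists below. */
	__asm__ (
	    "add	%[b], %[a]"
	    : [a] "+r" (a)
	    : [b] "r" (b)
	    : "cc"
	);

	return a;
}
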
- "punpckldq 60(%2), %%mm1\n\t" - "pfmul 8(%1), %%mm1\n\t" - "pfacc %%mm1, %%mm0\n\t" - "pfacc %%mm0, %%mm0\n\t" - "movd %%mm0, 12(%0)\n" - "\n\t" - "add $16, %0\n\t" - "add $16, %1\n\t" + "movd (%[right]), %%mm0\n\t" + "punpckldq 16(%[right]), %%mm0\n\t" + "pfmul (%[left]), %%mm0\n\t" + "movd 32(%[right]), %%mm1\n\t" + "punpckldq 48(%[right]), %%mm1\n\t" + "pfmul 8(%[left]), %%mm1\n\t" + "pfacc %%mm1, %%mm0\n\t" + "pfacc %%mm0, %%mm0\n\t" + "movd %%mm0, (%[result])\n\t" + "movd 4(%[right]), %%mm0\n\t" + "punpckldq 20(%[right]), %%mm0\n\t" + "pfmul (%[left]), %%mm0\n\t" + "movd 36(%[right]), %%mm1\n\t" + "punpckldq 52(%[right]), %%mm1\n\t" + "pfmul 8(%[left]), %%mm1\n\t" + "pfacc %%mm1, %%mm0\n\t" + "pfacc %%mm0, %%mm0\n\t" + "movd %%mm0, 4(%[result])\n\t" + "movd 8(%[right]), %%mm0\n\t" + "punpckldq 24(%[right]), %%mm0\n\t" + "pfmul (%[left]), %%mm0\n\t" + "movd 40(%[right]), %%mm1\n\t" + "punpckldq 56(%[right]), %%mm1\n\t" + "pfmul 8(%[left]), %%mm1\n\t" + "pfacc %%mm1, %%mm0\n\t" + "pfacc %%mm0, %%mm0\n\t" + "movd %%mm0, 8(%[result])\n\t" + "movd 12(%[right]), %%mm0\n\t" + "punpckldq 28(%[right]), %%mm0\n\t" + "pfmul (%[left]), %%mm0\n\t" + "movd 44(%[right]), %%mm1\n\t" + "punpckldq 60(%[right]), %%mm1\n\t" + "pfmul 8(%[left]), %%mm1\n\t" + "pfacc %%mm1, %%mm0\n\t" + "pfacc %%mm0, %%mm0\n\t" + "movd %%mm0, 12(%[result])\n" + "\n\t" + "add $16, %[result]\n\t" + "add $16, %[left]\n\t" "decl %%ecx\n\t" "jnz 0b\n" "\n\t" "femms" - : "+r"(resultPtr), "+r"(left), "+r"(right) - :: "ecx", "mm0", "mm1", "memory" + : [result] "+r" (resultPtr), + [left] "+r" (left), + [right] "+r" (right) + : + : "ecx", "mm0", "mm1", "memory" ); memcpy(self->_values, result, 16 * sizeof(float)); } @@ -174,57 +179,58 @@ static void transformVectors_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors, size_t count) { __asm__ __volatile__ ( - "test %0, %0\n\t" + "test %[count], %[count]\n\t" "jz 0f\n" "\n\t" "0:\n\t" - "movq (%1), %%mm0\n\t" - "movq 8(%1), %%mm1\n" + "movq (%[vectors]), %%mm0\n\t" + "movq 8(%[vectors]), %%mm1\n" "\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t" - "pfmul (%2), %%mm2\n\t" - "pfmul 8(%2), %%mm3\n\t" + "pfmul (%[matrix]), %%mm2\n\t" + "pfmul 8(%[matrix]), %%mm3\n\t" "pfacc %%mm3, %%mm2\n\t" "pfacc %%mm2, %%mm2\n\t" "\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" - "pfmul 16(%2), %%mm3\n\t" - "pfmul 24(%2), %%mm4\n\t" + "pfmul 16(%[matrix]), %%mm3\n\t" + "pfmul 24(%[matrix]), %%mm4\n\t" "pfacc %%mm4, %%mm3\n\t" "pfacc %%mm3, %%mm3\n\t" "\n\t" "punpckldq %%mm3, %%mm2\n\t" - "movq %%mm2, (%1)\n" + "movq %%mm2, (%[vectors])\n" "\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t" - "pfmul 32(%2), %%mm2\n\t" - "pfmul 40(%2), %%mm3\n\t" + "pfmul 32(%[matrix]), %%mm2\n\t" + "pfmul 40(%[matrix]), %%mm3\n\t" "pfacc %%mm3, %%mm2\n\t" "pfacc %%mm2, %%mm2\n\t" "\n\t" - "pfmul 48(%2), %%mm0\n\t" - "pfmul 56(%2), %%mm1\n\t" + "pfmul 48(%[matrix]), %%mm0\n\t" + "pfmul 56(%[matrix]), %%mm1\n\t" "pfacc %%mm1, %%mm0\n\t" "pfacc %%mm0, %%mm0\n\t" "\n\t" "punpckldq %%mm0, %%mm2\n\t" - "movq %%mm2, 8(%1)\n" + "movq %%mm2, 8(%[vectors])\n" "\n\t" - "add $16, %1\n\t" - "dec %0\n\t" + "add $16, %[vectors]\n\t" + "dec %[count]\n\t" "jnz 0b\n" "\n\t" "0:\n\t" "femms" - : "+r"(count), "+r"(vectors) - : "r"(self->_values) + : [count] "+r" (count), + [vectors] "+r" (vectors) + : [matrix] "r" (self->_values) : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" ); } # ifndef __clang__ # pragma GCC pop_options Index: src/OFSystemInfo.m ================================================================== --- 
Index: src/OFSystemInfo.m
==================================================================
--- src/OFSystemInfo.m
+++ src/OFSystemInfo.m
@@ -285,12 +285,16 @@
	struct X86Regs regs;
 
 # if defined(OF_AMD64) && defined(__GNUC__)
	__asm__ (
	    "cpuid"
-	    : "=a"(regs.eax), "=b"(regs.ebx), "=c"(regs.ecx), "=d"(regs.edx)
-	    : "a"(eax), "c"(ecx)
+	    : "=a" (regs.eax),
+	      "=b" (regs.ebx),
+	      "=c" (regs.ecx),
+	      "=d" (regs.edx)
+	    : "a" (eax),
+	      "c" (ecx)
	);
 # elif defined(OF_X86) && defined(__GNUC__)
	/*
	 * This workaround is required by older GCC versions when using -fPIC,
	 * as ebx is a special register in PIC code. Yes, GCC is indeed not
@@ -299,12 +303,16 @@
	 */
	__asm__ (
	    "xchgl	%%ebx, %%edi\n\t"
	    "cpuid\n\t"
	    "xchgl	%%edi, %%ebx"
-	    : "=a"(regs.eax), "=D"(regs.ebx), "=c"(regs.ecx), "=d"(regs.edx)
-	    : "a"(eax), "c"(ecx)
+	    : "=a" (regs.eax),
+	      "=D" (regs.ebx),
+	      "=c" (regs.ecx),
+	      "=d" (regs.edx)
+	    : "a" (eax),
+	      "c" (ecx)
	);
 # else
	memset(&regs, 0, sizeof(regs));
 # endif
Index: src/macros.h
==================================================================
--- src/macros.h
+++ src/macros.h
@@ -496,24 +496,25 @@
 #if defined(OF_HAVE_BUILTIN_BSWAP16)
	return __builtin_bswap16(i);
 #elif (defined(OF_AMD64) || defined(OF_X86)) && defined(__GNUC__)
	__asm__ (
	    "xchg{b}	{ %h0, %b0 | %b0, %h0 }"
-	    : "=Q"(i)
-	    : "0"(i)
+	    : "=Q" (i)
+	    : "0" (i)
	);
 #elif defined(OF_POWERPC) && defined(__GNUC__)
	__asm__ (
	    "lhbrx	%0, 0, %1"
-	    : "=r"(i)
-	    : "r"(&i), "m"(i)
+	    : "=r" (i)
+	    : "r" (&i),
+	      "m" (i)
	);
 #elif defined(OF_ARMV6) && defined(__GNUC__)
	__asm__ (
	    "rev16	%0, %0"
-	    : "=r"(i)
-	    : "0"(i)
+	    : "=r" (i)
+	    : "0" (i)
	);
 #else
	i = (i & UINT16_C(0xFF00)) >> 8 | (i & UINT16_C(0x00FF)) << 8;
 #endif
@@ -526,24 +527,25 @@
 #if defined(OF_HAVE_BUILTIN_BSWAP32)
	return __builtin_bswap32(i);
 #elif (defined(OF_AMD64) || defined(OF_X86)) && defined(__GNUC__)
	__asm__ (
	    "bswap	%0"
-	    : "=q"(i)
-	    : "0"(i)
+	    : "=q" (i)
+	    : "0" (i)
	);
 #elif defined(OF_POWERPC) && defined(__GNUC__)
	__asm__ (
	    "lwbrx	%0, 0, %1"
-	    : "=r"(i)
-	    : "r"(&i), "m"(i)
+	    : "=r" (i)
+	    : "r" (&i),
+	      "m" (i)
	);
 #elif defined(OF_ARMV6) && defined(__GNUC__)
	__asm__ (
	    "rev	%0, %0"
-	    : "=r"(i)
-	    : "0"(i)
+	    : "=r" (i)
+	    : "0" (i)
	);
 #else
	i = (i & UINT32_C(0xFF000000)) >> 24 |
	    (i & UINT32_C(0x00FF0000)) >> 8 |
	    (i & UINT32_C(0x0000FF00)) << 8 |
@@ -558,20 +560,20 @@
 #if defined(OF_HAVE_BUILTIN_BSWAP64)
	return __builtin_bswap64(i);
 #elif defined(OF_AMD64) && defined(__GNUC__)
	__asm__ (
	    "bswap	%0"
-	    : "=r"(i)
-	    : "0"(i)
+	    : "=r" (i)
+	    : "0" (i)
	);
 #elif defined(OF_X86) && defined(__GNUC__)
	__asm__ (
	    "bswap	{%%}eax\n\t"
	    "bswap	{%%}edx\n\t"
	    "xchg{l}	{ %%eax, %%edx | edx, eax }"
-	    : "=A"(i)
-	    : "0"(i)
+	    : "=A" (i)
+	    : "0" (i)
	);
 #else
	i = (uint64_t)OFByteSwap32NonConst(
	    (uint32_t)(i & UINT32_C(0xFFFFFFFF))) << 32 |
	    OFByteSwap32NonConst((uint32_t)(i >> 32));
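
Note: the #else branches above are the portable definition of the byte
swaps. A self-contained sketch with a sanity check; swap32() here is a
hypothetical stand-in for ObjFW's OFByteSwap32NonConst:

#include <assert.h>
#include <stdint.h>

static uint32_t
swap32(uint32_t i)
{
	/* Move each byte to the mirrored position. */
	return (i & UINT32_C(0xFF000000)) >> 24 |
	    (i & UINT32_C(0x00FF0000)) >> 8 |
	    (i & UINT32_C(0x0000FF00)) << 8 |
	    (i & UINT32_C(0x000000FF)) << 24;
}

int
main(void)
{
	assert(swap32(0x11223344) == 0x44332211);

	return 0;
}
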
Index: src/platform/PowerPC/OFAtomic.h
==================================================================
--- src/platform/PowerPC/OFAtomic.h
+++ src/platform/PowerPC/OFAtomic.h
@@ -20,12 +20,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "add	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -37,12 +38,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "add	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -54,12 +56,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "add	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return (void *)i;
 }
@@ -71,12 +74,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "sub	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -88,12 +92,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "sub	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -105,12 +110,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "sub	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return (void *)i;
 }
@@ -124,12 +130,12 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %1\n\t"
	    "addi	%0, %0, 1\n\t"
	    "stwcx.	%0, 0, %1\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(p)
+	    : "=&r" (i)
+	    : "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -143,12 +149,12 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %1\n\t"
	    "addi	%0, %0, 1\n\t"
	    "stwcx.	%0, 0, %1\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(p)
+	    : "=&r" (i)
+	    : "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -162,12 +168,12 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %1\n\t"
	    "subi	%0, %0, 1\n\t"
	    "stwcx.	%0, 0, %1\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(p)
+	    : "=&r" (i)
+	    : "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -181,12 +187,12 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %1\n\t"
	    "subi	%0, %0, 1\n\t"
	    "stwcx.	%0, 0, %1\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(p)
+	    : "=&r" (i)
+	    : "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -198,12 +204,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "or	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -215,12 +222,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "or	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -232,12 +240,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "and	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -249,12 +258,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "and	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -266,12 +276,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "xor	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -283,12 +294,13 @@
	    "0:\n\t"
	    "lwarx	%0, 0, %2\n\t"
	    "xor	%0, %0, %1\n\t"
	    "stwcx.	%0, 0, %2\n\t"
	    "bne-	0b"
-	    : "=&r"(i)
-	    : "r"(i), "r"(p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return i;
 }
@@ -309,12 +321,14 @@
	    "b	2f\n\t"
	    "1:\n\t"
	    "stwcx.	%0, 0, %3\n\t"
	    "li	%0, 0\n\t"
	    "2:"
-	    : "=&r"(r)
-	    : "r"(o), "r"(n), "r"(p)
+	    : "=&r" (r)
+	    : "r" (o),
+	      "r" (n),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return r;
 }
@@ -335,12 +349,14 @@
	    "b	2f\n\t"
	    "1:\n\t"
	    "stwcx.	%0, 0, %3\n\t"
	    "li	%0, 0\n\t"
	    "2:"
-	    : "=&r"(r)
-	    : "r"(o), "r"(n), "r"(p)
+	    : "=&r" (r)
+	    : "r" (o),
+	      "r" (n),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return r;
 }
@@ -362,12 +378,14 @@
	    "b	2f\n\t"
	    "1:\n\t"
	    "stwcx.	%0, 0, %3\n\t"
	    "li	%0, 0\n\t"
	    "2:"
-	    : "=&r"(r)
-	    : "r"(o), "r"(n), "r"(p)
+	    : "=&r" (r)
+	    : "r" (o),
+	      "r" (n),
+	      "r" (p)
	    : "cc", "memory"
	);
 
	return r;
 }
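
Note: the lwarx/stwcx. pairs above form a load-linked/store-conditional
retry loop: lwarx reserves the word, stwcx. stores only if the reservation
still holds, and bne- retries on failure. Assuming a C11 compiler, the add
variant behaves like this sketch; atomicAdd() is hypothetical, and these
headers avoid <stdatomic.h> precisely because they must support compilers
that predate it:

#include <stdatomic.h>

static int
atomicAdd(_Atomic int *p, int i)
{
	int old = atomic_load(p);

	/* On failure, old is reloaded with the current value of *p. */
	while (!atomic_compare_exchange_weak(p, &old, old + i));

	return old + i;
}
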
Index: src/platform/x86/OFAtomic.h
==================================================================
--- src/platform/x86/OFAtomic.h
+++ src/platform/x86/OFAtomic.h
@@ -20,12 +20,13 @@
 {
	__asm__ __volatile__ (
	    "lock\n\t"
	    "xadd{l}	{ %0, %2 | %2, %0 }\n\t"
	    "add{l}	{ %1, %0 | %0, %1 }"
-	    : "+&r"(i)
-	    : "r"(i), "m"(*p)
+	    : "+&r" (i)
+	    : "r" (i),
+	      "m" (*p)
	);
 
	return i;
 }
@@ -38,12 +39,13 @@
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "lock\n\t"
		    "xadd{q}	{ %0, %2 | %2, %0 }\n\t"
		    "add{q}	{ %1, %0 | %0, %1 }"
-		    : "+&r"(i)
-		    : "r"(i), "m"(*p)
+		    : "+&r" (i)
+		    : "r" (i),
+		      "m" (*p)
		);
 #endif
	else
		abort();
@@ -56,22 +58,24 @@
 #if defined(OF_AMD64)
	__asm__ __volatile__ (
	    "lock\n\t"
	    "xadd{q}	{ %0, %2 | %2, %0 }\n\t"
	    "add{q}	{ %1, %0 | %0, %1 }"
-	    : "+&r"(i)
-	    : "r"(i), "m"(*p)
+	    : "+&r" (i)
+	    : "r" (i),
+	      "m" (*p)
	);
 
	return (void *)i;
 #elif defined(OF_X86)
	__asm__ __volatile__ (
	    "lock\n\t"
	    "xadd{l}	{ %0, %2 | %2, %0 }\n\t"
	    "add{l}	{ %1, %0 | %0, %1 }"
-	    : "+&r"(i)
-	    : "r"(i), "m"(*p)
+	    : "+&r" (i)
+	    : "r" (i),
+	      "m" (*p)
	);
 
	return (void *)i;
 #endif
 }
@@ -82,12 +86,13 @@
	__asm__ __volatile__ (
	    "neg{l}	%0\n\t"
	    "lock\n\t"
	    "xadd{l}	{ %0, %2 | %2, %0 }\n\t"
	    "sub{l}	{ %1, %0 | %0, %1 }"
-	    : "+&r"(i)
-	    : "r"(i), "m"(*p)
+	    : "+&r" (i)
+	    : "r" (i),
+	      "m" (*p)
	);
 
	return i;
 }
@@ -101,12 +106,13 @@
		__asm__ __volatile__ (
		    "neg{q}	%0\n\t"
		    "lock\n\t"
		    "xadd{q}	{ %0, %2 | %2, %0 }\n\t"
		    "sub{q}	{ %1, %0 | %0, %1 }"
-		    : "+&r"(i)
-		    : "r"(i), "m"(*p)
+		    : "+&r" (i)
+		    : "r" (i),
+		      "m" (*p)
		);
 #endif
	else
		abort();
@@ -120,23 +126,25 @@
	__asm__ __volatile__ (
	    "neg{q}	%0\n\t"
	    "lock\n\t"
	    "xadd{q}	{ %0, %2 | %2, %0 }\n\t"
	    "sub{q}	{ %1, %0 | %0, %1 }"
-	    : "+&r"(i)
-	    : "r"(i), "m"(*p)
+	    : "+&r" (i)
+	    : "r" (i),
+	      "m" (*p)
	);
 
	return (void *)i;
 #elif defined(OF_X86)
	__asm__ __volatile__ (
	    "neg{l}	%0\n\t"
	    "lock\n\t"
	    "xadd{l}	{ %0, %2 | %2, %0 }\n\t"
	    "sub{l}	{ %1, %0 | %0, %1 }"
-	    : "+&r"(i)
-	    : "r"(i), "m"(*p)
+	    : "+&r" (i)
+	    : "r" (i),
+	      "m" (*p)
	);
 
	return (void *)i;
 #endif
 }
@@ -150,12 +158,12 @@
	    "xor{l}	%0, %0\n\t"
	    "inc{l}	%0\n\t"
	    "lock\n\t"
	    "xadd{l}	{ %0, %1 | %1, %0 }\n\t"
	    "inc{l}	%0"
-	    : "=&r"(i)
-	    : "m"(*p)
+	    : "=&r" (i)
+	    : "m" (*p)
	);
 
	return i;
 }
@@ -172,12 +180,12 @@
		    "xor{q}	%0, %0\n\t"
		    "inc{q}	%0\n\t"
		    "lock\n\t"
		    "xadd{q}	{ %0, %1 | %1, %0 }\n\t"
		    "inc{q}	%0"
-		    : "=&r"(i)
-		    : "m"(*p)
+		    : "=&r" (i)
+		    : "m" (*p)
		);
 #endif
	else
		abort();
@@ -193,12 +201,12 @@
	    "xor{l}	%0, %0\n\t"
	    "dec{l}	%0\n\t"
	    "lock\n\t"
	    "xadd{l}	{ %0, %1 | %1, %0 }\n\t"
	    "dec{l}	%0"
-	    : "=&r"(i)
-	    : "m"(*p)
+	    : "=&r" (i)
+	    : "m" (*p)
	);
 
	return i;
 }
@@ -215,12 +223,12 @@
		    "xor{q}	%0, %0\n\t"
		    "dec{q}	%0\n\t"
		    "lock\n\t"
		    "xadd{q}	{ %0, %1 | %1, %0 }\n\t"
		    "dec{q}	%0"
-		    : "=&r"(i)
-		    : "m"(*p)
+		    : "=&r" (i)
+		    : "m" (*p)
		);
 #endif
	else
		abort();
@@ -236,12 +244,13 @@
	    "mov{l}	{ %0, %%eax | eax, %0 }\n\t"
	    "or{l}	{ %1, %0 | %0, %1 }\n\t"
	    "lock\n\t"
	    "cmpxchg{l}	{ %0, %2 | %2, %0 }\n\t"
	    "jne	0b"
-	    : "=&r"(i)
-	    : "r"(i), "m"(*p)
+	    : "=&r" (i)
+	    : "r" (i),
+	      "m" (*p)
	    : "eax", "cc"
	);
 
	return i;
 }
@@ -259,12 +268,13 @@
		    "mov{q}	{ %0, %%rax | rax, %0 }\n\t"
		    "or{q}	{ %1, %0 | %0, %1 }\n\t"
		    "lock\n\t"
		    "cmpxchg{q}	{ %0, %2 | %2, %0 }\n\t"
		    "jne	0b"
-		    : "=&r"(i)
-		    : "r"(i), "m"(*p)
+		    : "=&r" (i)
+		    : "r" (i),
+		      "m" (*p)
		    : "rax", "cc"
		);
 #endif
	else
		abort();
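
Note: lock/xadd above atomically exchanges the addend with the old value of
*p, so the functions add the addend back to return the new value. A
standalone sketch of that pattern using named operands; fetchAddThenAdd()
is hypothetical, not ObjFW API:

static int
fetchAddThenAdd(volatile int *p, int i)
{
	int addend = i;

	__asm__ __volatile__ (
	    "lock\n\t"
	    "xadd	%[i], %[mem]"	/* after this, i holds the old *p */
	    : [i] "+r" (i), [mem] "+m" (*p)
	    :
	    : "cc"
	);

	return i + addend;	/* old value + addend == new value */
}
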
"m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "eax", "cc" ); return i; } @@ -304,12 +315,13 @@ "mov{q} { %0, %%rax | rax, %0 }\n\t" "and{q} { %1, %0 | %0, %1 }\n\t" "lock\n\t" "cmpxchg{q} { %0, %2 | %2, %0 }\n\t" "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "rax", "cc" ); #endif else abort(); @@ -326,12 +338,13 @@ "mov{l} { %0, %%eax | eax, %0 }\n\t" "xor{l} { %1, %0 | %0, %1 }\n\t" "lock\n\t" "cmpxchg{l} { %0, %2 | %2, %0 }\n\t" "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "eax", "cc" ); return i; } @@ -349,12 +362,13 @@ "mov{q} { %0, %%rax | rax, %0 }\n\t" "xor{q} { %1, %0 | %0, %1 }\n\t" "lock\n\t" "cmpxchg{q} { %0, %2 | %2, %0 }\n\t" "jne 0b" - : "=&r"(i) - : "r"(i), "m"(*p) + : "=&r" (i) + : "r" (i), + "m" (*p) : "rax", "cc" ); #endif else abort(); @@ -370,12 +384,14 @@ __asm__ __volatile__ ( "lock\n\t" "cmpxchg{l} { %2, %3 | %3, %2 }\n\t" "sete %b0\n\t" "movz{bl|x} { %b0, %0 | %0, %b0 }" - : "=&d"(r), "+a"(o) /* use d instead of r to avoid a gcc bug */ - : "r"(n), "m"(*p) + : "=&d" (r), /* use d instead of r to avoid a gcc bug */ + "+a" (o) + : "r" (n), + "m" (*p) : "cc" ); return r; } @@ -388,12 +404,14 @@ __asm__ __volatile__ ( "lock\n\t" "cmpxchg { %2, %3 | %3, %2 }\n\t" "sete %b0\n\t" "movz{bl|x} { %b0, %0 | %0, %b0 }" - : "=&d"(r), "+a"(o) /* use d instead of r to avoid a gcc bug */ - : "r"(n), "m"(*p) + : "=&d" (r), /* use d instead of r to avoid a gcc bug */ + "+a" (o) + : "r" (n), + "m" (*p) : "cc" ); return r; } @@ -407,12 +425,14 @@ __asm__ __volatile__ ( "lock\n\t" "cmpxchg { %2, %3 | %3, %2 }\n\t" "sete %b0\n\t" "movz{bl|x} { %b0, %0 | %0, %b0 }" - : "=&d"(r), "+a"(o) /* use d instead of r to avoid a gcc bug */ - : "r"(n), "m"(*p) + : "=&d" (r), /* use d instead of r to avoid a gcc bug */ + "+a" (o) + : "r" (n), + "m" (*p) : "cc" ); return r; }