Index: src/OFMatrix4x4.m ================================================================== --- src/OFMatrix4x4.m +++ src/OFMatrix4x4.m @@ -72,120 +72,10 @@ } # ifndef __clang__ # pragma GCC pop_options # endif -# ifndef __clang__ -# pragma GCC push_options -# pragma GCC target("3dnow,3dnowa") -# endif -static void -multiplyWithMatrix_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, - OFMatrix4x4 *matrix) -{ - float (*left)[4] = matrix->_values, (*right)[4] = self->_values; - float result[4][4], (*resultPtr)[4] = result; - - __asm__ __volatile__ ( - "xorw %%cx, %%cx\n" - "\n\t" - "0:\n\t" - "movd (%2), %%mm0\n\t" - "punpckldq 16(%2), %%mm0\n\t" - "pfmul (%1), %%mm0\n\t" - "movd 32(%2), %%mm1\n\t" - "punpckldq 48(%2), %%mm1\n\t" - "pfmul 8(%1), %%mm1\n\t" - "pfadd %%mm1, %%mm0\n\t" - "pswapd %%mm0, %%mm1\n\t" - "pfadd %%mm1, %%mm0\n\t" - "movd %%mm0, (%0)\n" - "\n\t" - "add $4, %0\n\t" - "add $4, %2\n\t" - "incb %%cl\n\t" - "cmpb $4, %%cl\n\t" - "jb 0b\n" - "\n\t" - "add $16, %1\n\t" - "sub $16, %2\n\t" - "xorb %%cl, %%cl\n\t" - "incb %%ch\n\t" - "cmpb $4, %%ch\n\t" - "jb 0b\n" - "\n\t" - "femms" - : "+r"(resultPtr), "+r"(left), "+r"(right) - :: "cx", "mm0", "mm1", "memory" - ); - - memcpy(self->_values, result, 16 * sizeof(float)); -} - -static void -transformVectors_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors, - size_t count) -{ - __asm__ __volatile__ ( - "test %0, %0\n\t" - "jz 0f\n" - "\n\t" - "0:\n\t" - "movq (%1), %%mm0\n\t" - "movq 8(%1), %%mm1\n" - "\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm1, %%mm3\n\t" - "pfmul (%2), %%mm2\n\t" - "pfmul 8(%2), %%mm3\n\t" - "pfadd %%mm3, %%mm2\n\t" - "pswapd %%mm2, %%mm3\n\t" - "pfadd %%mm3, %%mm2\n" - "\n\t" - "movq %%mm0, %%mm3\n\t" - "movq %%mm1, %%mm4\n\t" - "pfmul 16(%2), %%mm3\n\t" - "pfmul 24(%2), %%mm4\n\t" - "pfadd %%mm4, %%mm3\n\t" - "pswapd %%mm3, %%mm4\n\t" - "pfadd %%mm4, %%mm3\n" - "\n\t" - "punpckldq %%mm3, %%mm2\n\t" - "movq %%mm2, (%1)\n" - "\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm1, %%mm3\n\t" - "pfmul 32(%2), %%mm2\n\t" - "pfmul 40(%2), %%mm3\n\t" - "pfadd %%mm3, %%mm2\n\t" - "pswapd %%mm2, %%mm3\n\t" - "pfadd %%mm3, %%mm2\n" - "\n\t" - "pfmul 48(%2), %%mm0\n\t" - "pfmul 56(%2), %%mm1\n\t" - "pfadd %%mm1, %%mm0\n\t" - "pswapd %%mm0, %%mm1\n\t" - "pfadd %%mm1, %%mm0\n" - "\n\t" - "punpckldq %%mm0, %%mm2\n\t" - "movq %%mm2, 8(%1)\n" - "\n\t" - "add $16, %1\n\t" - "dec %0\n\t" - "jnz 0b\n" - "\n\t" - "0:\n\t" - "femms" - : "+r"(count), "+r"(vectors) - : "r"(self->_values) - : "mm0", "mm1", "mm2", "mm3", "mm4", "memory" - ); -} -# ifndef __clang__ -# pragma GCC pop_options -# endif - # ifndef __clang__ # pragma GCC push_options # pragma GCC target("3dnow") # endif static void @@ -202,14 +92,12 @@ "punpckldq 16(%2), %%mm0\n\t" "pfmul (%1), %%mm0\n\t" "movd 32(%2), %%mm1\n\t" "punpckldq 48(%2), %%mm1\n\t" "pfmul 8(%1), %%mm1\n\t" - "pfadd %%mm1, %%mm0\n\t" - "movq %%mm0, %%mm1\n\t" - "psrlq $32, %%mm1\n\t" - "pfadd %%mm1, %%mm0\n\t" + "pfacc %%mm1, %%mm0\n\t" + "pfacc %%mm0, %%mm0\n\t" "movd %%mm0, (%0)\n" "\n\t" "add $4, %0\n\t" "add $4, %2\n\t" "incb %%cl\n\t" @@ -245,42 +133,34 @@ "\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t" "pfmul (%2), %%mm2\n\t" "pfmul 8(%2), %%mm3\n\t" - "pfadd %%mm3, %%mm2\n\t" - "movq %%mm2, %%mm3\n\t" - "psrlq $32, %%mm3\n\t" - "pfadd %%mm3, %%mm2\n" + "pfacc %%mm3, %%mm2\n\t" + "pfacc %%mm2, %%mm2\n\t" "\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" "pfmul 16(%2), %%mm3\n\t" "pfmul 24(%2), %%mm4\n\t" - "pfadd %%mm4, %%mm3\n\t" - "movq %%mm3, %%mm4\n\t" - "psrlq $32, %%mm4\n\t" - "pfadd %%mm4, %%mm3\n" + "pfacc %%mm4, %%mm3\n\t" + "pfacc %%mm3, %%mm3\n\t" "\n\t" "punpckldq %%mm3, %%mm2\n\t" "movq %%mm2, (%1)\n" "\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t" "pfmul 32(%2), %%mm2\n\t" "pfmul 40(%2), %%mm3\n\t" - "pfadd %%mm3, %%mm2\n\t" - "movq %%mm2, %%mm3\n\t" - "psrlq $32, %%mm3\n\t" - "pfadd %%mm3, %%mm2\n" + "pfacc %%mm3, %%mm2\n\t" + "pfacc %%mm2, %%mm2\n\t" "\n\t" "pfmul 48(%2), %%mm0\n\t" "pfmul 56(%2), %%mm1\n\t" - "pfadd %%mm1, %%mm0\n\t" - "movq %%mm0, %%mm1\n\t" - "psrlq $32, %%mm1\n\t" - "pfadd %%mm1, %%mm0\n" + "pfacc %%mm1, %%mm0\n\t" + "pfacc %%mm0, %%mm0\n\t" "\n\t" "punpckldq %%mm0, %%mm2\n\t" "movq %%mm2, 8(%1)\n" "\n\t" "add $16, %1\n\t" @@ -312,21 +192,14 @@ if ([OFSystemInfo supportsSSE41]) { REPLACE(@selector(transformVectors:count:), transformVectors_SSE41) } else if ([OFSystemInfo supports3DNow]) { - if ([OFSystemInfo supportsEnhanced3DNow]) { - REPLACE(@selector(multiplyWithMatrix:), - multiplyWithMatrix_enhanced3DNow) - REPLACE(@selector(transformVectors:count:), - transformVectors_enhanced3DNow) - } else { - REPLACE(@selector(multiplyWithMatrix:), - multiplyWithMatrix_3DNow) - REPLACE(@selector(transformVectors:count:), - transformVectors_3DNow) - } + REPLACE(@selector(multiplyWithMatrix:), + multiplyWithMatrix_3DNow) + REPLACE(@selector(transformVectors:count:), + transformVectors_3DNow) } # undef REPLACE } #endif