Index: src/OFMatrix4x4.h
==================================================================
--- src/OFMatrix4x4.h
+++ src/OFMatrix4x4.h
@@ -88,8 +88,16 @@
  *
  * @param vector The vector to transform
  * @return The transformed vector
  */
 - (OFVector4D)transformedVector: (OFVector4D)vector;
+
+/**
+ * @brief Transforms the specified vectors in-place according to the matrix.
+ *
+ * @param vectors The vectors to transform
+ * @param count The count of vectors to transform
+ */
+- (void)transformVectors: (OFVector4D *)vectors count: (size_t)count;
 @end
 
 OF_ASSUME_NONNULL_END

Index: src/OFMatrix4x4.m
==================================================================
--- src/OFMatrix4x4.m
+++ src/OFMatrix4x4.m
@@ -27,11 +27,11 @@
 	{ 0, 0, 1, 0 },
 	{ 0, 0, 0, 1 }
 };
 
 @implementation OFMatrix4x4
-#if defined(OF_AMD64) || defined(OF_X86)
+#if (defined(OF_AMD64) || defined(OF_X86)) && defined(__GNUC__)
 # ifndef __clang__
 # pragma GCC push_options
 # pragma GCC target("3dnow")
 # endif
 static void
@@ -93,119 +93,144 @@
 	__asm__ ("femms");
 
 	memcpy(self->_values, result, sizeof(result));
 }
 
-static OFVector4D
-transformedVector_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D vector)
+/*
+ * Transforms count vectors in place, one per loop iteration.
+ * Operands: %0 = vectors left, %1 = current vector, %2 = matrix values.
+ * Label 0 is defined twice: "jz 0f" exits to the one before femms, while
+ * "jmp 0b" returns to the one at the top. pswapd + pfadd adds the two halves
+ * of each row's partial sums.
+ */
+static void
+transformVectors_enhanced3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors,
+    size_t count)
 {
-	OFVector4D result;
-
 	__asm__ (
-	    "movq (%2), %%mm0\n\t"
-	    "movq 8(%2), %%mm1\n"
+	    "0:\n\t"
+	    "test %0, %0\n\t"
+	    "jz 0f\n"
+	    "\n\t"
+	    "movq (%1), %%mm0\n\t"
+	    "movq 8(%1), %%mm1\n"
 	    "\n\t"
 	    "movq %%mm0, %%mm2\n\t"
 	    "movq %%mm1, %%mm3\n\t"
-	    "pfmul (%1), %%mm2\n\t"
-	    "pfmul 8(%1), %%mm3\n\t"
+	    "pfmul (%2), %%mm2\n\t"
+	    "pfmul 8(%2), %%mm3\n\t"
 	    "pfadd %%mm3, %%mm2\n\t"
 	    "pswapd %%mm2, %%mm3\n\t"
 	    "pfadd %%mm3, %%mm2\n"
 	    "\n\t"
 	    "movq %%mm0, %%mm3\n\t"
 	    "movq %%mm1, %%mm4\n\t"
-	    "pfmul 16(%1), %%mm3\n\t"
-	    "pfmul 24(%1), %%mm4\n\t"
+	    "pfmul 16(%2), %%mm3\n\t"
+	    "pfmul 24(%2), %%mm4\n\t"
 	    "pfadd %%mm4, %%mm3\n\t"
 	    "pswapd %%mm3, %%mm4\n\t"
 	    "pfadd %%mm4, %%mm3\n"
 	    "\n\t"
 	    "punpckldq %%mm3, %%mm2\n\t"
-	    "movq %%mm2, (%0)\n"
+	    "movq %%mm2, (%1)\n"
 	    "\n\t"
 	    "movq %%mm0, %%mm2\n\t"
 	    "movq %%mm1, %%mm3\n\t"
-	    "pfmul 32(%1), %%mm2\n\t"
-	    "pfmul 40(%1), %%mm3\n\t"
+	    "pfmul 32(%2), %%mm2\n\t"
+	    "pfmul 40(%2), %%mm3\n\t"
 	    "pfadd %%mm3, %%mm2\n\t"
 	    "pswapd %%mm2, %%mm3\n\t"
 	    "pfadd %%mm3, %%mm2\n"
 	    "\n\t"
-	    "pfmul 48(%1), %%mm0\n\t"
-	    "pfmul 56(%1), %%mm1\n\t"
+	    "pfmul 48(%2), %%mm0\n\t"
+	    "pfmul 56(%2), %%mm1\n\t"
 	    "pfadd %%mm1, %%mm0\n\t"
 	    "pswapd %%mm0, %%mm1\n\t"
 	    "pfadd %%mm1, %%mm0\n"
 	    "\n\t"
 	    "punpckldq %%mm0, %%mm2\n\t"
-	    "movq %%mm2, 8(%0)\n"
+	    "movq %%mm2, 8(%1)\n"
+	    "\n\t"
+	    "add $16, %1\n\t"
+	    "dec %0\n\t"
+	    "jmp 0b\n"
 	    "\n\t"
+	    "0:\n\t"
 	    "femms"
-	    :: "r"(&result), "r"(&self->_values), "r"(&vector)
+	    : "+r"(count), "+r"(vectors)
+	    : "r"(&self->_values)
 	    : "mm0", "mm1", "mm2", "mm3", "mm4", "memory"
 	);
-
-	return result;
 }
 
-static OFVector4D
-transformedVector_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D vector)
+/*
+ * Same loop and operands as transformVectors_enhanced3DNow, but the two
+ * halves are added using movq + psrlq $32 instead of pswapd.
+ */
+static void
+transformVectors_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D *vectors,
+    size_t count)
 {
-	OFVector4D result;
-
 	__asm__ (
-	    "movq (%2), %%mm0\n\t"
-	    "movq 8(%2), %%mm1\n"
+	    "0:\n\t"
+	    "test %0, %0\n\t"
+	    "jz 0f\n"
+	    "\n\t"
+	    "movq (%1), %%mm0\n\t"
+	    "movq 8(%1), %%mm1\n"
 	    "\n\t"
 	    "movq %%mm0, %%mm2\n\t"
 	    "movq %%mm1, %%mm3\n\t"
-	    "pfmul (%1), %%mm2\n\t"
-	    "pfmul 8(%1), %%mm3\n\t"
+	    "pfmul (%2), %%mm2\n\t"
+	    "pfmul 8(%2), %%mm3\n\t"
 	    "pfadd %%mm3, %%mm2\n\t"
 	    "movq %%mm2, %%mm3\n\t"
 	    "psrlq $32, %%mm3\n\t"
 	    "pfadd %%mm3, %%mm2\n"
 	    "\n\t"
 	    "movq %%mm0, %%mm3\n\t"
 	    "movq %%mm1, %%mm4\n\t"
-	    "pfmul 16(%1), %%mm3\n\t"
-	    "pfmul 24(%1), %%mm4\n\t"
+	    "pfmul 16(%2), %%mm3\n\t"
+	    "pfmul 24(%2), %%mm4\n\t"
 	    "pfadd %%mm4, %%mm3\n\t"
 	    "movq %%mm3, %%mm4\n\t"
 	    "psrlq $32, %%mm4\n\t"
 	    "pfadd %%mm4, %%mm3\n"
 	    "\n\t"
 	    "punpckldq %%mm3, %%mm2\n\t"
-	    "movq %%mm2, (%0)\n"
+	    "movq %%mm2, (%1)\n"
 	    "\n\t"
 	    "movq %%mm0, %%mm2\n\t"
 	    "movq %%mm1, %%mm3\n\t"
-	    "pfmul 32(%1), %%mm2\n\t"
-	    "pfmul 40(%1), %%mm3\n\t"
+	    "pfmul 32(%2), %%mm2\n\t"
+	    "pfmul 40(%2), %%mm3\n\t"
 	    "pfadd %%mm3, %%mm2\n\t"
 	    "movq %%mm2, %%mm3\n\t"
 	    "psrlq $32, %%mm3\n\t"
 	    "pfadd %%mm3, %%mm2\n"
 	    "\n\t"
-	    "pfmul 48(%1), %%mm0\n\t"
-	    "pfmul 56(%1), %%mm1\n\t"
+	    "pfmul 48(%2), %%mm0\n\t"
+	    "pfmul 56(%2), %%mm1\n\t"
 	    "pfadd %%mm1, %%mm0\n\t"
 	    "movq %%mm0, %%mm1\n\t"
 	    "psrlq $32, %%mm1\n\t"
 	    "pfadd %%mm1, %%mm0\n"
 	    "\n\t"
 	    "punpckldq %%mm0, %%mm2\n\t"
-	    "movq %%mm2, 8(%0)\n"
+	    "movq %%mm2, 8(%1)\n"
 	    "\n\t"
+	    "add $16, %1\n\t"
+	    "dec %0\n\t"
+	    "jmp 0b\n"
+	    "0:\n\t"
 	    "femms"
-	    :: "r"(&result), "r"(&self->_values), "r"(&vector)
+	    : "+r"(count), "+r"(vectors)
+	    : "r"(&self->_values)
 	    : "mm0", "mm1", "mm2", "mm3", "mm4", "memory"
 	);
-
-	return result;
 }
+
 # ifndef __clang__
 # pragma GCC pop_options
 # endif
 
 + (void)initialize
@@ -221,16 +246,17 @@
 	class_replaceMethod(self, selector, (IMP)func, typeEncoding);
 
 	if ([OFSystemInfo supportsEnhanced3DNow]) {
 		REPLACE(@selector(multiplyWithMatrix:),
 		    multiplyWithMatrix_enhanced3DNow)
-		REPLACE(@selector(transformedVector:),
-		    transformedVector_enhanced3DNow)
+		REPLACE(@selector(transformVectors:count:),
+		    transformVectors_enhanced3DNow)
 	} else if ([OFSystemInfo supports3DNow]) {
 		REPLACE(@selector(multiplyWithMatrix:),
 		    multiplyWithMatrix_3DNow)
-		REPLACE(@selector(transformedVector:), transformedVector_3DNow)
+		REPLACE(@selector(transformVectors:count:),
+		    transformVectors_3DNow)
 	}
 # undef REPLACE
 }
 #endif
 
@@ -335,19 +361,35 @@
 	[scale release];
 }
 
 - (OFVector4D)transformedVector: (OFVector4D)vector
 {
-	return OFMakeVector4D(
-	    _values[0][0] * vector.x + _values[0][1] * vector.y +
-	    _values[0][2] * vector.z + _values[0][3] * vector.w,
-	    _values[1][0] * vector.x + _values[1][1] * vector.y +
-	    _values[1][2] * vector.z + _values[1][3] * vector.w,
-	    _values[2][0] * vector.x + _values[2][1] * vector.y +
-	    _values[2][2] * vector.z + _values[2][3] * vector.w,
-	    _values[3][0] * vector.x + _values[3][1] * vector.y +
-	    _values[3][2] * vector.z + _values[3][3] * vector.w);
+	OFVector4D copy = vector;
+
+	[self transformVectors: &copy count: 1];
+
+	return copy;
+}
+
+- (void)transformVectors: (OFVector4D *)vectors count: (size_t)count
+{
+	for (size_t i = 0; i < count; i++) {
+		OFVector4D vector = vectors[i];
+
+		vectors[i].x = _values[0][0] * vector.x +
+		    _values[0][1] * vector.y + _values[0][2] * vector.z +
+		    _values[0][3] * vector.w;
+		vectors[i].y = _values[1][0] * vector.x +
+		    _values[1][1] * vector.y + _values[1][2] * vector.z +
+		    _values[1][3] * vector.w;
+		vectors[i].z = _values[2][0] * vector.x +
+		    _values[2][1] * vector.y + _values[2][2] * vector.z +
+		    _values[2][3] * vector.w;
+		vectors[i].w = _values[3][0] * vector.x +
+		    _values[3][1] * vector.y + _values[3][2] * vector.z +
+		    _values[3][3] * vector.w;
+	}
 }
 
 - (OFString *)description
 {
 	return [OFString stringWithFormat:

Index: tests/OFMatrix4x4Tests.m
==================================================================
--- tests/OFMatrix4x4Tests.m
+++ tests/OFMatrix4x4Tests.m
@@ -23,10 +23,11 @@
 - (void)matrix4x4Tests
 {
 	void *pool = objc_autoreleasePoolPush();
 	OFMatrix4x4 *matrix, *matrix2;
 	OFVector4D point;
+	OFVector4D points[2] = {{ 1, 2, 3, 1 }, { 7, 8, 9, 2 }};
 
 	TEST(@"+[identityMatrix]",
 	    memcmp([[OFMatrix4x4 identityMatrix] values], (const float [4][4]){
 		{ 1, 0, 0, 0 },
 		{ 0, 1, 0, 0 },
@@ -91,13 +92,14 @@
 	    R([matrix2 scaleWithVector: OFMakeVector3D(-1, 0.5f, 2)]) &&
 	    R(point = [matrix2 transformedVector:
 	    OFMakeVector4D(2, 3, 4, 1)]) &&
 	    point.x == -3 && point.y == 2.5 && point.z == 14 && point.w == 1)
 
-	TEST(@"-[transformedVector:]",
-	    R((point =
-	    [matrix transformedVector: OFMakeVector4D(1, 2, 3, 1)])) &&
-	    point.x == 18 && point.y == 46 && point.z == 74 && point.w == 102)
+	TEST(@"-[transformVectors:count:]",
+	    R([matrix transformVectors: points count: 2]) &&
+	    points[0].x == 18 && points[0].y == 46 && points[0].z == 74 &&
+	    points[0].w == 102 && points[1].x == 58 && points[1].y == 162 &&
+	    points[1].z == 266 && points[1].w == 370)
 
 	objc_autoreleasePoolPop(pool);
 }
 @end