/*
 * Copyright (c) 2008-2023 Jonathan Schleifer <js@nil.im>
 *
 * All rights reserved.
 *
 * This file is part of ObjFW. It may be distributed under the terms of the
 * Q Public License 1.0, which can be found in the file LICENSE.QPL included in
 * the packaging of this file.
 *
 * Alternatively, it may be distributed under the terms of the GNU General
 * Public License, either version 2 or 3, which can be found in the file
 * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
 * file.
 */

#include "config.h"

#import "OFMatrix4x4.h"
#import "OFString.h"
#import "OFSystemInfo.h"
#import "OFOnce.h"

/* Values of the 4x4 identity matrix, used by +identityMatrix. */
static const float identityValues[4][4] = {
	{ 1, 0, 0, 0 },
	{ 0, 1, 0, 0 },
	{ 0, 0, 1, 0 },
	{ 0, 0, 0, 1 }
};

@implementation OFMatrix4x4
#if defined(OF_AMD64) || defined(OF_X86)
/*
 * 3DNow! implementation of -[multiplyWithMatrix:]. It is installed by
 * +[initialize] in place of the C implementation when the CPU supports 3DNow!.
 *
 * Computes result[i][j] = sum over k of matrix[i][k] * self[k][j] and then
 * overwrites self's values with the result.
 */
static void
multiplyWithMatrix_3DNow(OFMatrix4x4 *self, SEL _cmd, OFMatrix4x4 *matrix)
{
	float result[4][4];

	for (uint_fast8_t i = 0; i < 4; i++) {
		for (uint_fast8_t j = 0; j < 4; j++) {
			/*
			 * %1 points to row i of matrix, %2 points to the first
			 * element of column j of self. A row is 16 bytes, so
			 * offsets 0, 16, 32 and 48 from %2 walk down the
			 * column.
			 *
			 * mm0 = { self[0][j], self[1][j] } * matrix[i][0..1]
			 * mm1 = { self[2][j], self[3][j] } * matrix[i][2..3]
			 *
			 * Both are added, the two lanes are summed and the low
			 * lane is stored to result[i][j].
			 *
			 * NOTE(review): result[i][j] is written by the asm
			 * but is listed as an input operand; the "memory"
			 * clobber is what makes the compiler aware of the
			 * store.
			 */
			__asm__ (
			    "movd (%2), %%mm0\n\t"
			    "punpckldq 16(%2), %%mm0\n\t"
			    "pfmul (%1), %%mm0\n\t"
			    "movd 32(%2), %%mm1\n\t"
			    "punpckldq 48(%2), %%mm1\n\t"
			    "pfmul 8(%1), %%mm1\n\t"
			    "pfadd %%mm1, %%mm0\n\t"
			    "movq %%mm0, %%mm1\n\t"
			    "psrlq $32, %%mm1\n\t"
			    "pfadd %%mm1, %%mm0\n\t"
			    "movd %%mm0, %0"
			    :: "m"(result[i][j]), "r"(&matrix->_values[i][0]),
			       "r"(&self->_values[0][j])
			    : "mm0", "mm1", "memory"
			);
		}
	}

	/* Leave the MMX state once, after all 16 elements are computed. */
	__asm__ ("femms");

	memcpy(self->_values, result, sizeof(result));
}

/*
 * 3DNow! implementation of -[transformedVector:]. It is installed by
 * +[initialize] in place of the C implementation when the CPU supports 3DNow!.
 *
 * Each component of the result is the dot product of one row of self with the
 * vector. The vector is kept in mm0 (first two floats) and mm1 (last two
 * floats); two result components are packed together before each 8 byte
 * store.
 */
static OFVector4D
transformedVector_3DNow(OFMatrix4x4 *self, SEL _cmd, OFVector4D vector)
{
	OFVector4D result;

	/*
	 * %0 = &result, %1 = &self->_values, %2 = &vector.
	 *
	 * The last dot product (row at offset 48) is computed in mm0/mm1
	 * directly, as the vector is not needed anymore afterwards.
	 */
	__asm__ (
	    "movq (%2), %%mm0\n\t"
	    "movq 8(%2), %%mm1\n"
	    "\n\t"
	    "movq %%mm0, %%mm2\n\t"
	    "movq %%mm1, %%mm3\n\t"
	    "pfmul (%1), %%mm2\n\t"
	    "pfmul 8(%1), %%mm3\n\t"
	    "pfadd %%mm3, %%mm2\n\t"
	    "movq %%mm2, %%mm3\n\t"
	    "psrlq $32, %%mm3\n\t"
	    "pfadd %%mm3, %%mm2\n"
	    "\n\t"
	    "movq %%mm0, %%mm3\n\t"
	    "movq %%mm1, %%mm4\n\t"
	    "pfmul 16(%1), %%mm3\n\t"
	    "pfmul 24(%1), %%mm4\n\t"
	    "pfadd %%mm4, %%mm3\n\t"
	    "movq %%mm3, %%mm4\n\t"
	    "psrlq $32, %%mm4\n\t"
	    "pfadd %%mm4, %%mm3\n"
	    "\n\t"
	    "punpckldq %%mm3, %%mm2\n\t"
	    "movq %%mm2, (%0)\n"
	    "\n\t"
	    "movq %%mm0, %%mm2\n\t"
	    "movq %%mm1, %%mm3\n\t"
	    "pfmul 32(%1), %%mm2\n\t"
	    "pfmul 40(%1), %%mm3\n\t"
	    "pfadd %%mm3, %%mm2\n\t"
	    "movq %%mm2, %%mm3\n\t"
	    "psrlq $32, %%mm3\n\t"
	    "pfadd %%mm3, %%mm2\n"
	    "\n\t"
	    "pfmul 48(%1), %%mm0\n\t"
	    "pfmul 56(%1), %%mm1\n\t"
	    "pfadd %%mm1, %%mm0\n\t"
	    "movq %%mm0, %%mm1\n\t"
	    "psrlq $32, %%mm1\n\t"
	    "pfadd %%mm1, %%mm0\n"
	    "\n\t"
	    "punpckldq %%mm0, %%mm2\n\t"
	    "movq %%mm2, 8(%0)\n"
	    "\n\t"
	    "femms"
	    :: "r"(&result), "r"(&self->_values), "r"(&vector)
	    : "mm0", "mm1", "mm2", "mm3", "mm4", "memory"
	);

	return result;
}

/*
 * Replaces the C implementations of -[multiplyWithMatrix:] and
 * -[transformedVector:] with the 3DNow! versions above if OFSystemInfo reports
 * 3DNow! support. Only done for OFMatrix4x4 itself, not for subclasses.
 */
+ (void)initialize
{
	if (self != [OFMatrix4x4 class])
		return;

	if ([OFSystemInfo supports3DNow]) {
		SEL selector;
		const char *typeEncoding;

		selector = @selector(multiplyWithMatrix:);
		typeEncoding = method_getTypeEncoding(
		    class_getInstanceMethod(self, selector));
		class_replaceMethod(self, selector,
		    (IMP)multiplyWithMatrix_3DNow, typeEncoding);

		selector = @selector(transformedVector:);
		typeEncoding = method_getTypeEncoding(
		    class_getInstanceMethod(self, selector));
		class_replaceMethod(self, selector,
		    (IMP)transformedVector_3DNow, typeEncoding);
	}
}
#endif

/* Returns a new autoreleased matrix initialized with the identity values. */
+ (OFMatrix4x4 *)identityMatrix
{
	return [[[OFMatrix4x4 alloc]
	    initWithValues: identityValues] autorelease];
}

/* Returns a new autoreleased matrix initialized with a copy of values. */
+ (instancetype)matrixWithValues: (const float [4][4])values
{
	return [[[self alloc] initWithValues: values] autorelease];
}

/* A matrix must be created with values; plain -init is not supported. */
- (instancetype)init
{
	OF_INVALID_INIT_METHOD
}

/* Initializes the matrix by copying all 16 floats from values. */
- (instancetype)initWithValues: (const float [4][4])values
{
	self = [super init];

	memcpy(_values, values, sizeof(_values));

	return self;
}

/*
 * Returns a pointer to the matrix's own storage (not a copy), so writes
 * through the returned pointer modify the matrix.
 */
- (float (*)[4])values
{
	return _values;
}

/* Returns a new, retained OFMatrix4x4 with the same values. */
- (instancetype)copy
{
	return [[OFMatrix4x4 alloc]
	    initWithValues: (const float (*)[4])_values];
}

/*
 * Two matrices are equal if the other object is an OFMatrix4x4 (or subclass)
 * and the raw bytes of all 16 values are identical.
 */
- (bool)isEqual: (OFMatrix4x4 *)matrix
{
	if (![matrix isKindOfClass: [OFMatrix4x4 class]])
		return false;

	return (memcmp(_values, matrix->_values, sizeof(_values)) == 0);
}

/*
 * Hashes the raw 32 bit representation of every value, which matches the
 * bytewise comparison done by -[isEqual:].
 */
- (unsigned long)hash
{
	unsigned long hash;

	OFHashInit(&hash);

	for (uint_fast8_t i = 0; i < 4; i++)
		for (uint_fast8_t j = 0; j < 4; j++)
			OFHashAddHash(&hash,
			    OFFloatToRawUInt32(_values[i][j]));

	OFHashFinalize(&hash);

	return hash;
}

/*
 * Replaces the receiver with the product matrix * receiver, i.e. the
 * argument is the left operand.
 */
- (void)multiplyWithMatrix: (OFMatrix4x4 *)matrix
{
	/* Accumulate into a temporary, as _values is read while computing. */
	float result[4][4] = {{ 0 }};

	for (uint_fast8_t i = 0; i < 4; i++)
		for (uint_fast8_t j = 0; j < 4; j++)
			for (uint_fast8_t k = 0; k < 4; k++)
				result[i][j] +=
				    matrix->_values[i][k] * _values[k][j];

	memcpy(_values, result, sizeof(result));
}

/*
 * Multiplies the receiver with a translation matrix that has the vector's
 * components in the last column.
 */
- (void)translateWithVector: (OFVector3D)vector
{
	OFMatrix4x4 *translation = [[OFMatrix4x4 alloc] initWithValues:
	    (const float [4][4]){
		{ 1, 0, 0, vector.x },
		{ 0, 1, 0, vector.y },
		{ 0, 0, 1, vector.z },
		{ 0, 0, 0, 1 }
	    }];
	[self multiplyWithMatrix: translation];
	[translation release];
}

/*
 * Multiplies the receiver with a scale matrix that has the vector's
 * components on the diagonal.
 */
- (void)scaleWithVector: (OFVector3D)vector
{
	OFMatrix4x4 *scale = [[OFMatrix4x4 alloc] initWithValues:
	    (const float [4][4]){
		{ vector.x, 0, 0, 0 },
		{ 0, vector.y, 0, 0 },
		{ 0, 0, vector.z, 0 },
		{ 0, 0, 0, 1 }
	    }];
	[self multiplyWithMatrix: scale];
	[scale release];
}

/*
 * Returns the product of the receiver and the vector: each component of the
 * result is the dot product of one row of the matrix with the vector.
 */
- (OFVector4D)transformedVector: (OFVector4D)vector
{
	return OFMakeVector4D(
	    _values[0][0] * vector.x + _values[0][1] * vector.y +
	    _values[0][2] * vector.z + _values[0][3] * vector.w,
	    _values[1][0] * vector.x + _values[1][1] * vector.y +
	    _values[1][2] * vector.z + _values[1][3] * vector.w,
	    _values[2][0] * vector.x + _values[2][1] * vector.y +
	    _values[2][2] * vector.z + _values[2][3] * vector.w,
	    _values[3][0] * vector.x + _values[3][1] * vector.y +
	    _values[3][2] * vector.z + _values[3][3] * vector.w);
}

/*
 * Returns a human readable representation with one matrix row per line.
 *
 * The format string previously was empty, so the result was always an empty
 * string and all 16 arguments were ignored.
 */
- (OFString *)description
{
	return [OFString stringWithFormat:
	    @"<OFMatrix4x4: {\n"
	    @"\t%g %g %g %g\n"
	    @"\t%g %g %g %g\n"
	    @"\t%g %g %g %g\n"
	    @"\t%g %g %g %g\n"
	    @"}>",
	    _values[0][0], _values[0][1], _values[0][2], _values[0][3],
	    _values[1][0], _values[1][1], _values[1][2], _values[1][3],
	    _values[2][0], _values[2][1], _values[2][2], _values[2][3],
	    _values[3][0], _values[3][1], _values[3][2], _values[3][3]];
}
@end