Index: src/OFSystemInfo.h ================================================================== --- src/OFSystemInfo.h +++ src/OFSystemInfo.h @@ -236,94 +236,78 @@ * @return Whether the CPU supports enhanced 3DNow! */ + (bool)supportsEnhanced3DNow; /** - * @brief Returns whether the CPU supports SSE. - * - * @warning This method only checks CPU support and assumes OS support! + * @brief Returns whether the CPU and OS support SSE. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports SSE + * @return Whether the CPU and OS support SSE */ + (bool)supportsSSE; /** - * @brief Returns whether the CPU supports SSE2. - * - * @warning This method only checks CPU support and assumes OS support! + * @brief Returns whether the CPU and OS support SSE2. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports SSE2 + * @return Whether the CPU and OS support SSE2 */ + (bool)supportsSSE2; /** - * @brief Returns whether the CPU supports SSE3. - * - * @warning This method only checks CPU support and assumes OS support! + * @brief Returns whether the CPU and OS support SSE3. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports SSE3 + * @return Whether the CPU and OS support SSE3 */ + (bool)supportsSSE3; /** - * @brief Returns whether the CPU supports SSSE3. - * - * @warning This method only checks CPU support and assumes OS support! + * @brief Returns whether the CPU and OS support SSSE3. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports SSSE3 + * @return Whether the CPU and OS support SSSE3 */ + (bool)supportsSSSE3; /** - * @brief Returns whether the CPU supports SSE4.1. - * - * @warning This method only checks CPU support and assumes OS support! + * @brief Returns whether the CPU and OS support SSE4.1. * * @note This method is only available on AMD64 and x86. 
* - * @return Whether the CPU supports SSE4.1 + * @return Whether the CPU and OS support SSE4.1 */ + (bool)supportsSSE41; /** - * @brief Returns whether the CPU supports SSE4.2. - * - * @warning This method only checks CPU support and assumes OS support! + * @brief Returns whether the CPU and OS support SSE4.2. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports SSE4.2 + * @return Whether the CPU and OS support SSE4.2 */ + (bool)supportsSSE42; /** - * @brief Returns whether the CPU supports AVX. - * - * @warning This method only checks CPU support and assumes OS support! + * @brief Returns whether the CPU and OS support AVX. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX + * @return Whether the CPU and OS support AVX */ + (bool)supportsAVX; /** - * @brief Returns whether the CPU supports AVX2. - * - * @warning This method only checks CPU support and assumes OS support! + * @brief Returns whether the CPU and OS support AVX2. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX2 + * @return Whether the CPU and OS support AVX2 */ + (bool)supportsAVX2; /** * @brief Returns whether the CPU supports AES-NI. @@ -344,12 +328,10 @@ + (bool)supportsSHAExtensions; /** * @brief Returns whether the CPU supports fused multiply-add. * - * @warning This method only checks CPU support and assumes OS support! - * * @note This method is only available on AMD64 and x86. * * @return Whether the CPU supports fused multiply-add */ + (bool)supportsFusedMultiplyAdd; @@ -362,152 +344,158 @@ * @return Whether the CPU supports F16C */ + (bool)supportsF16C; /** - * @brief Returns whether the CPU supports AVX-512 Foundation. + * @brief Returns whether the CPU and OS support AVX-512 Foundation. * * @note This method is only available on AMD64 and x86. 
* - * @return Whether the CPU supports AVX-512 Foundation + * @return Whether the CPU and OS support AVX-512 Foundation */ + (bool)supportsAVX512Foundation; /** - * @brief Returns whether the CPU supports AVX-512 Conflict Detection + * @brief Returns whether the CPU and OS support AVX-512 Conflict Detection * Instructions. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Conflict Detection Instructions + * @return Whether the CPU and OS support AVX-512 Conflict Detection + * Instructions */ + (bool)supportsAVX512ConflictDetectionInstructions; /** - * @brief Returns whether the CPU supports AVX-512 Exponential and Reciprocal - * Instructions. + * @brief Returns whether the CPU and OS support AVX-512 Exponential and + * Reciprocal Instructions. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Exponential and Reciprocal + * @return Whether the CPU and OS support AVX-512 Exponential and Reciprocal * Instructions */ + (bool)supportsAVX512ExponentialAndReciprocalInstructions; /** - * @brief Returns whether the CPU supports AVX-512 Prefetch Instructions. + * @brief Returns whether the CPU and OS support AVX-512 Prefetch Instructions. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Prefetch Instructions + * @return Whether the CPU and OS support AVX-512 Prefetch Instructions */ + (bool)supportsAVX512PrefetchInstructions; /** - * @brief Returns whether the CPU supports AVX-512 Vector Length Extensions. + * @brief Returns whether the CPU and OS support AVX-512 Vector Length + * Extensions. * * @note This method is only available on AMD64 and x86. 
* - * @return Whether the CPU supports AVX-512 Vector Length Extensions + * @return Whether the CPU and OS support AVX-512 Vector Length Extensions */ + (bool)supportsAVX512VectorLengthExtensions; /** - * @brief Returns whether the CPU supports AVX-512 Doubleword and Quadword + * @brief Returns whether the CPU and OS support AVX-512 Doubleword and Quadword * Instructions. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Doubleword and Quadword Instructions + * @return Whether the CPU and OS support AVX-512 Doubleword and Quadword + * Instructions */ + (bool)supportsAVX512DoublewordAndQuadwordInstructions; /** - * @brief Returns whether the CPU supports AVX-512 Byte and Word Instructions. + * @brief Returns whether the CPU and OS support AVX-512 Byte and Word + * Instructions. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Byte and Word Instructions + * @return Whether the CPU and OS support AVX-512 Byte and Word Instructions */ + (bool)supportsAVX512ByteAndWordInstructions; /** - * @brief Returns whether the CPU supports AVX-512 Integer Fused Multiply Add. + * @brief Returns whether the CPU and OS support AVX-512 Integer Fused + * Multiply-Add. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Integer Fused Multiply Add + * @return Whether the CPU and OS support AVX-512 Integer Fused Multiply-Add */ + (bool)supportsAVX512IntegerFusedMultiplyAdd; /** - * @brief Returns whether the CPU supports AVX-512 Vector Byte Manipulation - * Instructions. + * @brief Returns whether the CPU and OS support AVX-512 Vector Byte + * Manipulation Instructions. * * @note This method is only available on AMD64 and x86. 
* - * @return Whether the CPU supports AVX-512 Vector Byte Manipulation + * @return Whether the CPU and OS support AVX-512 Vector Byte Manipulation * Instructions */ + (bool)supportsAVX512VectorByteManipulationInstructions; /** - * @brief Returns whether the CPU supports the AVX-512 Vector Population Count - * Instruction. + * @brief Returns whether the CPU and OS support the AVX-512 Vector Population + * Count Instruction. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 the Vector Population Count + * @return Whether the CPU and OS support the AVX-512 Vector Population Count * Instruction */ + (bool)supportsAVX512VectorPopulationCountInstruction; /** - * @brief Returns whether the CPU supports AVX-512 Vector Neural Network + * @brief Returns whether the CPU and OS support AVX-512 Vector Neural Network * Instructions. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Vector Neural Network Instructions + * @return Whether the CPU and OS support AVX-512 Vector Neural Network + * Instructions */ + (bool)supportsAVX512VectorNeuralNetworkInstructions; /** - * @brief Returns whether the CPU supports AVX-512 Vector Byte Manipulation - * Instructions 2. + * @brief Returns whether the CPU and OS support AVX-512 Vector Byte + * Manipulation Instructions 2. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Vector Byte Manipulation + * @return Whether the CPU and OS support AVX-512 Vector Byte Manipulation * Instructions 2 */ + (bool)supportsAVX512VectorByteManipulationInstructions2; /** - * @brief Returns whether the CPU supports AVX-512 Bit Algorithms. + * @brief Returns whether the CPU and OS support AVX-512 Bit Algorithms. * * @note This method is only available on AMD64 and x86. 
* - * @return Whether the CPU supports AVX-512 Bit Algorithms + * @return Whether the CPU and OS support AVX-512 Bit Algorithms */ + (bool)supportsAVX512BitAlgorithms; /** - * @brief Returns whether the CPU supports AVX-512 Float16 Instructions. + * @brief Returns whether the CPU and OS support AVX-512 Float16 Instructions. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 Float16 Instructions + * @return Whether the CPU and OS support AVX-512 Float16 Instructions */ + (bool)supportsAVX512Float16Instructions; /** - * @brief Returns whether the CPU supports AVX-512 BFloat16 Instructions. + * @brief Returns whether the CPU and OS support AVX-512 BFloat16 Instructions. * * @note This method is only available on AMD64 and x86. * - * @return Whether the CPU supports AVX-512 BFloat16 Instructions + * @return Whether the CPU and OS support AVX-512 BFloat16 Instructions */ + (bool)supportsAVX512BFloat16Instructions; #endif #if defined(OF_POWERPC) || defined(OF_POWERPC64) || defined(DOXYGEN)
Index: src/OFSystemInfo.m ================================================================== --- src/OFSystemInfo.m +++ src/OFSystemInfo.m
@@ -316,10 +316,40 @@ memset(&regs, 0, sizeof(regs)); # endif return regs; } + +/* + * Reads the extended control register selected by ecx using XGETBV. In XCR0 + * (ecx = 0), bit 1 is SSE state, bit 2 is AVX state and bits 5-7 are AVX-512 + * state, so AVX needs mask 0x06 and AVX-512 needs mask 0xE6 to be fully set. + */ +static OF_INLINE struct X86Regs +x86XCR(uint32_t ecx) +{ + struct X86Regs regs = { 0 }; + + if (!(x86CPUID(1, 0).ecx & (1u << 27))) { + /* + * OSXSAVE is clear, so XGETBV is unavailable. Assume only + * the legacy x87/SSE state is enabled for XCR0, so SSE is + * still reported on CPUs and OSes without XSAVE. + */ + regs.eax = (ecx == 0 ? 0x03 : 0); + return regs; + } + + __asm__ ( + "xgetbv" + : "=a" (regs.eax), + "=d" (regs.edx) + : "c" (ecx) + ); + + return regs; +} #endif @implementation OFSystemInfo + (void)initialize {
@@ -757,46 +787,54 @@ x86CPUID(0x80000001, 0).edx & (1u << 30)); } + (bool)supportsSSE { - return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).edx & (1u << 25)); + return ((x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).edx & (1u << 25)) && + x86XCR(0).eax & (1u << 1)); } + (bool)supportsSSE2 { - return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).edx & (1u << 26)); + return ((x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).edx & (1u << 26)) && + x86XCR(0).eax & (1u << 1)); } + (bool)supportsSSE3 { - return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 0)); + return ((x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 0)) && + x86XCR(0).eax & (1u << 1)); } + (bool)supportsSSSE3 { - return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 9)); + return ((x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 9)) && + x86XCR(0).eax & (1u << 1)); } + (bool)supportsSSE41 { - return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 19)); + return ((x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 19)) && + x86XCR(0).eax & (1u << 1)); } + (bool)supportsSSE42 { - return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 20)); + return ((x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 20)) && + x86XCR(0).eax & (1u << 1)); } + (bool)supportsAVX { - return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 28)); + return ((x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 28)) && + (x86XCR(0).eax & 0x06) == 0x06); } + (bool)supportsAVX2 { - return (x86CPUID(0, 0).eax >= 7 && (x86CPUID(7, 0).ebx & (1u << 5))); + return ((x86CPUID(0, 0).eax >= 7 && (x86CPUID(7, 0).ebx & (1u << 5))) && + (x86XCR(0).eax & 0x06) == 0x06); } + (bool)supportsAESNI { return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 25));
@@ -817,81 +855,96 @@ return (x86CPUID(0, 0).eax >= 1 && x86CPUID(1, 0).ecx & (1u << 29)); } + (bool)supportsAVX512Foundation { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 16)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 16)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512ConflictDetectionInstructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 28)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 28)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512ExponentialAndReciprocalInstructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 27)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 27)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512PrefetchInstructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 26)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 26)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512VectorLengthExtensions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 31)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 31)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512DoublewordAndQuadwordInstructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 17)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 17)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512ByteAndWordInstructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 30)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 30)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512IntegerFusedMultiplyAdd { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 21)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ebx & (1u << 21)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512VectorByteManipulationInstructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 1)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 1)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512VectorPopulationCountInstruction { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 14)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 14)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512VectorNeuralNetworkInstructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 11)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 11)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512VectorByteManipulationInstructions2 { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 6)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 6)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512BitAlgorithms { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 12)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).ecx & (1u << 12)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512Float16Instructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).edx & (1u << 23)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 0).edx & (1u << 23)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } + (bool)supportsAVX512BFloat16Instructions { - return (x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 1).eax & (1u << 5)); + return ((x86CPUID(0, 0).eax >= 7 && x86CPUID(7, 1).eax & (1u << 5)) && + (x86XCR(0).eax & 0xE6) == 0xE6); } #endif #if defined(OF_POWERPC) || defined(OF_POWERPC64) + (bool)supportsAltiVec