diff options
author | George Steed <george.steed@arm.com> | 2024-04-19 11:10:24 +0100 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2024-05-03 18:42:51 +0000 |
commit | ee830a5f7707570537b2c1701fa88aa64cf1bb6f (patch) | |
tree | 9b50538e52404e3032c13a7b77ead92fc52d266e | |
parent | a114f85e50c31623d7cb4bae39b20dbd919cfc2a (diff) | |
download | libyuv-ee830a5f7707570537b2c1701fa88aa64cf1bb6f.tar.gz |
[AArch64] Enable feature detection on Windows and Apple Silicon
Use the platform-specific functions IsProcessorFeaturePresent() (Windows) and
sysctlbyname() (Apple) to check for individual features.
Bug: libyuv:980
Change-Id: I7971238ca72e5df862c30c2e65331c46dc634074
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5465591
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- | include/libyuv/cpu_id.h | 2 | ||||
-rw-r--r-- | source/cpu_id.cc | 65 | ||||
-rw-r--r-- | unit_test/cpu_test.cc | 2 |
3 files changed, 65 insertions, 4 deletions
diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h
index af8e0f5e..a352f22f 100644
--- a/include/libyuv/cpu_id.h
+++ b/include/libyuv/cpu_id.h
@@ -92,6 +92,7 @@ int MipsCpuCaps(const char* cpuinfo_name);
 LIBYUV_API
 int RiscvCpuCaps(const char* cpuinfo_name);

+#ifdef __aarch64__
 #if __linux__
 // On Linux, parse AArch64 features from getauxval(AT_HWCAP{,2}).
 LIBYUV_API
@@ -100,6 +101,7 @@ int AArch64CpuCaps(unsigned long hwcap, unsigned long hwcap2);
 LIBYUV_API
 int AArch64CpuCaps();
 #endif
+#endif

 // For testing, allow CPU flags to be disabled.
 // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
diff --git a/source/cpu_id.cc b/source/cpu_id.cc
index 5d192fbe..6b6e8745 100644
--- a/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -23,10 +23,22 @@
 #include <stdio.h> // For fopen()
 #include <string.h>

-#ifdef __linux__
+#if defined(__linux__) && defined(__aarch64__)
 #include <sys/auxv.h> // For getauxval()
 #endif

+#if defined(_WIN32) && defined(__aarch64__)
+#undef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#undef WIN32_EXTRA_LEAN
+#define WIN32_EXTRA_LEAN
+#include <windows.h> // For IsProcessorFeaturePresent()
+#endif
+
+#if defined(__APPLE__) && defined(__aarch64__)
+#include <sys/sysctl.h> // For sysctlbyname()
+#endif
+
 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
@@ -166,6 +178,7 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
   return features;
 }

+#ifdef __aarch64__
 #ifdef __linux__
 // Define hwcap values ourselves: building with an old auxv header where these
 // hwcap values are not defined should not prevent features from being enabled.
@@ -194,17 +207,63 @@ LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap,
   }
   return features;
 }
-#else // !defined(__linux__)
+
+#elif defined(_WIN32)
+// For AArch64, but public to allow testing on any CPU.
+LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
+  // Neon is mandatory on AArch64, so enable unconditionally.
+  int features = kCpuHasNEON;
+
+  // For more information on IsProcessorFeaturePresent(), see:
+  // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent#parameters
+#ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
+  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
+    features |= kCpuHasNeonDotProd;
+  }
+#endif
+  // No Neon I8MM or SVE feature detection available here at time of writing.
+  return features;
+}
+
+#elif defined(__APPLE__)
+static bool have_feature(const char* feature) {
+  // For more information on sysctlbyname(), see:
+  // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
+  int64_t feature_present = 0;
+  size_t size = sizeof(feature_present);
+  if (sysctlbyname(feature, &feature_present, &size, NULL, 0) != 0) {
+    return false;
+  }
+  return feature_present;
+}
+
+// For AArch64, but public to allow testing on any CPU.
+LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
+  // Neon is mandatory on AArch64, so enable unconditionally.
+  int features = kCpuHasNEON;
+
+  if (have_feature("hw.optional.arm.FEAT_DotProd")) {
+    features |= kCpuHasNeonDotProd;
+  }
+  if (have_feature("hw.optional.arm.FEAT_I8MM")) {
+    features |= kCpuHasNeonI8MM;
+  }
+  // No SVE feature detection available here at time of writing.
+  return features;
+}
+
+#else // !defined(__linux__) && !defined(_WIN32) && !defined(__APPLE__)
 // For AArch64, but public to allow testing on any CPU.
 LIBYUV_API SAFEBUFFERS int AArch64CpuCaps() {
   // Neon is mandatory on AArch64, so enable unconditionally.
   int features = kCpuHasNEON;

-  // TODO(libyuv:980) support feature detection on non-Linux platforms.
+  // TODO(libyuv:980) support feature detection on other platforms.

   return features;
 }
 #endif
+#endif // defined(__aarch64__)

 LIBYUV_API SAFEBUFFERS int RiscvCpuCaps(const char* cpuinfo_name) {
   char cpuinfo_line[512];
diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc
index 38406c9a..928ef5fb 100644
--- a/unit_test/cpu_test.cc
+++ b/unit_test/cpu_test.cc
@@ -273,7 +273,7 @@ TEST_F(LibYUVBaseTest, TestLinuxArm) {
 #endif
 }

-#ifdef __linux__
+#if defined(__linux__) && defined(__aarch64__)
 TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
   // Values taken from a Cortex-A57 machine, only Neon available.
   EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U)); |