diff options
author | George Steed <george.steed@arm.com> | 2024-03-15 17:43:09 +0000 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2024-04-19 06:37:04 +0000 |
commit | ba0bba5b2b7e38c9365a5d152b4efa0458863213 (patch) | |
tree | 5dee00e410b4b884a70fa4ee207528c2db6249b9 | |
parent | 4838e7a194592c026e18cbe9f83a08e07a0ce95b (diff) | |
download | libyuv-ba0bba5b2b7e38c9365a5d152b4efa0458863213.tar.gz |
[AArch64] Use getauxval(AT_HWCAP{,2}) for feature detection
This has the advantage of also working under emulation where
faking /proc/cpuinfo is not supported.
For the Chromium sandbox, getauxval has been supported since Android API version 18.
The minimum supported API version at time of writing is 21 so we should
be able to use getauxval unconditionally. On the off chance the call
fails, it will return 0 and we will correctly fall back to using only
Neon.
Change-Id: Ibbaa9caec1915ac0725c42d6cd2abc7ce19786c7
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5453620
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- | include/libyuv/cpu_id.h | 2 | ||||
-rw-r--r-- | source/cpu_id.cc | 69 | ||||
-rw-r--r-- | unit_test/cpu_test.cc | 27 | ||||
-rw-r--r-- | unit_test/testdata/cortex_a510.txt | 8 | ||||
-rw-r--r-- | unit_test/testdata/cortex_a715.txt | 8 | ||||
-rw-r--r-- | unit_test/testdata/cortex_x3.txt | 8 | ||||
-rw-r--r-- | unit_test/testdata/juno.txt | 15 |
7 files changed, 47 insertions, 90 deletions
diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 14d26e47..b4dc1c50 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -88,7 +88,7 @@ static __inline int TestCpuFlag(int test_flag) { LIBYUV_API int ArmCpuCaps(const char* cpuinfo_name); LIBYUV_API -int AArch64CpuCaps(const char* cpuinfo_name); +int AArch64CpuCaps(unsigned long hwcap, unsigned long hwcap2); LIBYUV_API int MipsCpuCaps(const char* cpuinfo_name); LIBYUV_API diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 221187dc..9d12ab4a 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -22,6 +22,7 @@ // For ArmCpuCaps() but unittested on all platforms #include <stdio.h> // For fopen() #include <string.h> +#include <sys/auxv.h> // For getauxval() #ifdef __cplusplus namespace libyuv { @@ -162,35 +163,31 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { return features; } +// Define hwcap values ourselves: building with an old auxv header where these +// hwcap values are not defined should not prevent features from being enabled. +#define YUV_AARCH64_HWCAP_ASIMDDP (1 << 20) +#define YUV_AARCH64_HWCAP_SVE (1 << 22) +#define YUV_AARCH64_HWCAP2_SVE2 (1 << 1) +#define YUV_AARCH64_HWCAP2_I8MM (1 << 13) + // For AArch64, but public to allow testing on any CPU. -LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(const char* cpuinfo_name) { - char cpuinfo_line[512]; - FILE* f = fopen(cpuinfo_name, "re"); - if (!f) { - // Assume Neon if /proc/cpuinfo is unavailable. - // This will occur for Chrome sandbox for Pepper or Render process. - return kCpuHasNEON; - } - memset(cpuinfo_line, 0, sizeof(cpuinfo_line)); - // Neon is mandatory on AArch64. +LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap, + unsigned long hwcap2) { + // Neon is mandatory on AArch64, so enable regardless of hwcaps. 
int features = kCpuHasNEON; - while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) { - if (memcmp(cpuinfo_line, "Features", 8) == 0) { - if (cpuinfo_search(cpuinfo_line, " asimddp", 8)) { - features |= kCpuHasNeonDotProd; - } - if (cpuinfo_search(cpuinfo_line, " i8mm", 5)) { - features |= kCpuHasNeonI8MM; - } - if (cpuinfo_search(cpuinfo_line, " sve", 4)) { - features |= kCpuHasSVE; - } - if (cpuinfo_search(cpuinfo_line, " sve2", 5)) { - features |= kCpuHasSVE2; - } - } + + if (hwcap & YUV_AARCH64_HWCAP_ASIMDDP) { + features |= kCpuHasNeonDotProd; + } + if (hwcap2 & YUV_AARCH64_HWCAP2_I8MM) { + features |= kCpuHasNeonI8MM; + } + if (hwcap & YUV_AARCH64_HWCAP_SVE) { + features |= kCpuHasSVE; + } + if (hwcap2 & YUV_AARCH64_HWCAP2_SVE2) { + features |= kCpuHasSVE2; } - fclose(f); return features; } @@ -368,18 +365,18 @@ static SAFEBUFFERS int GetCpuFlags(void) { cpu_info |= kCpuHasLOONGARCH; #endif #if defined(__arm__) || defined(__aarch64__) -// gcc -mfpu=neon defines __ARM_NEON__ -// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon. -// For Linux, /proc/cpuinfo can be tested but without that assume Neon. -#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__) - cpu_info = kCpuHasNEON; -// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon -// flag in it. -// So for aarch64, neon enabling is hard coded here. -#endif #if defined(__aarch64__) - cpu_info = AArch64CpuCaps("/proc/cpuinfo"); + // getauxval is supported since Android SDK version 18, minimum at time of + // writing is 21, so should be safe to always use this. If getauxval is + // somehow disabled then getauxval returns 0, which will leave Neon enabled + // since Neon is mandatory on AArch64. + unsigned long hwcap = getauxval(AT_HWCAP); + unsigned long hwcap2 = getauxval(AT_HWCAP2); + cpu_info = AArch64CpuCaps(hwcap, hwcap2); #else + // gcc -mfpu=neon defines __ARM_NEON__ + // __ARM_NEON__ generates code that requires Neon. 
NaCL also requires Neon. + // For Linux, /proc/cpuinfo can be tested but without that assume Neon. // Linux arm parse text file for neon detect. cpu_info = ArmCpuCaps("/proc/cpuinfo"); #endif diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index b620e806..309732d1 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -274,21 +274,20 @@ TEST_F(LibYUVBaseTest, TestLinuxArm) { } TEST_F(LibYUVBaseTest, TestLinuxAArch64) { - if (FileExists("../../unit_test/testdata/juno.txt")) { - printf("Note: testing to load \"../../unit_test/testdata/juno.txt\"\n"); + // Values taken from a Cortex-A57 machine, only Neon available. + EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U)); - EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps("../../unit_test/testdata/juno.txt")); - int v9_expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | - kCpuHasSVE | kCpuHasSVE2; - EXPECT_EQ(v9_expected, - AArch64CpuCaps("../../unit_test/testdata/cortex_a510.txt")); - EXPECT_EQ(v9_expected, - AArch64CpuCaps("../../unit_test/testdata/cortex_a715.txt")); - EXPECT_EQ(v9_expected, - AArch64CpuCaps("../../unit_test/testdata/cortex_x3.txt")); - } else { - printf("WARNING: unable to load \"../../unit_test/testdata/juno.txt\"\n"); - } + // Values taken from a Google Pixel 7. + int expected = kCpuHasNEON | kCpuHasNeonDotProd; + EXPECT_EQ(expected, AArch64CpuCaps(0x119fffU, 0x0U)); + + // Values taken from a Google Pixel 8. + expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | kCpuHasSVE | + kCpuHasSVE2; + EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f33fU)); + + // Values taken from a Neoverse N2 machine. 
+ EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f3ffU)); } TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) { diff --git a/unit_test/testdata/cortex_a510.txt b/unit_test/testdata/cortex_a510.txt deleted file mode 100644 index 8c2aeb4c..00000000 --- a/unit_test/testdata/cortex_a510.txt +++ /dev/null @@ -1,8 +0,0 @@ -processor : 0 -BogoMIPS : 49.15 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x1 -CPU part : 0xd46 -CPU revision : 1 diff --git a/unit_test/testdata/cortex_a715.txt b/unit_test/testdata/cortex_a715.txt deleted file mode 100644 index 6d1f33af..00000000 --- a/unit_test/testdata/cortex_a715.txt +++ /dev/null @@ -1,8 +0,0 @@ -processor : 4 -BogoMIPS : 49.15 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x1 -CPU part : 0xd4d -CPU revision : 0 diff --git a/unit_test/testdata/cortex_x3.txt b/unit_test/testdata/cortex_x3.txt deleted file mode 100644 index a63db856..00000000 --- a/unit_test/testdata/cortex_x3.txt +++ /dev/null @@ -1,8 +0,0 @@ -processor : 8 -BogoMIPS : 49.15 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x1 -CPU part : 0xd4e -CPU revision : 0 diff --git 
a/unit_test/testdata/juno.txt b/unit_test/testdata/juno.txt deleted file mode 100644 index dd465272..00000000 --- a/unit_test/testdata/juno.txt +++ /dev/null @@ -1,15 +0,0 @@ -Processor : AArch64 Processor rev 0 (aarch64) -processor : 0 -processor : 1 -processor : 2 -processor : 3 -processor : 4 -processor : 5 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 -CPU implementer : 0x41 -CPU architecture: AArch64 -CPU variant : 0x0 -CPU part : 0xd07 -CPU revision : 0 - -Hardware : Juno |