aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author George Steed <george.steed@arm.com> 2024-03-15 17:43:09 +0000
committer Frank Barchard <fbarchard@chromium.org> 2024-04-19 06:37:04 +0000
commit ba0bba5b2b7e38c9365a5d152b4efa0458863213 (patch)
tree 5dee00e410b4b884a70fa4ee207528c2db6249b9
parent 4838e7a194592c026e18cbe9f83a08e07a0ce95b (diff)
download libyuv-ba0bba5b2b7e38c9365a5d152b4efa0458863213.tar.gz
[AArch64] Use getauxval(AT_HWCAP{,2}) for feature detection
This has the advantage of also working under emulation, where faking /proc/cpuinfo is not supported.

For the Chromium sandbox, getauxval is supported since API version 18. The minimum supported API version at time of writing is 21, so we should be able to use getauxval unconditionally. On the off-chance the call fails, it will return 0 and we will correctly fall back to using only Neon.

Change-Id: Ibbaa9caec1915ac0725c42d6cd2abc7ce19786c7
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5453620
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- include/libyuv/cpu_id.h 2
-rw-r--r-- source/cpu_id.cc 69
-rw-r--r-- unit_test/cpu_test.cc 27
-rw-r--r-- unit_test/testdata/cortex_a510.txt 8
-rw-r--r-- unit_test/testdata/cortex_a715.txt 8
-rw-r--r-- unit_test/testdata/cortex_x3.txt 8
-rw-r--r-- unit_test/testdata/juno.txt 15
7 files changed, 47 insertions, 90 deletions
diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h
index 14d26e47..b4dc1c50 100644
--- a/include/libyuv/cpu_id.h
+++ b/include/libyuv/cpu_id.h
@@ -88,7 +88,7 @@ static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
LIBYUV_API
-int AArch64CpuCaps(const char* cpuinfo_name);
+int AArch64CpuCaps(unsigned long hwcap, unsigned long hwcap2);
LIBYUV_API
int MipsCpuCaps(const char* cpuinfo_name);
LIBYUV_API
diff --git a/source/cpu_id.cc b/source/cpu_id.cc
index 221187dc..9d12ab4a 100644
--- a/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -22,6 +22,7 @@
// For ArmCpuCaps() but unittested on all platforms
#include <stdio.h> // For fopen()
#include <string.h>
+#include <sys/auxv.h> // For getauxval()
#ifdef __cplusplus
namespace libyuv {
@@ -162,35 +163,31 @@ LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
return features;
}
+// Define hwcap values ourselves: building with an old auxv header where these
+// hwcap values are not defined should not prevent features from being enabled.
+#define YUV_AARCH64_HWCAP_ASIMDDP (1 << 20)
+#define YUV_AARCH64_HWCAP_SVE (1 << 22)
+#define YUV_AARCH64_HWCAP2_SVE2 (1 << 1)
+#define YUV_AARCH64_HWCAP2_I8MM (1 << 13)
+
// For AArch64, but public to allow testing on any CPU.
-LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(const char* cpuinfo_name) {
- char cpuinfo_line[512];
- FILE* f = fopen(cpuinfo_name, "re");
- if (!f) {
- // Assume Neon if /proc/cpuinfo is unavailable.
- // This will occur for Chrome sandbox for Pepper or Render process.
- return kCpuHasNEON;
- }
- memset(cpuinfo_line, 0, sizeof(cpuinfo_line));
- // Neon is mandatory on AArch64.
+LIBYUV_API SAFEBUFFERS int AArch64CpuCaps(unsigned long hwcap,
+ unsigned long hwcap2) {
+ // Neon is mandatory on AArch64, so enable regardless of hwcaps.
int features = kCpuHasNEON;
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f)) {
- if (memcmp(cpuinfo_line, "Features", 8) == 0) {
- if (cpuinfo_search(cpuinfo_line, " asimddp", 8)) {
- features |= kCpuHasNeonDotProd;
- }
- if (cpuinfo_search(cpuinfo_line, " i8mm", 5)) {
- features |= kCpuHasNeonI8MM;
- }
- if (cpuinfo_search(cpuinfo_line, " sve", 4)) {
- features |= kCpuHasSVE;
- }
- if (cpuinfo_search(cpuinfo_line, " sve2", 5)) {
- features |= kCpuHasSVE2;
- }
- }
+
+ if (hwcap & YUV_AARCH64_HWCAP_ASIMDDP) {
+ features |= kCpuHasNeonDotProd;
+ }
+ if (hwcap2 & YUV_AARCH64_HWCAP2_I8MM) {
+ features |= kCpuHasNeonI8MM;
+ }
+ if (hwcap & YUV_AARCH64_HWCAP_SVE) {
+ features |= kCpuHasSVE;
+ }
+ if (hwcap2 & YUV_AARCH64_HWCAP2_SVE2) {
+ features |= kCpuHasSVE2;
}
- fclose(f);
return features;
}
@@ -368,18 +365,18 @@ static SAFEBUFFERS int GetCpuFlags(void) {
cpu_info |= kCpuHasLOONGARCH;
#endif
#if defined(__arm__) || defined(__aarch64__)
-// gcc -mfpu=neon defines __ARM_NEON__
-// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
-// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
-#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
- cpu_info = kCpuHasNEON;
-// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
-// flag in it.
-// So for aarch64, neon enabling is hard coded here.
-#endif
#if defined(__aarch64__)
- cpu_info = AArch64CpuCaps("/proc/cpuinfo");
+ // getauxval is supported since Android SDK version 18, minimum at time of
+ // writing is 21, so should be safe to always use this. If getauxval is
+ // somehow disabled then getauxval returns 0, which will leave Neon enabled
+ // since Neon is mandatory on AArch64.
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ unsigned long hwcap2 = getauxval(AT_HWCAP2);
+ cpu_info = AArch64CpuCaps(hwcap, hwcap2);
#else
+ // gcc -mfpu=neon defines __ARM_NEON__
+ // __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
+ // For Linux, /proc/cpuinfo can be tested but without that assume Neon.
// Linux arm parse text file for neon detect.
cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc
index b620e806..309732d1 100644
--- a/unit_test/cpu_test.cc
+++ b/unit_test/cpu_test.cc
@@ -274,21 +274,20 @@ TEST_F(LibYUVBaseTest, TestLinuxArm) {
}
TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
- if (FileExists("../../unit_test/testdata/juno.txt")) {
- printf("Note: testing to load \"../../unit_test/testdata/juno.txt\"\n");
+ // Values taken from a Cortex-A57 machine, only Neon available.
+ EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U));
- EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps("../../unit_test/testdata/juno.txt"));
- int v9_expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM |
- kCpuHasSVE | kCpuHasSVE2;
- EXPECT_EQ(v9_expected,
- AArch64CpuCaps("../../unit_test/testdata/cortex_a510.txt"));
- EXPECT_EQ(v9_expected,
- AArch64CpuCaps("../../unit_test/testdata/cortex_a715.txt"));
- EXPECT_EQ(v9_expected,
- AArch64CpuCaps("../../unit_test/testdata/cortex_x3.txt"));
- } else {
- printf("WARNING: unable to load \"../../unit_test/testdata/juno.txt\"\n");
- }
+ // Values taken from a Google Pixel 7.
+ int expected = kCpuHasNEON | kCpuHasNeonDotProd;
+ EXPECT_EQ(expected, AArch64CpuCaps(0x119fffU, 0x0U));
+
+ // Values taken from a Google Pixel 8.
+ expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | kCpuHasSVE |
+ kCpuHasSVE2;
+ EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f33fU));
+
+ // Values taken from a Neoverse N2 machine.
+ EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f3ffU));
}
TEST_F(LibYUVBaseTest, TestLinuxMipsMsa) {
diff --git a/unit_test/testdata/cortex_a510.txt b/unit_test/testdata/cortex_a510.txt
deleted file mode 100644
index 8c2aeb4c..00000000
--- a/unit_test/testdata/cortex_a510.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-processor : 0
-BogoMIPS : 49.15
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
-CPU implementer : 0x41
-CPU architecture: 8
-CPU variant : 0x1
-CPU part : 0xd46
-CPU revision : 1
diff --git a/unit_test/testdata/cortex_a715.txt b/unit_test/testdata/cortex_a715.txt
deleted file mode 100644
index 6d1f33af..00000000
--- a/unit_test/testdata/cortex_a715.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-processor : 4
-BogoMIPS : 49.15
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
-CPU implementer : 0x41
-CPU architecture: 8
-CPU variant : 0x1
-CPU part : 0xd4d
-CPU revision : 0
diff --git a/unit_test/testdata/cortex_x3.txt b/unit_test/testdata/cortex_x3.txt
deleted file mode 100644
index a63db856..00000000
--- a/unit_test/testdata/cortex_x3.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-processor : 8
-BogoMIPS : 49.15
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bti
-CPU implementer : 0x41
-CPU architecture: 8
-CPU variant : 0x1
-CPU part : 0xd4e
-CPU revision : 0
diff --git a/unit_test/testdata/juno.txt b/unit_test/testdata/juno.txt
deleted file mode 100644
index dd465272..00000000
--- a/unit_test/testdata/juno.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-Processor : AArch64 Processor rev 0 (aarch64)
-processor : 0
-processor : 1
-processor : 2
-processor : 3
-processor : 4
-processor : 5
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
-CPU implementer : 0x41
-CPU architecture: AArch64
-CPU variant : 0x0
-CPU part : 0xd07
-CPU revision : 0
-
-Hardware : Juno