From af6ac8265bbd07bcf977526458b60305c4304288 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Mon, 8 Jan 2024 23:38:36 -0800 Subject: AVX10 cpuid detect added Replace unused popcount feature bit Bug: libyuv:911 Change-Id: Icd88fcc732751d39b0950d5f09a58bc9ac2c4e30 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5179911 Reviewed-by: richard winterton Commit-Queue: Frank Barchard --- README.chromium | 2 +- include/libyuv/cpu_id.h | 4 ++-- include/libyuv/version.h | 2 +- source/cpu_id.cc | 2 +- source/row_lasx.cc | 19 ++++++++++--------- unit_test/convert_argb_test.cc | 1 - unit_test/convert_test.cc | 10 +++++----- unit_test/cpu_test.cc | 4 ++-- unit_test/unit_test.cc | 4 ++-- util/cpuid.c | 4 ++-- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/README.chromium b/README.chromium index 069dfc2c..1389f285 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv/ -Version: 1882 +Version: 1883 License: BSD License File: LICENSE Shipped: yes diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 434771bc..5a81e7c9 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -31,7 +31,7 @@ static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; -static const int kCpuHasSSE42 = 0x100; // unused at this time. +static const int kCpuHasSSE42 = 0x100; static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; @@ -43,7 +43,7 @@ static const int kCpuHasAVX512VNNI = 0x10000; static const int kCpuHasAVX512VBMI = 0x20000; static const int kCpuHasAVX512VBMI2 = 0x40000; static const int kCpuHasAVX512VBITALG = 0x80000; -static const int kCpuHasAVX512VPOPCNTDQ = 0x100000; +static const int kCpuHasAVX10 = 0x100000; static const int kCpuHasAVXVNNI = 0x200000; static const int kCpuHasAVXVNNIINT8 = 0x400000; diff --git a/include/libyuv/version.h b/include/libyuv/version.h index b477cb75..a9c54400 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1882 +#define LIBYUV_VERSION 1883 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 5dfe3b3a..eedce16b 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -322,7 +322,7 @@ static SAFEBUFFERS int GetCpuFlags(void) { cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0; cpu_info |= (cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0; cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0; - cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0; + cpu_info |= (cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0; } } #endif diff --git a/source/row_lasx.cc b/source/row_lasx.cc index 0814ef1c..be85022e 100644 --- a/source/row_lasx.cc +++ b/source/row_lasx.cc @@ -543,7 +543,8 @@ void I422ToARGB4444Row_LASX(const uint8_t* src_y, __m256i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg; __m256i vec_ubvr, vec_ugvg; __m256i const_0x80 = __lasx_xvldi(0x80); - __m256i alpha = (__m256i)v4u64{0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000, 0xF000F000F000F000}; + __m256i alpha = (__m256i)v4u64{0xF000F000F000F000, 0xF000F000F000F000, + 0xF000F000F000F000, 0xF000F000F000F000}; __m256i mask = {0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0, 0x00F000F000F000F0}; @@ -595,7 +596,7 @@ void I422ToARGB1555Row_LASX(const uint8_t* src_y, __m256i vec_ubvr, vec_ugvg; __m256i const_0x80 = __lasx_xvldi(0x80); __m256i alpha = (__m256i)v4u64{0x8000800080008000, 0x8000800080008000, - 0x8000800080008000, 0x8000800080008000}; + 0x8000800080008000, 0x8000800080008000}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_yg, vec_yb); vec_ubvr = __lasx_xvilvl_h(vec_ub, vec_vr); @@ -799,7 +800,7 @@ void ARGBToUVRow_LASX(const uint8_t* src_argb0, __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_argb0, 0, src_argb0, 32, src_argb0, 64, @@ -1037,7 +1038,7 @@ void ARGBToUV444Row_LASX(const uint8_t* src_argb, __m256i const_94 = __lasx_xvldi(94); __m256i const_18 = __lasx_xvldi(18); __m256i const_0x8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + 0x8080808080808080, 0x8080808080808080}; __m256i control = {0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}; for (x = 0; x < len; x++) { @@ -1609,7 +1610,7 @@ void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_argb1555, 0, src_argb1555, 32, next_argb1555, 0, @@ -1726,7 +1727,7 @@ void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + 0x8080808080808080, 0x8080808080808080}; for (x = 0; x < len; x++) { DUP4_ARG2(__lasx_xvld, src_rgb565, 0, src_rgb565, 32, next_rgb565, 0, @@ -1793,7 +1794,7 @@ void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + 0x8080808080808080, 0x8080808080808080}; __m256i shuff0_b = {0x15120F0C09060300, 0x00000000001E1B18, 0x15120F0C09060300, 0x00000000001E1B18}; __m256i shuff1_b = {0x0706050403020100, 0x1D1A1714110A0908, @@ -1856,7 +1857,7 @@ void RAWToUVRow_LASX(const uint8_t* src_raw, __m256i const_94 = __lasx_xvldi(0x42F); __m256i const_18 = __lasx_xvldi(0x409); __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + 0x8080808080808080, 0x8080808080808080}; __m256i shuff0_r = {0x15120F0C09060300, 0x00000000001E1B18, 0x15120F0C09060300, 0x00000000001E1B18}; __m256i shuff1_r = {0x0706050403020100, 0x1D1A1714110A0908, @@ -2242,7 +2243,7 @@ void ARGBToUVJRow_LASX(const uint8_t* src_argb, __m256i const_53 = __lasx_xvldi(0x435); __m256i const_10 = __lasx_xvldi(0x40A); __m256i const_8080 = (__m256i)v4u64{0x8080808080808080, 0x8080808080808080, - 0x8080808080808080, 0x8080808080808080}; + 0x8080808080808080, 0x8080808080808080}; __m256i shuff = {0x1614060412100200, 0x1E1C0E0C1A180A08, 0x1715070513110301, 0x1F1D0F0D1B190B09}; diff --git a/unit_test/convert_argb_test.cc b/unit_test/convert_argb_test.cc index 90451db7..aeee8a7f 100644 --- a/unit_test/convert_argb_test.cc +++ b/unit_test/convert_argb_test.cc @@ -783,7 +783,6 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1) TESTATOA(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1) TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) - #define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##Dither##N) { \ diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index dd20c747..f55bace3 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -438,11 +438,11 @@ int I400ToNV21(const uint8_t* src_y, SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) #else -#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ - DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ - SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ +#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ + TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) #endif diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index ab01a4e8..437b6632 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -53,7 +53,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); - int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); + int has_avx10 = TestCpuFlag(kCpuHasAVX10); int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); printf("Has X86 0x%x\n", has_x86); @@ -72,7 +72,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq); + printf("Has AVX10 0x%x\n", has_avx10); printf("HAS AVXVNNI 0x%x\n", has_avxvnni); printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); #endif diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc index 3192760c..239d5b92 100644 --- a/unit_test/unit_test.cc +++ b/unit_test/unit_test.cc @@ -144,8 +144,8 @@ int TestCpuEnv(int cpu_info) { if (TestEnv("LIBYUV_DISABLE_AVX512VBITALG")) { cpu_info &= ~libyuv::kCpuHasAVX512VBITALG; } - if (TestEnv("LIBYUV_DISABLE_AVX512VPOPCNTDQ")) { - cpu_info &= ~libyuv::kCpuHasAVX512VPOPCNTDQ; + if (TestEnv("LIBYUV_DISABLE_AVX10")) { + cpu_info &= ~libyuv::kCpuHasAVX10; } if (TestEnv("LIBYUV_DISABLE_AVXVNNI")) { cpu_info &= ~libyuv::kCpuHasAVXVNNI; diff --git a/util/cpuid.c b/util/cpuid.c index 61399a77..c07e6e95 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -102,7 +102,7 @@ int main(int argc, const char* argv[]) { int has_avx512vbmi = TestCpuFlag(kCpuHasAVX512VBMI); int has_avx512vbmi2 = TestCpuFlag(kCpuHasAVX512VBMI2); int has_avx512vbitalg = TestCpuFlag(kCpuHasAVX512VBITALG); - int has_avx512vpopcntdq = TestCpuFlag(kCpuHasAVX512VPOPCNTDQ); + int has_avx10 = TestCpuFlag(kCpuHasAVX10); int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); printf("Has X86 0x%x\n", has_x86); @@ -121,7 +121,7 @@ int main(int argc, const char* argv[]) { printf("Has AVX512VBMI 0x%x\n", has_avx512vbmi); printf("Has AVX512VBMI2 0x%x\n", has_avx512vbmi2); printf("Has AVX512VBITALG 0x%x\n", has_avx512vbitalg); - printf("Has AVX512VPOPCNTDQ 0x%x\n", has_avx512vpopcntdq); + printf("Has AVX10 0x%x\n", has_avx10); printf("HAS AVXVNNI 0x%x\n", has_avxvnni); printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); } -- cgit v1.2.3