diff options
author | Frank Barchard <fbarchard@google.com> | 2023-02-27 01:23:59 -0800 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-03-03 17:46:23 +0000 |
commit | f9b23b9cc0ca3bd27b9acc07ea0450cd5097175d (patch) | |
tree | a671b95e4d159f91e85d9e4053a0af6db8c1d7eb /unit_test | |
parent | e66f436560fa8a4773fbd079837bc602cf97e35a (diff) | |
download | libyuv-f9b23b9cc0ca3bd27b9acc07ea0450cd5097175d.tar.gz |
Transpose 4x4 for SSE2 and AVX2
Skylake Xeon
AVX2 Transpose4x4_Opt (290 ms)
SSE2 Transpose4x4_Opt (302 ms)
C Transpose4x4_Opt (522 ms)
AMD Zen2
AVX2 Transpose4x4_Opt (136 ms)
SSE2 Transpose4x4_Opt (137 ms)
C Transpose4x4_Opt (431 ms)
Bug: None
Change-Id: I4997dbd5c5387c22bfd6c5960b421504e4bc8a2a
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4292946
Reviewed-by: Justin Green <greenjustin@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'unit_test')
-rw-r--r-- | unit_test/rotate_test.cc | 76 |
1 files changed, 65 insertions, 11 deletions
```diff
diff --git a/unit_test/rotate_test.cc b/unit_test/rotate_test.cc
index e8d2ca16..abc08efa 100644
--- a/unit_test/rotate_test.cc
+++ b/unit_test/rotate_test.cc
@@ -864,7 +864,55 @@ TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
 
 #if defined(ENABLE_ROW_TESTS)
 
-TEST_F(LibYUVRotateTest, Transpose4x4) {
+TEST_F(LibYUVRotateTest, Transpose4x4_Test) {
+  // dst width and height
+  const int width = 4;
+  const int height = 4;
+  int src_pixels[4][4];
+  int dst_pixels_c[4][4];
+  int dst_pixels_opt[4][4];
+
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      src_pixels[i][j] = i * 10 + j;
+    }
+  }
+  memset(dst_pixels_c, 1, width * height * 4);
+  memset(dst_pixels_opt, 2, width * height * 4);
+
+  Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+                    (uint8_t*)dst_pixels_c, width * 4, width);
+
+  const int benchmark_iterations =
+      (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) /
+      (4 * 4);
+  for (int i = 0; i < benchmark_iterations; ++i) {
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
+    if (TestCpuFlag(kCpuHasNEON)) {
+      Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
+                           (uint8_t*)dst_pixels_opt, width * 4, width);
+    } else
+#elif defined(HAS_TRANSPOSE4X4_32_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2)) {
+      Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+                           (uint8_t*)dst_pixels_opt, width * 4, width);
+    } else
+#endif
+    {
+      Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+                        (uint8_t*)dst_pixels_opt, width * 4, width);
+    }
+  }
+
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]);
+      EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]);
+    }
+  }
+}
+
+TEST_F(LibYUVRotateTest, Transpose4x4_Opt) {
   // dst width and height
   const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3;
   const int height = 4;
@@ -874,29 +922,35 @@ TEST_F(LibYUVRotateTest, Transpose4x4) {
 
   MemRandomize(src_pixels, height * width * 4);
   memset(dst_pixels_c, 1, width * height * 4);
-  memset(dst_pixels_opt, 1, width * height * 4);
+  memset(dst_pixels_opt, 2, width * height * 4);
 
   Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
                     (uint8_t*)dst_pixels_c, width * 4, width);
 
   for (int i = 0; i < benchmark_iterations_; ++i) {
-#if defined(__aarch64__)
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
     if (TestCpuFlag(kCpuHasNEON)) {
       Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
                            (uint8_t*)dst_pixels_opt, width * 4, width);
-    } else {
+    } else
+#elif defined(HAS_TRANSPOSE4X4_32_AVX2)
+    if (TestCpuFlag(kCpuHasAVX2)) {
+      Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4,
+                           (uint8_t*)dst_pixels_opt, width * 4, width);
+    } else if (TestCpuFlag(kCpuHasSSE2)) {
+      Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+                           (uint8_t*)dst_pixels_opt, width * 4, width);
+    } else
+#endif
+    {
       Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
                         (uint8_t*)dst_pixels_opt, width * 4, width);
     }
-#else
-    Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
-                      (uint8_t*)dst_pixels_opt, width * 4, width);
-#endif
   }
 
-  // for (int i = 0; i < width * height; ++i) {
-  //   EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
-  // }
+  for (int i = 0; i < width * height; ++i) {
+    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+  }
 
   free_aligned_buffer_page_end(src_pixels);
   free_aligned_buffer_page_end(dst_pixels_c);
```