aboutsummaryrefslogtreecommitdiff
path: root/unit_test
diff options
context:
space:
mode:
author Frank Barchard <fbarchard@google.com> 2023-02-27 01:23:59 -0800
committer libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2023-03-03 17:46:23 +0000
commit f9b23b9cc0ca3bd27b9acc07ea0450cd5097175d (patch)
tree a671b95e4d159f91e85d9e4053a0af6db8c1d7eb /unit_test
parent e66f436560fa8a4773fbd079837bc602cf97e35a (diff)
download libyuv-f9b23b9cc0ca3bd27b9acc07ea0450cd5097175d.tar.gz
Transpose 4x4 for SSE2 and AVX2
Skylake Xeon
  AVX2 Transpose4x4_Opt (290 ms)
  SSE2 Transpose4x4_Opt (302 ms)
  C    Transpose4x4_Opt (522 ms)

AMD Zen2
  AVX2 Transpose4x4_Opt (136 ms)
  SSE2 Transpose4x4_Opt (137 ms)
  C    Transpose4x4_Opt (431 ms)

Bug: None
Change-Id: I4997dbd5c5387c22bfd6c5960b421504e4bc8a2a
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4292946
Reviewed-by: Justin Green <greenjustin@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'unit_test')
-rw-r--r-- unit_test/rotate_test.cc 76
1 files changed, 65 insertions, 11 deletions
diff --git a/unit_test/rotate_test.cc b/unit_test/rotate_test.cc
index e8d2ca16..abc08efa 100644
--- a/unit_test/rotate_test.cc
+++ b/unit_test/rotate_test.cc
@@ -864,7 +864,55 @@ TEST_F(LibYUVRotateTest, I410Rotate270_Opt) {
#if defined(ENABLE_ROW_TESTS)
-TEST_F(LibYUVRotateTest, Transpose4x4) {
+TEST_F(LibYUVRotateTest, Transpose4x4_Test) {  // Checks one 4x4 transpose of int cells against the C version and a SIMD version.
+ // dst width and height
+ const int width = 4;
+ const int height = 4;
+ int src_pixels[4][4];
+ int dst_pixels_c[4][4];
+ int dst_pixels_opt[4][4];
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ src_pixels[i][j] = i * 10 + j;  // distinct value per cell: row * 10 + column
+ }
+ }
+ memset(dst_pixels_c, 1, width * height * 4);  // prefill the C output with 0x01 bytes
+ memset(dst_pixels_opt, 2, width * height * 4);  // prefill with 0x02 bytes; an unwritten buffer cannot equal the reference
+
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,  // reference result, computed once
+ (uint8_t*)dst_pixels_c, width * 4, width);  // NOTE(review): height * 4 and width * 4 are presumably byte strides - confirm against the declaration
+
+ const int benchmark_iterations =
+ (benchmark_iterations_ * benchmark_width_ * benchmark_height_ + 15) /  // NOTE(review): if these members are int the product could overflow for large settings - confirm
+ (4 * 4);  // iterations scaled by benchmark area over the 16 cells handled per call, rounded up
+ for (int i = 0; i < benchmark_iterations; ++i) {
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else  // pairs with the braced C call after #endif
+#elif defined(HAS_TRANSPOSE4X4_32_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else  // only NEON or SSE2 is tried in this test; no AVX2 branch appears here
+#endif
+ {  // C fallback; unconditional when neither HAS_ macro is defined
+ Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ }
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]);  // reference output must be the transpose of the source
+ EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]);  // output of the selected path must match the reference
+ }
+ }
+}
+
+TEST_F(LibYUVRotateTest, Transpose4x4_Opt) {
// dst width and height
const int width = ((benchmark_width_ * benchmark_height_ + 3) / 4 + 3) & ~3;
const int height = 4;
@@ -874,29 +922,35 @@ TEST_F(LibYUVRotateTest, Transpose4x4) {
MemRandomize(src_pixels, height * width * 4);
memset(dst_pixels_c, 1, width * height * 4);
- memset(dst_pixels_opt, 1, width * height * 4);
+ memset(dst_pixels_opt, 2, width * height * 4);
Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
(uint8_t*)dst_pixels_c, width * 4, width);
for (int i = 0; i < benchmark_iterations_; ++i) {
-#if defined(__aarch64__)
+#if defined(HAS_TRANSPOSE4X4_32_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
Transpose4x4_32_NEON((const uint8_t*)src_pixels, height * 4,
(uint8_t*)dst_pixels_opt, width * 4, width);
- } else {
+ } else
+#elif defined(HAS_TRANSPOSE4X4_32_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ Transpose4x4_32_AVX2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else if (TestCpuFlag(kCpuHasSSE2)) {
+ Transpose4x4_32_SSE2((const uint8_t*)src_pixels, height * 4,
+ (uint8_t*)dst_pixels_opt, width * 4, width);
+ } else
+#endif
+ {
Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
(uint8_t*)dst_pixels_opt, width * 4, width);
}
-#else
- Transpose4x4_32_C((const uint8_t*)src_pixels, height * 4,
- (uint8_t*)dst_pixels_opt, width * 4, width);
-#endif
}
- // for (int i = 0; i < width * height; ++i) {
- // EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
- // }
+ for (int i = 0; i < width * height; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
free_aligned_buffer_page_end(src_pixels);
free_aligned_buffer_page_end(dst_pixels_c);