diff options
author | Frank Barchard <fbarchard@google.com> | 2021-03-24 13:45:04 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2021-03-24 21:37:10 +0000 |
commit | 312c02a5aad4adda67cb2e0cc93a497d12845522 (patch) | |
tree | ce776a4db30d2319fad3bbf41fe48d6cdf1e2602 /unit_test/planar_test.cc | |
parent | d8f1bfc9816a9fc76f3a25cc0ee272fb9c07622a (diff) | |
download | libyuv-312c02a5aad4adda67cb2e0cc93a497d12845522.tar.gz |
Fixes for SplitUVPlane_16 and MergeUVPlane_16
Planar functions pass depth instead of scale factor.
Row functions pass shift instead of depth. Add assert to C.
AVX shift instruction expects a single shift value in XMM.
Neon pass shift as input (not output).
Split Neon reimplemented as left shift on shorts by negative to achieve right shift.
Add planar unit tests
Bug: libyuv:888
Change-Id: I8fe62d3d777effc5321c361cd595c58b7f93807e
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2782086
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Diffstat (limited to 'unit_test/planar_test.cc')
-rw-r--r-- | unit_test/planar_test.cc | 98 |
1 file changed, 98 insertions, 0 deletions
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index fd1755cd..75f1e5d5 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -2605,6 +2605,64 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
   free_aligned_buffer_page_end(dst_pixels_c);
 }
 
+// 16 bit channel split and merge
+TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+  align_buffer_page_end(src_pixels, kPixels * 2 * 2);
+  align_buffer_page_end(tmp_pixels_u_c, kPixels * 2);
+  align_buffer_page_end(tmp_pixels_v_c, kPixels * 2);
+  align_buffer_page_end(tmp_pixels_u_opt, kPixels * 2);
+  align_buffer_page_end(tmp_pixels_v_opt, kPixels * 2);
+  align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
+  align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
+  MemRandomize(src_pixels, kPixels * 2 * 2);
+  MemRandomize(tmp_pixels_u_c, kPixels * 2);
+  MemRandomize(tmp_pixels_v_c, kPixels * 2);
+  MemRandomize(tmp_pixels_u_opt, kPixels * 2);
+  MemRandomize(tmp_pixels_v_opt, kPixels * 2);
+  MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
+  MemRandomize(dst_pixels_c, kPixels * 2 * 2);
+
+  MaskCpuFlags(disable_cpu_flags_);
+  SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+                  (uint16_t*)tmp_pixels_u_c, benchmark_width_,
+                  (uint16_t*)tmp_pixels_v_c, benchmark_width_, benchmark_width_,
+                  benchmark_height_, 12);
+  MergeUVPlane_16((const uint16_t*)tmp_pixels_u_c, benchmark_width_,
+                  (const uint16_t*)tmp_pixels_v_c, benchmark_width_,
+                  (uint16_t*)dst_pixels_c, benchmark_width_ * 2,
+                  benchmark_width_, benchmark_height_, 12);
+  MaskCpuFlags(benchmark_cpu_info_);
+
+  SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+                  (uint16_t*)tmp_pixels_u_opt, benchmark_width_,
+                  (uint16_t*)tmp_pixels_v_opt, benchmark_width_,
+                  benchmark_width_, benchmark_height_, 12);
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    MergeUVPlane_16((const uint16_t*)tmp_pixels_u_opt, benchmark_width_,
+                    (const uint16_t*)tmp_pixels_v_opt, benchmark_width_,
+                    (uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
+                    benchmark_width_, benchmark_height_, 12);
+  }
+
+  for (int i = 0; i < kPixels * 2; ++i) {
+    EXPECT_EQ(tmp_pixels_u_c[i], tmp_pixels_u_opt[i]);
+    EXPECT_EQ(tmp_pixels_v_c[i], tmp_pixels_v_opt[i]);
+  }
+  for (int i = 0; i < kPixels * 2 * 2; ++i) {
+    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+  }
+  free_aligned_buffer_page_end(src_pixels);
+  free_aligned_buffer_page_end(tmp_pixels_u_c);
+  free_aligned_buffer_page_end(tmp_pixels_v_c);
+  free_aligned_buffer_page_end(tmp_pixels_u_opt);
+  free_aligned_buffer_page_end(tmp_pixels_v_opt);
+  free_aligned_buffer_page_end(dst_pixels_opt);
+  free_aligned_buffer_page_end(dst_pixels_c);
+}
+
 TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
   // Round count up to multiple of 16
   const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
@@ -2649,6 +2707,46 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
   free_aligned_buffer_page_end(dst_pixels_c);
 }
 
+// 16 bit channel split
+TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+  align_buffer_page_end(src_pixels, kPixels * 2 * 2);
+  align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
+  align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
+  align_buffer_page_end(dst_pixels_u_opt, kPixels * 2);
+  align_buffer_page_end(dst_pixels_v_opt, kPixels * 2);
+  MemRandomize(src_pixels, kPixels * 2 * 2);
+  MemRandomize(dst_pixels_u_c, kPixels * 2);
+  MemRandomize(dst_pixels_v_c, kPixels * 2);
+  MemRandomize(dst_pixels_u_opt, kPixels * 2);
+  MemRandomize(dst_pixels_v_opt, kPixels * 2);
+
+  MaskCpuFlags(disable_cpu_flags_);
+  SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+                  (uint16_t*)dst_pixels_u_c, benchmark_width_,
+                  (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_,
+                  benchmark_height_, 10);
+  MaskCpuFlags(benchmark_cpu_info_);
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+                    (uint16_t*)dst_pixels_u_opt, benchmark_width_,
+                    (uint16_t*)dst_pixels_v_opt, benchmark_width_,
+                    benchmark_width_, benchmark_height_, 10);
+  }
+
+  for (int i = 0; i < kPixels * 2; ++i) {
+    EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
+    EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
+  }
+  free_aligned_buffer_page_end(src_pixels);
+  free_aligned_buffer_page_end(dst_pixels_u_c);
+  free_aligned_buffer_page_end(dst_pixels_v_c);
+  free_aligned_buffer_page_end(dst_pixels_u_opt);
+  free_aligned_buffer_page_end(dst_pixels_v_opt);
+}
+
 TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
   // Round count up to multiple of 16
   const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;