diff options
author | Frank Barchard <fbarchard@google.com> | 2022-08-01 12:49:02 -0700 |
---|---|---|
committer | Frank Barchard <fbarchard@chromium.org> | 2022-08-01 22:54:48 +0000 |
commit | b028453ba61043a560298acae28eb813ea9bc67d (patch) | |
tree | ef1915fb359491eb88c283885baa6ba033a548e2 /unit_test | |
parent | fe8c78b61aaf5a8cb2045e8a6231daf2d904c1a0 (diff) | |
download | libyuv-b028453ba61043a560298acae28eb813ea9bc67d.tar.gz |
Disable bilinear 16-bit scale up for SSE2
- Undefine HAS_SCALEROWUP2_BILINEAR_16_SSE2
- Save XMM7 in ScaleRowUp2_Bilinear_16_SSE2().
- Rename HAS_SCALEROWUP2LINEAR_xxx to HAS_SCALEROWUP2_LINEAR_xxx
- DetileSplitUVRow_C() is implemented using SplitUVRow_C().
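- Changes to unit_test/planar_test.cc: enable TestDetileSplitUVPlane_Correctness (previously DISABLED_), size the U and V test buffers as ((width + 1) / 2) * height, and run the C reference DetileSplitUVPlane() once instead of once per benchmark iteration.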
Bug: libyuv:882
Change-Id: I0a8e8e5fb43bdf58ded87244e802343eacb789f2
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3795063
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Diffstat (limited to 'unit_test')
-rw-r--r-- | unit_test/planar_test.cc | 112 |
1 file changed, 54 insertions, 58 deletions
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 42166d0d..3a8c470b 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1673,80 +1673,85 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) { free_aligned_buffer_page_end(dst_opt); } -TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { +// Compares DetileSplitUV to 2 step Detile + SplitUV +TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { int i, j; // orig is tiled. Allocate enough memory for tiles. int orig_width = (benchmark_width_ + 15) & ~15; int orig_height = (benchmark_height_ + 15) & ~15; int orig_plane_size = orig_width * orig_height; - int u_plane_size = benchmark_width_ * benchmark_height_; - int v_plane_size = u_plane_size; + int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; align_buffer_page_end(orig_uv, orig_plane_size); - align_buffer_page_end(dst_u_c, u_plane_size); - align_buffer_page_end(dst_u_opt, u_plane_size); - align_buffer_page_end(dst_v_c, v_plane_size); - align_buffer_page_end(dst_v_opt, v_plane_size); + align_buffer_page_end(detiled_uv, orig_plane_size); + align_buffer_page_end(dst_u_two_stage, uv_plane_size); + align_buffer_page_end(dst_u_opt, uv_plane_size); + align_buffer_page_end(dst_v_two_stage, uv_plane_size); + align_buffer_page_end(dst_v_opt, uv_plane_size); MemRandomize(orig_uv, orig_plane_size); - memset(dst_u_c, 0, u_plane_size); - memset(dst_u_opt, 0, u_plane_size); - memset(dst_v_c, 0, v_plane_size); - memset(dst_v_opt, 0, v_plane_size); + memset(detiled_uv, 0, orig_plane_size); + memset(dst_u_two_stage, 0, uv_plane_size); + memset(dst_u_opt, 0, uv_plane_size); + memset(dst_v_two_stage, 0, uv_plane_size); + memset(dst_v_opt, 0, uv_plane_size); - // Disable all optimizations. 
- MaskCpuFlags(disable_cpu_flags_); - for (j = 0; j < benchmark_iterations_; j++) { - DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2, - dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_, - benchmark_height_, 16); - } + DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, + dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_, + benchmark_height_, 16); - // Enable optimizations. - MaskCpuFlags(benchmark_cpu_info_); + // Benchmark 2 step conversion for comparison. for (j = 0; j < benchmark_iterations_; j++) { - DetileSplitUVPlane( - orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt, - (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); + DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_, + benchmark_width_, benchmark_height_, 16); + SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage, + (benchmark_width_ + 1) / 2, dst_v_two_stage, + (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2, + benchmark_height_); } - for (i = 0; i < u_plane_size; ++i) { - EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); - } - for (i = 0; i < v_plane_size; ++i) { - EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); + for (i = 0; i < uv_plane_size; ++i) { + EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]); + EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); } free_aligned_buffer_page_end(orig_uv); - free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(detiled_uv); + free_aligned_buffer_page_end(dst_u_two_stage); free_aligned_buffer_page_end(dst_u_opt); - free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_v_two_stage); free_aligned_buffer_page_end(dst_v_opt); } -// TODO(b/228518489): Fix Segmentation fault in this test -TEST_F(LibYUVPlanarTest, DISABLED_TestDetileSplitUVPlane_Correctness) { +TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { int i, j; // orig is tiled. Allocate enough memory for tiles. 
int orig_width = (benchmark_width_ + 15) & ~15; int orig_height = (benchmark_height_ + 15) & ~15; int orig_plane_size = orig_width * orig_height; - int u_plane_size = benchmark_width_ * benchmark_height_; - int v_plane_size = u_plane_size; + int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_; align_buffer_page_end(orig_uv, orig_plane_size); - align_buffer_page_end(detiled_uv, orig_plane_size); - align_buffer_page_end(dst_u_two_stage, u_plane_size); - align_buffer_page_end(dst_u_opt, u_plane_size); - align_buffer_page_end(dst_v_two_stage, v_plane_size); - align_buffer_page_end(dst_v_opt, v_plane_size); + align_buffer_page_end(dst_u_c, uv_plane_size); + align_buffer_page_end(dst_u_opt, uv_plane_size); + align_buffer_page_end(dst_v_c, uv_plane_size); + align_buffer_page_end(dst_v_opt, uv_plane_size); MemRandomize(orig_uv, orig_plane_size); - memset(detiled_uv, 0, orig_plane_size); - memset(dst_u_two_stage, 0, u_plane_size); - memset(dst_u_opt, 0, u_plane_size); - memset(dst_v_two_stage, 0, v_plane_size); - memset(dst_v_opt, 0, v_plane_size); + memset(dst_u_c, 0, uv_plane_size); + memset(dst_u_opt, 0, uv_plane_size); + memset(dst_v_c, 0, uv_plane_size); + memset(dst_v_opt, 0, uv_plane_size); + + // Disable all optimizations. + MaskCpuFlags(disable_cpu_flags_); + + DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2, + dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_, + benchmark_height_, 16); + + // Enable optimizations. 
+ MaskCpuFlags(benchmark_cpu_info_); for (j = 0; j < benchmark_iterations_; j++) { DetileSplitUVPlane( @@ -1754,24 +1759,15 @@ TEST_F(LibYUVPlanarTest, DISABLED_TestDetileSplitUVPlane_Correctness) { (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16); } - DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_, - benchmark_width_, benchmark_height_, 16); - SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage, - (benchmark_width_ + 1) / 2, dst_v_two_stage, - (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_); - - for (i = 0; i < u_plane_size; ++i) { - EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]); - } - for (i = 0; i < v_plane_size; ++i) { - EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); + for (i = 0; i < uv_plane_size; ++i) { + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); } free_aligned_buffer_page_end(orig_uv); - free_aligned_buffer_page_end(detiled_uv); - free_aligned_buffer_page_end(dst_u_two_stage); + free_aligned_buffer_page_end(dst_u_c); free_aligned_buffer_page_end(dst_u_opt); - free_aligned_buffer_page_end(dst_v_two_stage); + free_aligned_buffer_page_end(dst_v_c); free_aligned_buffer_page_end(dst_v_opt); } |