diff options
author | Chong Zhang <chz@google.com> | 2019-07-01 16:31:02 -0700 |
---|---|---|
committer | Chong Zhang <chz@google.com> | 2019-07-01 17:26:16 -0700 |
commit | 211e09de645270d087fec038afaadb8343e45f03 (patch) | |
tree | 9e8f433f6be822b0955c52d702888844d3caf8c8 /files/unit_test | |
parent | ab123ac62c872f89e20f10c96e651ead21414ffc (diff) | |
download | libyuv-211e09de645270d087fec038afaadb8343e45f03.tar.gz |
libyuv roll to r1732 to pick up security fixes
bug: 135532289
Change-Id: Ibf05a0e54c7bc882788194862cdd94fccfba5ebf
Diffstat (limited to 'files/unit_test')
-rw-r--r-- | files/unit_test/convert_test.cc | 258 | ||||
-rw-r--r-- | files/unit_test/planar_test.cc | 106 | ||||
-rw-r--r-- | files/unit_test/rotate_test.cc | 117 | ||||
-rw-r--r-- | files/unit_test/scale_test.cc | 355 |
4 files changed, 720 insertions, 116 deletions
diff --git a/files/unit_test/convert_test.cc b/files/unit_test/convert_test.cc index d97b4fc7..32a4cd1c 100644 --- a/files/unit_test/convert_test.cc +++ b/files/unit_test/convert_test.cc @@ -311,10 +311,10 @@ int I400ToNV21(const uint8_t* src_y, SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ OFF); \ align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ + align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ for (int i = 0; i < kHeight; ++i) \ for (int j = 0; j < kWidth; ++j) \ @@ -329,21 +329,21 @@ int I400ToNV21(const uint8_t* src_y, } \ memset(dst_y_c, 1, kWidth* kHeight); \ memset(dst_uv_c, 2, \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ memset(dst_y_opt, 101, kWidth* kHeight); \ memset(dst_uv_opt, 102, \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \ - dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \ + dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ MaskCpuFlags(benchmark_cpu_info_); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \ - dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \ + dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ } \ int max_diff = 0; \ for (int i = 0; i < kHeight; ++i) { \ @@ -357,12 +357,12 @@ int I400ToNV21(const uint8_t* src_y, } \ EXPECT_LE(max_diff, 1); \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ int abs_diff = \ abs(static_cast<int>( \ - dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \ + dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ static_cast<int>( \ - dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \ + dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ if (abs_diff > max_diff) { \ max_diff = abs_diff; \ } \ @@ -395,6 +395,99 @@ TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2) TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2) TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2) +#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, \ + OFF) \ + TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = benchmark_height_; \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ + SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ + src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 0 + OFF] = \ + (fastrand() & 0xff); \ + src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 1 + OFF] = \ + (fastrand() & 0xff); \ + } \ + } \ + memset(dst_y_c, 1, kWidth* kHeight); \ + memset(dst_uv_c, 2, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ + memset(dst_uv_opt, 102, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + MaskCpuFlags(disable_cpu_flags_); \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_uv + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_c, kWidth, dst_uv_c, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + SRC_FMT_PLANAR##To##FMT_PLANAR( \ + src_y + OFF, kWidth, src_uv + OFF, \ + SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_opt, kWidth, dst_uv_opt, \ + SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth; ++j) { \ + int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ + static_cast<int>(dst_y_opt[i * kWidth + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ + int abs_diff = \ + abs(static_cast<int>( \ + dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ + static_cast<int>( \ + dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + } \ + EXPECT_LE(max_diff, 1); \ + free_aligned_buffer_page_end(dst_y_c); \ + free_aligned_buffer_page_end(dst_uv_c); \ + free_aligned_buffer_page_end(dst_y_opt); \ + free_aligned_buffer_page_end(dst_uv_opt); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + } + +#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width, _Unaligned, +, 1) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \ + TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ + SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) + +// TODO(fbarchard): Fix msan on this unittest +// TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2) + #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \ DOY) \ @@ -680,8 +773,8 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2) TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - W1280, DIFF, N, NEG, OFF) \ +#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \ + BPP_B, W1280, DIFF, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kHeight = benchmark_height_; \ @@ -716,9 +809,9 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ - FMT_B##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ + FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ kHeight); \ - FMT_B##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ + FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ kHeight); \ int max_diff = 0; \ for (int i = 0; i < kHeight; ++i) { \ @@ -740,25 +833,27 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2) free_aligned_buffer_page_end(dst_argb32_opt); \ } -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ +#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + DIFF) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_ - 4, DIFF, _Any, +, 0) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, DIFF, _Unaligned, +, 1) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + benchmark_width_, DIFF, _Invert, -, 0) \ + TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ benchmark_width_, DIFF, _Opt, +, 0) -TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2) -TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2) -TESTBIPLANARTOB(NV12, 2, 2, RGB24, 3, 2) -TESTBIPLANARTOB(NV21, 2, 2, RGB24, 3, 2) -TESTBIPLANARTOB(NV12, 2, 2, RAW, 3, 2) -TESTBIPLANARTOB(NV21, 2, 2, RAW, 3, 2) -TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) +TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4, 2) +TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4, 2) +TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4, 2) +TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4, 2) +TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2) +TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2) +TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2) +TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2) +TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9) +TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2) #ifdef DO_THREE_PLANES // Do 3 allocations for yuv. conventional but slower. @@ -885,26 +980,27 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ benchmark_width_, DIFF, _Opt, +, 0) +TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) +TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) +TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR) TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR) +TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) +TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) +TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) +TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4) TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4) +// TODO(fbarchard): Investigate J420 error of 11 on Windows. +TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 11) TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) -// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9. -TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) -TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) -TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) -TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) -TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) +TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2) -TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2) -TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) -TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) +TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) #define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \ SUBSAMP_Y, W1280, N, NEG, OFF) \ @@ -978,6 +1074,8 @@ TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2) TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2) TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2) TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) +TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2) +TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) #define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, W1280, DIFF, N, NEG, OFF) \ @@ -1069,45 +1167,46 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) HEIGHT_B, DIFF) // TODO(fbarchard): make ARM version of C code that matches NEON. +TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0) +TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0) +TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) +TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) +TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) +TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) -TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) +TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) -TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) -TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) +TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) +TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) +TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0) +TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) +TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) +TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) +TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0) +TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0) TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0) TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0) TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) +TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0) TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0) -TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) +TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) +TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR) TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0) -TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) -TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0) -TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) #define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \ HEIGHT_B, W1280, DIFF, N, NEG, OFF) \ @@ -1291,6 +1390,7 @@ TEST_F(LibYUVConvertTest, ValidateJpeg) { // EOI, SOI. Expect pass. orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; orig_pixels[kSize - kOff + 0] = 0xff; orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. for (int times = 0; times < benchmark_iterations_; ++times) { @@ -1317,6 +1417,7 @@ TEST_F(LibYUVConvertTest, ValidateJpegLarge) { // EOI, SOI. Expect pass. orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; orig_pixels[kSize - kOff + 0] = 0xff; orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. for (int times = 0; times < benchmark_iterations_; ++times) { @@ -1350,6 +1451,7 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) { // SOI but no EOI. Expect fail. orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; for (int times = 0; times < benchmark_iterations_; ++times) { EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); } @@ -1367,22 +1469,24 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) { TEST_F(LibYUVConvertTest, FuzzJpeg) { // SOI but no EOI. Expect fail. for (int times = 0; times < benchmark_iterations_; ++times) { - const int kSize = fastrand() % 5000 + 2; + const int kSize = fastrand() % 5000 + 3; align_buffer_page_end(orig_pixels, kSize); MemRandomize(orig_pixels, kSize); // Add SOI so frame will be scanned. orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. + orig_pixels[2] = 0xff; orig_pixels[kSize - 1] = 0xff; - ValidateJpeg(orig_pixels, kSize); // Failure normally expected. + ValidateJpeg(orig_pixels, + kSize); // Failure normally expected. free_aligned_buffer_page_end(orig_pixels); } } -// Test data created in GIMP. In export jpeg, disable thumbnails etc, -// choose a subsampling, and use low quality (50) to keep size small. -// Generated with xxd -i test.jpg +// Test data created in GIMP. In export jpeg, disable +// thumbnails etc, choose a subsampling, and use low quality +// (50) to keep size small. Generated with xxd -i test.jpg // test 0 is J400 static const uint8_t kTest0Jpg[] = { 0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01, @@ -1984,8 +2088,8 @@ TEST_F(LibYUVConvertTest, TestMJPGInfo) { EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); - EXPECT_EQ(1, - ShowJPegInfo(kTest4Jpg, kTest4JpgLen)); // Valid but unsupported. + EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, + kTest4JpgLen)); // Valid but unsupported. } #endif // HAVE_JPEG @@ -2903,7 +3007,8 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) { } // Test 10 bit YUV to 10 bit RGB -// Caveat: Result is near due to float rounding in expected result. +// Caveat: Result is near due to float rounding in expected +// result. TEST_F(LibYUVConvertTest, TestH010ToAR30) { const int kSize = 1024; int histogram_b[1024]; @@ -2966,7 +3071,8 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) { } // Test 10 bit YUV to 10 bit RGB -// Caveat: Result is near due to float rounding in expected result. +// Caveat: Result is near due to float rounding in expected +// result. TEST_F(LibYUVConvertTest, TestH010ToAB30) { const int kSize = 1024; int histogram_b[1024]; diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc index 75608955..70f8966e 100644 --- a/files/unit_test/planar_test.cc +++ b/files/unit_test/planar_test.cc @@ -3186,7 +3186,8 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) { } GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640); for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) int has_neon = TestCpuFlag(kCpuHasNEON); if (has_neon) { GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640); @@ -3239,7 +3240,8 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { &orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0], 640); for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) { -#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) int has_neon = TestCpuFlag(kCpuHasNEON); if (has_neon) { GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2], @@ -3267,4 +3269,104 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704)); } +float TestFloatDivToByte(int benchmark_width, + int benchmark_height, + int benchmark_iterations, + float scale, + bool opt) { + int i, j; + // NEON does multiple of 8, so round count up + const int kPixels = (benchmark_width * benchmark_height + 7) & ~7; + align_buffer_page_end(src_weights, kPixels * 4); + align_buffer_page_end(src_values, kPixels * 4); + align_buffer_page_end(dst_out_c, kPixels); + align_buffer_page_end(dst_out_opt, kPixels); + align_buffer_page_end(dst_mask_c, kPixels); + align_buffer_page_end(dst_mask_opt, kPixels); + + // Randomize works but may contain some denormals affecting performance. + // MemRandomize(orig_y, kPixels * 4); + // large values are problematic. audio is really -1 to 1. + for (i = 0; i < kPixels; ++i) { + (reinterpret_cast<float*>(src_weights))[i] = scale; + (reinterpret_cast<float*>(src_values))[i] = + sinf(static_cast<float>(i) * 0.1f); + } + memset(dst_out_c, 0, kPixels); + memset(dst_out_opt, 1, kPixels); + memset(dst_mask_c, 2, kPixels); + memset(dst_mask_opt, 3, kPixels); + + FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), + reinterpret_cast<float*>(src_values), dst_out_c, + dst_mask_c, kPixels); + + for (j = 0; j < benchmark_iterations; j++) { + if (opt) { +#ifdef HAS_FLOATDIVTOBYTEROW_NEON + FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights), + reinterpret_cast<float*>(src_values), dst_out_opt, + dst_mask_opt, kPixels); +#else + FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), + reinterpret_cast<float*>(src_values), dst_out_opt, + dst_mask_opt, kPixels); +#endif + } else { + FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights), + reinterpret_cast<float*>(src_values), dst_out_opt, + dst_mask_opt, kPixels); + } + } + + uint8_t max_diff = 0; + for (i = 0; i < kPixels; ++i) { + uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) + + abs(dst_mask_c[i] - dst_mask_opt[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(src_weights); + free_aligned_buffer_page_end(src_values); + free_aligned_buffer_page_end(dst_out_c); + free_aligned_buffer_page_end(dst_out_opt); + free_aligned_buffer_page_end(dst_mask_c); + free_aligned_buffer_page_end(dst_mask_opt); + + return max_diff; +} + +TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) { + float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, false); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) { + float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_, + benchmark_iterations_, 1.2f, true); + EXPECT_EQ(0, diff); +} + +TEST_F(LibYUVPlanarTest, UVToVURow) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_vu, kPixels * 2); + align_buffer_page_end(dst_pixels_uv, kPixels * 2); + + MemRandomize(src_pixels_vu, kPixels * 2); + memset(dst_pixels_uv, 1, kPixels * 2); + + UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels); + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]); + EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]); + } + + free_aligned_buffer_page_end(src_pixels_vu); + free_aligned_buffer_page_end(dst_pixels_uv); +} + } // namespace libyuv diff --git a/files/unit_test/rotate_test.cc b/files/unit_test/rotate_test.cc index d04b96e9..61941e63 100644 --- a/files/unit_test/rotate_test.cc +++ b/files/unit_test/rotate_test.cc @@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) { benchmark_cpu_info_); } +static void I444TestRotate(int src_width, + int src_height, + int dst_width, + int dst_height, + libyuv::RotationMode mode, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (src_width < 1) { + src_width = 1; + } + if (src_height == 0) { + src_height = 1; + } + if (dst_width < 1) { + dst_width = 1; + } + if (dst_height < 1) { + dst_height = 1; + } + int src_i444_y_size = src_width * Abs(src_height); + int src_i444_uv_size = src_width * Abs(src_height); + int src_i444_size = src_i444_y_size + src_i444_uv_size * 2; + align_buffer_page_end(src_i444, src_i444_size); + for (int i = 0; i < src_i444_size; ++i) { + src_i444[i] = fastrand() & 0xff; + } + + int dst_i444_y_size = dst_width * dst_height; + int dst_i444_uv_size = dst_width * dst_height; + int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2; + align_buffer_page_end(dst_i444_c, dst_i444_size); + align_buffer_page_end(dst_i444_opt, dst_i444_size); + memset(dst_i444_c, 2, dst_i444_size); + memset(dst_i444_opt, 3, dst_i444_size); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width, + src_i444 + src_i444_y_size + src_i444_uv_size, src_width, + dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width, + dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width, + src_width, src_height, mode); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (int i = 0; i < benchmark_iterations; ++i) { + I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width, + src_i444 + src_i444_y_size + src_i444_uv_size, src_width, + dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size, + dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size, + dst_width, src_width, src_height, mode); + } + + // Rotation should be exact. + for (int i = 0; i < dst_i444_size; ++i) { + EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]); + } + + free_aligned_buffer_page_end(dst_i444_c); + free_aligned_buffer_page_end(dst_i444_opt); + free_aligned_buffer_page_end(src_i444); +} + +TEST_F(LibYUVRotateTest, I444Rotate0_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate0, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate90_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate90, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate180_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_, + benchmark_height_, kRotate180, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, I444Rotate270_Opt) { + I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_, + benchmark_width_, kRotate270, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_); +} + +// TODO(fbarchard): Remove odd width tests. +// Odd width tests work but disabled because they use C code and can be +// tested by passing an odd width command line or environment variable. +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, kRotate0, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, kRotate90, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_width_ - 3, benchmark_height_ - 1, kRotate180, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + +TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) { + I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1, + benchmark_height_ - 1, benchmark_width_ - 3, kRotate270, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_); +} + static void NV12TestRotate(int src_width, int src_height, int dst_width, diff --git a/files/unit_test/scale_test.cc b/files/unit_test/scale_test.cc index d97d54a8..811b2d04 100644 --- a/files/unit_test/scale_test.cc +++ b/files/unit_test/scale_test.cc @@ -22,14 +22,14 @@ namespace libyuv { // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. -static int TestFilter(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int disable_cpu_flags, - int benchmark_cpu_info) { +static int I420TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { if (!SizeValid(src_width, src_height, dst_width, dst_height)) { return 0; } @@ -141,14 +141,14 @@ static int TestFilter(int src_width, // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. // 0 = exact. -static int TestFilter_16(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int disable_cpu_flags, - int benchmark_cpu_info) { +static int I420TestFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { if (!SizeValid(src_width, src_height, dst_width, dst_height)) { return 0; } @@ -256,6 +256,241 @@ static int TestFilter_16(int src_width, return max_diff; } +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. +static int I444TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + if (!src_y || !src_u || !src_v) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_c, dst_y_plane_size); + align_buffer_page_end(dst_u_c, dst_uv_plane_size); + align_buffer_page_end(dst_v_c, dst_uv_plane_size); + align_buffer_page_end(dst_y_opt, dst_y_plane_size); + align_buffer_page_end(dst_u_opt, dst_uv_plane_size); + align_buffer_page_end(dst_v_opt, dst_uv_plane_size); + if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt || + !dst_v_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + double c_time = get_time(); + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_c, dst_stride_y, dst_u_c, + dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f); + c_time = (get_time() - c_time); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt, + dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height, + f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + // Report performance of C vs OPT. + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference is not + // over 3. + int max_diff = 0; + for (i = 0; i < (dst_height); ++i) { + for (j = 0; j < (dst_width); ++j) { + int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - + dst_y_opt[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + for (i = 0; i < (dst_height_uv); ++i) { + for (j = 0; j < (dst_width_uv); ++j) { + int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] - + dst_u_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] - + dst_v_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_u_c); + free_aligned_buffer_page_end(dst_v_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_u_opt); + free_aligned_buffer_page_end(dst_v_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + + return max_diff; +} + +// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference. +// 0 = exact. +static int I444TestFilter_16(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i; + int src_width_uv = Abs(src_width); + int src_height_uv = Abs(src_height); + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv); + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_u, src_uv_plane_size); + align_buffer_page_end(src_v, src_uv_plane_size); + align_buffer_page_end(src_y_16, src_y_plane_size * 2); + align_buffer_page_end(src_u_16, src_uv_plane_size * 2); + align_buffer_page_end(src_v_16, src_uv_plane_size * 2); + if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16); + uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16); + uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16); + + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_u, src_uv_plane_size); + MemRandomize(src_v, src_uv_plane_size); + + for (i = 0; i < src_y_plane_size; ++i) { + p_src_y_16[i] = src_y[i]; + } + for (i = 0; i < src_uv_plane_size; ++i) { + p_src_u_16[i] = src_u[i]; + p_src_v_16[i] = src_v[i]; + } + + int dst_width_uv = dst_width; + int dst_height_uv = dst_height; + + int dst_y_plane_size = (dst_width) * (dst_height); + int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv); + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv; + + align_buffer_page_end(dst_y_8, dst_y_plane_size); + align_buffer_page_end(dst_u_8, dst_uv_plane_size); + align_buffer_page_end(dst_v_8, dst_uv_plane_size); + align_buffer_page_end(dst_y_16, dst_y_plane_size * 2); + align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2); + align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2); + + uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16); + uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16); + uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16); + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv, + src_width, src_height, dst_y_8, dst_stride_y, dst_u_8, + dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f); + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + for (i = 0; i < benchmark_iterations; ++i) { + I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv, + p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16, + dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16, + dst_stride_uv, dst_width, dst_height, f); + } + + // Expect an exact match. + int max_diff = 0; + for (i = 0; i < dst_y_plane_size; ++i) { + int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + for (i = 0; i < dst_uv_plane_size; ++i) { + int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + + free_aligned_buffer_page_end(dst_y_8); + free_aligned_buffer_page_end(dst_u_8); + free_aligned_buffer_page_end(dst_v_8); + free_aligned_buffer_page_end(dst_y_16); + free_aligned_buffer_page_end(dst_u_16); + free_aligned_buffer_page_end(dst_v_16); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_u); + free_aligned_buffer_page_end(src_v); + free_aligned_buffer_page_end(src_y_16); + free_aligned_buffer_page_end(src_u_16); + free_aligned_buffer_page_end(src_v_16); + + return max_diff; +} + // The following adjustments in dimensions ensure the scale factor will be // exactly achieved. // 2 is chroma subsample. @@ -263,16 +498,32 @@ static int TestFilter_16(int src_width, #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2) #define TEST_FACTOR1(name, filter, nom, denom, max_diff) \ - TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \ - int diff = TestFilter( \ + TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \ + int diff = I420TestFilter( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \ + int diff = I444TestFilter( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) { \ - int diff = TestFilter_16( \ + TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \ + int diff = I420TestFilter_16( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \ + int diff = I444TestFilter_16( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ @@ -300,30 +551,58 @@ TEST_FACTOR(3, 1, 3, 0) #undef DX #define TEST_SCALETO1(name, width, height, filter, max_diff) \ - TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \ - int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \ - kFilter##filter, benchmark_iterations_, \ - disable_cpu_flags_, benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \ + int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \ + int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \ + int diff = I420TestFilter_16( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \ + int diff = I444TestFilter_16( \ + benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \ + int diff = I420TestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ - int diff = TestFilter(width, height, Abs(benchmark_width_), \ - Abs(benchmark_height_), kFilter##filter, \ - benchmark_iterations_, disable_cpu_flags_, \ - benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \ + int diff = I444TestFilter(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) { \ - int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \ - height, kFilter##filter, benchmark_iterations_, \ - disable_cpu_flags_, benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, \ + I420##name##From##width##x##height##_##filter##_16) { \ + int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } \ - TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) { \ - int diff = TestFilter_16(width, height, Abs(benchmark_width_), \ - Abs(benchmark_height_), kFilter##filter, \ - benchmark_iterations_, disable_cpu_flags_, \ - benchmark_cpu_info_); \ + TEST_F(LibYUVScaleTest, \ + I444##name##From##width##x##height##_##filter##_16) { \ + int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \ + Abs(benchmark_height_), kFilter##filter, \ + benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ } |