diff options
author | Frank Barchard <fbarchard@google.com> | 2020-09-28 12:41:52 -0700 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2020-09-28 20:13:21 +0000 |
commit | 7a52fde1c4eb00790bd647b50842797daa5222e6 (patch) | |
tree | 6f7c07526562863a0ff2d0b2d76421d762f7868b | |
parent | d6833cda383bace2c98190fe0df504609c9ae074 (diff) | |
download | libyuv-7a52fde1c4eb00790bd647b50842797daa5222e6.tar.gz |
NV12Scale function using split/merge on UV channel
Bug: libyuv:718, libyuv:838, b/168918847
Change-Id: I78b27baac50f0ce955e00cb6aaf7dfe5a0cb1e3d
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2432067
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
-rw-r--r-- | README.chromium | 2 | ||||
-rw-r--r-- | include/libyuv/scale.h | 25 | ||||
-rw-r--r-- | include/libyuv/version.h | 2 | ||||
-rw-r--r-- | source/row_gcc.cc | 25 | ||||
-rw-r--r-- | source/scale.cc | 65 | ||||
-rw-r--r-- | unit_test/scale_test.cc | 118 |
6 files changed, 232 insertions, 5 deletions
diff --git a/README.chromium b/README.chromium index a2a6f977..253ac353 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1763 +Version: 1764 License: BSD License File: LICENSE diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h index 23ba1634..add5a9eb 100644 --- a/include/libyuv/scale.h +++ b/include/libyuv/scale.h @@ -145,6 +145,31 @@ int I444Scale_16(const uint16_t* src_y, int dst_height, enum FilterMode filtering); +// Scales an NV12 image from the src width and height to the +// dst width and height. +// If filtering is kFilterNone, a simple nearest-neighbor algorithm is +// used. This produces basic (blocky) quality at the fastest speed. +// If filtering is kFilterBilinear, interpolation is used to produce a better +// quality image, at the expense of speed. +// kFilterBox is not supported for the UV channel and will be treated as +// bilinear. +// Returns 0 if successful. + +LIBYUV_API +int NV12Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int dst_width, + int dst_height, + enum FilterMode filtering); + #ifdef __cplusplus // Legacy API. Deprecated. 
LIBYUV_API diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 1172f9a3..7c2d059e 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1763 +#define LIBYUV_VERSION 1764 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 709f0709..c0541888 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -7064,7 +7064,6 @@ void HalfMergeUVRow_SSSE3(const uint8_t* src_u, "psrlw $0xf,%%xmm4 \n" "packuswb %%xmm4,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" - "1: \n" LABELALIGN "1: \n" @@ -7111,7 +7110,6 @@ void HalfMergeUVRow_AVX2(const uint8_t* src_u, "vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n" - "1: \n" LABELALIGN "1: \n" @@ -7148,6 +7146,29 @@ void HalfMergeUVRow_AVX2(const uint8_t* src_u, : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } +void ClampFloatToZero_SSE2(const float* src_x, + float * dst_y, + int width) { + + asm volatile( + "pxor %%xmm1,%%xmm1 \n" + + LABELALIGN + "1: \n" + "movd (%0),%%xmm0 \n" // load float + "maxss %%xmm1, %%xmm0 \n" // clamp to zero + "add 4, %0 \n" + "movd %%xmm0, (%1) \n" // store float + "add 4, %1 \n" + "sub $0x4,%2 \n" // 1 float per loop + "jg 1b \n" + : "+r"(src_x), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); +} + #endif // defined(__x86_64__) || defined(__i386__) #ifdef __cplusplus diff --git a/source/scale.cc b/source/scale.cc index b17920a6..d26bfec7 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -1670,7 +1670,7 @@ void ScalePlane_16(const uint16_t* src, } if (dst_width == src_width && filtering != kFilterBox) { int dy = FixedDiv(src_height, dst_height); - // Arbitrary scale vertically, but unscaled vertically. + // Arbitrary scale vertically, but unscaled horizontally. 
ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, 0, dy, 1, filtering); return; @@ -1869,6 +1869,69 @@ int I444Scale_16(const uint16_t* src_y, return 0; } +// Scale an NV12 image. +// This function in turn calls a scaling function for each plane. + +// TODO(https://bugs.chromium.org/p/libyuv/issues/detail?id=838): Remove +// this once libyuv implements NV12Scale and use the libyuv::NV12Scale(). +// This is copy-pasted from +// webrtc/common_video/libyuv/include/webrtc_libyuv.h +int NV12Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int dst_width, + int dst_height, + enum FilterMode filtering) { + const int src_chroma_width = (src_width + 1) / 2; + const int src_chroma_height = (src_height + 1) / 2; + + if (src_width == dst_width && src_height == dst_height) { + // No scaling. + libyuv::CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, src_width, + src_height); + libyuv::CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, + src_chroma_width * 2, src_chroma_height); + return 0; + } + + // Scaling. + // Allocate temporary memory for spitting UV planes and scaling them. + const int dst_chroma_width = (dst_width + 1) / 2; + const int dst_chroma_height = (dst_height + 1) / 2; + + align_buffer_64(tmp_buffer, + src_chroma_width * src_chroma_height * 2 + + dst_chroma_width * dst_chroma_height * 2); + + uint8_t* const src_u = tmp_buffer; + uint8_t* const src_v = src_u + src_chroma_width * src_chroma_height; + uint8_t* const dst_u = src_v + src_chroma_width * src_chroma_height; + uint8_t* const dst_v = dst_u + dst_chroma_width * dst_chroma_height; + + // Split source UV plane into separate U and V plane using the temporary data. 
+ libyuv::SplitUVPlane(src_uv, src_stride_uv, src_u, src_chroma_width, src_v, + src_chroma_width, src_chroma_width, src_chroma_height); + + // Scale the planes. + libyuv::I420Scale( + src_y, src_stride_y, src_u, src_chroma_width, src_v, src_chroma_width, + src_width, src_height, dst_y, dst_stride_y, dst_u, dst_chroma_width, + dst_v, dst_chroma_width, dst_width, dst_height, filtering); + + // Merge the UV planes into the destination. + libyuv::MergeUVPlane(dst_u, dst_chroma_width, dst_v, dst_chroma_width, dst_uv, + dst_stride_uv, dst_chroma_width, dst_chroma_height); + free_aligned_buffer_64(tmp_buffer); + return 0; +} + // Deprecated api LIBYUV_API int Scale(const uint8_t* src_y, diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index 1508bf25..86ac8164 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -494,6 +494,110 @@ static int I444TestFilter_16(int src_width, return max_diff; } +// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. +static int NV12TestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info) { + if (!SizeValid(src_width, src_height, dst_width, dst_height)) { + return 0; + } + + int i, j; + int src_width_uv = (Abs(src_width) + 1) >> 1; + int src_height_uv = (Abs(src_height) + 1) >> 1; + + int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); + int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv) * 2; + + int src_stride_y = Abs(src_width); + int src_stride_uv = src_width_uv * 2; + + align_buffer_page_end(src_y, src_y_plane_size); + align_buffer_page_end(src_uv, src_uv_plane_size); + if (!src_y || !src_uv) { + printf("Skipped. 
Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + MemRandomize(src_y, src_y_plane_size); + MemRandomize(src_uv, src_uv_plane_size); + + int dst_width_uv = (dst_width + 1) >> 1; + int dst_height_uv = (dst_height + 1) >> 1; + + int64_t dst_y_plane_size = (dst_width) * (dst_height); + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv) * 2; + + int dst_stride_y = dst_width; + int dst_stride_uv = dst_width_uv * 2; + + align_buffer_page_end(dst_y_c, dst_y_plane_size); + align_buffer_page_end(dst_uv_c, dst_uv_plane_size); + align_buffer_page_end(dst_y_opt, dst_y_plane_size); + align_buffer_page_end(dst_uv_opt, dst_uv_plane_size); + if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) { + printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); + return 0; + } + + MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization. + double c_time = get_time(); + NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, + src_width, src_height, dst_y_c, dst_stride_y, dst_uv_c, + dst_stride_uv, dst_width, dst_height, f); + c_time = (get_time() - c_time); + + MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization. + double opt_time = get_time(); + for (i = 0; i < benchmark_iterations; ++i) { + NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, + src_width, src_height, dst_y_opt, dst_stride_y, dst_uv_opt, + dst_stride_uv, dst_width, dst_height, f); + } + opt_time = (get_time() - opt_time) / benchmark_iterations; + // Report performance of C vs OPT. + printf("filter %d - %8d us C - %8d us OPT\n", f, + static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); + + // C version may be a little off from the optimized. Order of + // operations may introduce rounding somewhere. So do a difference + // of the buffers and look to see that the max difference is not + // over 3. 
+ int max_diff = 0; + for (i = 0; i < (dst_height); ++i) { + for (j = 0; j < (dst_width); ++j) { + int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - + dst_y_opt[(i * dst_stride_y) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + for (i = 0; i < (dst_height_uv); ++i) { + for (j = 0; j < (dst_width_uv * 2); ++j) { + int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] - + dst_uv_opt[(i * dst_stride_uv) + j]); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } + } + } + + free_aligned_buffer_page_end(dst_y_c); + free_aligned_buffer_page_end(dst_uv_c); + free_aligned_buffer_page_end(dst_y_opt); + free_aligned_buffer_page_end(dst_uv_opt); + free_aligned_buffer_page_end(src_y); + free_aligned_buffer_page_end(src_uv); + + return max_diff; +} + // The following adjustments in dimensions ensure the scale factor will be // exactly achieved. // 2 is chroma subsample. @@ -532,6 +636,14 @@ static int I444TestFilter_16(int src_width, kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) { \ + int diff = NV12TestFilter( \ + SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ + DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ + kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ + benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ } // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but @@ -617,6 +729,12 @@ TEST_FACTOR(3, 1, 3, 0) benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ EXPECT_LE(diff, max_diff); \ + } \ + TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) { \ + int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width, \ + height, kFilter##filter, benchmark_iterations_, \ + disable_cpu_flags_, benchmark_cpu_info_); \ + EXPECT_LE(diff, max_diff); \ } #ifdef ENABLE_SLOW_TESTS |