aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2020-09-28 12:41:52 -0700
committerCommit Bot <commit-bot@chromium.org>2020-09-28 20:13:21 +0000
commit7a52fde1c4eb00790bd647b50842797daa5222e6 (patch)
tree6f7c07526562863a0ff2d0b2d76421d762f7868b
parentd6833cda383bace2c98190fe0df504609c9ae074 (diff)
downloadlibyuv-7a52fde1c4eb00790bd647b50842797daa5222e6.tar.gz
NV12Scale function using split/merge on UV channel
Bug: libyuv:718, libyuv:838, b/168918847 Change-Id: I78b27baac50f0ce955e00cb6aaf7dfe5a0cb1e3d Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2432067 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: richard winterton <rrwinterton@gmail.com>
-rw-r--r--README.chromium2
-rw-r--r--include/libyuv/scale.h25
-rw-r--r--include/libyuv/version.h2
-rw-r--r--source/row_gcc.cc25
-rw-r--r--source/scale.cc65
-rw-r--r--unit_test/scale_test.cc118
6 files changed, 232 insertions, 5 deletions
diff --git a/README.chromium b/README.chromium
index a2a6f977..253ac353 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1763
+Version: 1764
License: BSD
License File: LICENSE
diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h
index 23ba1634..add5a9eb 100644
--- a/include/libyuv/scale.h
+++ b/include/libyuv/scale.h
@@ -145,6 +145,31 @@ int I444Scale_16(const uint16_t* src_y,
int dst_height,
enum FilterMode filtering);
+// Scales an NV12 image from the src width and height to the
+// dst width and height.
+// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
+// used. This produces basic (blocky) quality at the fastest speed.
+// If filtering is kFilterBilinear, interpolation is used to produce a better
+// quality image, at the expense of speed.
+// kFilterBox is not supported for the UV channel and will be treated as
+// bilinear.
+// Returns 0 if successful.
+
+LIBYUV_API
+int NV12Scale(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_uv,
+ int src_stride_uv,
+ int src_width,
+ int src_height,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 1172f9a3..7c2d059e 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1763
+#define LIBYUV_VERSION 1764
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index 709f0709..c0541888 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -7064,7 +7064,6 @@ void HalfMergeUVRow_SSSE3(const uint8_t* src_u,
"psrlw $0xf,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"
- "1: \n"
LABELALIGN
"1: \n"
@@ -7111,7 +7110,6 @@ void HalfMergeUVRow_AVX2(const uint8_t* src_u,
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
"vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
- "1: \n"
LABELALIGN
"1: \n"
@@ -7148,6 +7146,29 @@ void HalfMergeUVRow_AVX2(const uint8_t* src_u,
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
+// Clamps negative floats to zero: dst_y[i] = max(src_x[i], 0.0f).
+// One float is handled per loop iteration; width is taken to be the number of
+// floats to process. The loop tests the counter after the body, so at least
+// one float is always read and written; callers must pass width >= 1.
+// NOTE(review): per the MAXSS definition a NaN input yields the source
+// operand, which is the zero register here - confirm that is the intent.
+void ClampFloatToZero_SSE2(const float* src_x, float* dst_y, int width) {
+  asm volatile(
+      "pxor        %%xmm1,%%xmm1                 \n"  // xmm1 = 0.0f
+
+      LABELALIGN
+      "1:                                        \n"
+      "movd        (%0),%%xmm0                   \n"  // load float
+      "maxss       %%xmm1,%%xmm0                 \n"  // clamp to zero
+      // Immediates need the '$' prefix; a bare 4 is an absolute memory operand.
+      "add         $0x4,%0                       \n"  // next source float
+      "movd        %%xmm0,(%1)                   \n"  // store float
+      "add         $0x4,%1                       \n"  // next destination float
+      "sub         $0x1,%2                       \n"  // 1 float per loop
+      "jg          1b                            \n"
+      : "+r"(src_x),  // %0
+        "+r"(dst_y),  // %1
+        "+r"(width)   // %2
+      :
+      : "memory", "cc", "xmm0", "xmm1");
+}
+
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
diff --git a/source/scale.cc b/source/scale.cc
index b17920a6..d26bfec7 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -1670,7 +1670,7 @@ void ScalePlane_16(const uint16_t* src,
}
if (dst_width == src_width && filtering != kFilterBox) {
int dy = FixedDiv(src_height, dst_height);
- // Arbitrary scale vertically, but unscaled vertically.
+ // Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, 0, 0, dy, 1, filtering);
return;
@@ -1869,6 +1869,69 @@ int I444Scale_16(const uint16_t* src_y,
return 0;
}
+// Scale an NV12 image.
+// The interleaved UV plane is split into temporary U and V planes, the Y, U
+// and V planes are scaled with I420Scale, and the scaled U and V planes are
+// merged back into the destination UV plane.
+// Derived from webrtc/common_video/libyuv/include/webrtc_libyuv.h; see
+// https://bugs.chromium.org/p/libyuv/issues/detail?id=838
+//
+// Widths and heights are in luma pixels and must all be positive; the chroma
+// planes use half of each dimension, rounded up.
+// Returns 0 on success. Returns -1 if a pointer is null, a dimension is not
+// positive, or the temporary buffer cannot be allocated. Otherwise returns
+// the result of I420Scale.
+LIBYUV_API
+int NV12Scale(const uint8_t* src_y,
+              int src_stride_y,
+              const uint8_t* src_uv,
+              int src_stride_uv,
+              int src_width,
+              int src_height,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_uv,
+              int dst_stride_uv,
+              int dst_width,
+              int dst_height,
+              enum FilterMode filtering) {
+  // Validate before any size arithmetic or plane access.
+  if (!src_y || !src_uv || !dst_y || !dst_uv || src_width <= 0 ||
+      src_height <= 0 || dst_width <= 0 || dst_height <= 0) {
+    return -1;
+  }
+
+  const int src_chroma_width = (src_width + 1) / 2;
+  const int src_chroma_height = (src_height + 1) / 2;
+
+  if (src_width == dst_width && src_height == dst_height) {
+    // No scaling. Each UV row holds src_chroma_width interleaved U/V pairs,
+    // hence twice that many bytes.
+    libyuv::CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, src_width,
+                      src_height);
+    libyuv::CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv,
+                      src_chroma_width * 2, src_chroma_height);
+    return 0;
+  }
+
+  // Scaling.
+  // Allocate temporary memory for splitting UV planes and scaling them.
+  const int dst_chroma_width = (dst_width + 1) / 2;
+  const int dst_chroma_height = (dst_height + 1) / 2;
+
+  // Plane sizes are computed in size_t so large images cannot overflow int.
+  const size_t src_chroma_size =
+      static_cast<size_t>(src_chroma_width) * src_chroma_height;
+  const size_t dst_chroma_size =
+      static_cast<size_t>(dst_chroma_width) * dst_chroma_height;
+
+  // One allocation laid out as: src U | src V | dst U | dst V.
+  align_buffer_64(tmp_buffer, (src_chroma_size + dst_chroma_size) * 2);
+  if (!tmp_buffer) {
+    return -1;
+  }
+
+  uint8_t* const src_u = tmp_buffer;
+  uint8_t* const src_v = src_u + src_chroma_size;
+  uint8_t* const dst_u = src_v + src_chroma_size;
+  uint8_t* const dst_v = dst_u + dst_chroma_size;
+
+  // Split source UV plane into separate U and V plane using the temporary data.
+  libyuv::SplitUVPlane(src_uv, src_stride_uv, src_u, src_chroma_width, src_v,
+                       src_chroma_width, src_chroma_width, src_chroma_height);
+
+  // Scale the planes.
+  const int result = libyuv::I420Scale(
+      src_y, src_stride_y, src_u, src_chroma_width, src_v, src_chroma_width,
+      src_width, src_height, dst_y, dst_stride_y, dst_u, dst_chroma_width,
+      dst_v, dst_chroma_width, dst_width, dst_height, filtering);
+
+  // Merge the UV planes into the destination only if scaling succeeded.
+  if (result == 0) {
+    libyuv::MergeUVPlane(dst_u, dst_chroma_width, dst_v, dst_chroma_width,
+                         dst_uv, dst_stride_uv, dst_chroma_width,
+                         dst_chroma_height);
+  }
+  free_aligned_buffer_64(tmp_buffer);
+  return result;
+}
+
// Deprecated api
LIBYUV_API
int Scale(const uint8_t* src_y,
diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc
index 1508bf25..86ac8164 100644
--- a/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -494,6 +494,110 @@ static int I444TestFilter_16(int src_width,
return max_diff;
}
+// Scales a randomized NV12 image once with the CPU flags given by
+// disable_cpu_flags and then benchmark_iterations times with the flags given
+// by benchmark_cpu_info, printing the time taken by each variant.
+// Returns the maximum absolute byte difference between the two outputs
+// (0 = exact). Also returns 0 when the sizes are rejected by SizeValid or an
+// allocation fails.
+static int NV12TestFilter(int src_width,
+                          int src_height,
+                          int dst_width,
+                          int dst_height,
+                          FilterMode f,
+                          int benchmark_iterations,
+                          int disable_cpu_flags,
+                          int benchmark_cpu_info) {
+  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+    return 0;
+  }
+
+  // Source geometry. The UV plane is half size (rounded up) in each dimension
+  // and holds two bytes per chroma sample.
+  const int src_abs_width = Abs(src_width);
+  const int src_abs_height = Abs(src_height);
+  const int src_width_uv = (src_abs_width + 1) >> 1;
+  const int src_height_uv = (src_abs_height + 1) >> 1;
+  const int src_stride_y = src_abs_width;
+  const int src_stride_uv = src_width_uv * 2;
+  const int64_t src_y_plane_size = src_abs_width * src_abs_height;
+  const int64_t src_uv_plane_size = src_width_uv * src_height_uv * 2;
+
+  align_buffer_page_end(src_y, src_y_plane_size);
+  align_buffer_page_end(src_uv, src_uv_plane_size);
+  if (!src_y || !src_uv) {
+    printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+    return 0;
+  }
+  MemRandomize(src_y, src_y_plane_size);
+  MemRandomize(src_uv, src_uv_plane_size);
+
+  // Destination geometry, laid out the same way as the source.
+  const int dst_width_uv = (dst_width + 1) >> 1;
+  const int dst_height_uv = (dst_height + 1) >> 1;
+  const int dst_stride_y = dst_width;
+  const int dst_stride_uv = dst_width_uv * 2;
+  const int64_t dst_y_plane_size = dst_width * dst_height;
+  const int64_t dst_uv_plane_size = dst_width_uv * dst_height_uv * 2;
+
+  align_buffer_page_end(dst_y_c, dst_y_plane_size);
+  align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
+  align_buffer_page_end(dst_y_opt, dst_y_plane_size);
+  align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
+  if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) {
+    printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+    return 0;
+  }
+
+  // Reference pass: a single scale with CPU optimizations disabled.
+  MaskCpuFlags(disable_cpu_flags);
+  const double c_start = get_time();
+  NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
+            dst_y_c, dst_stride_y, dst_uv_c, dst_stride_uv, dst_width,
+            dst_height, f);
+  const double c_time = get_time() - c_start;
+
+  // Benchmark pass: repeated scales with CPU optimizations enabled.
+  MaskCpuFlags(benchmark_cpu_info);
+  const double opt_start = get_time();
+  for (int iteration = 0; iteration < benchmark_iterations; ++iteration) {
+    NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width,
+              src_height, dst_y_opt, dst_stride_y, dst_uv_opt, dst_stride_uv,
+              dst_width, dst_height, f);
+  }
+  const double opt_time = (get_time() - opt_start) / benchmark_iterations;
+
+  // Report performance of C vs OPT.
+  printf("filter %d - %8d us C - %8d us OPT\n", f,
+         static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
+
+  // The C and optimized paths may round differently, so the caller compares
+  // the largest per-byte difference against a tolerance instead of requiring
+  // identical buffers.
+  int max_diff = 0;
+
+  // Y plane.
+  for (int row = 0; row < dst_height; ++row) {
+    const uint8_t* c_row = dst_y_c + row * dst_stride_y;
+    const uint8_t* opt_row = dst_y_opt + row * dst_stride_y;
+    for (int col = 0; col < dst_width; ++col) {
+      const int abs_diff = Abs(c_row[col] - opt_row[col]);
+      if (abs_diff > max_diff) {
+        max_diff = abs_diff;
+      }
+    }
+  }
+
+  // Interleaved UV plane: two bytes per chroma sample in every row.
+  for (int row = 0; row < dst_height_uv; ++row) {
+    const uint8_t* c_row = dst_uv_c + row * dst_stride_uv;
+    const uint8_t* opt_row = dst_uv_opt + row * dst_stride_uv;
+    for (int col = 0; col < dst_width_uv * 2; ++col) {
+      const int abs_diff = Abs(c_row[col] - opt_row[col]);
+      if (abs_diff > max_diff) {
+        max_diff = abs_diff;
+      }
+    }
+  }
+
+  free_aligned_buffer_page_end(dst_y_c);
+  free_aligned_buffer_page_end(dst_uv_c);
+  free_aligned_buffer_page_end(dst_y_opt);
+  free_aligned_buffer_page_end(dst_uv_opt);
+  free_aligned_buffer_page_end(src_y);
+  free_aligned_buffer_page_end(src_uv);
+
+  return max_diff;
+}
+
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
@@ -532,6 +636,14 @@ static int I444TestFilter_16(int src_width,
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) { \
+ int diff = NV12TestFilter( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
}
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
@@ -617,6 +729,12 @@ TEST_FACTOR(3, 1, 3, 0)
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) { \
+ int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width, \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
}
#ifdef ENABLE_SLOW_TESTS