aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2019-05-13 17:34:53 -0700
committerCommit Bot <commit-bot@chromium.org>2019-05-14 01:18:06 +0000
commit681c6c67397060df7787d4a42a10c79172c5cf5d (patch)
tree83f9b99ecb32e3bf731a178c19de826fa1e7bd76
parent05f72b86029b769f6015b58396268370078a5f5e (diff)
downloadlibyuv-681c6c67397060df7787d4a42a10c79172c5cf5d.tar.gz
Add LIBYUV_API to NV12ToABGR and I444Rotate, I444Scale
Gaussian blur low levels ported to 32 bit neon. But they are not hooked up to anything but a unittest. Bug:b/248041731, b/132108021, b/129908793 Change-Id: Iccebb8ffd6b719810aa11dd770a525227da4c357 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1611206 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Chong Zhang <chz@google.com>
-rw-r--r--README.chromium2
-rw-r--r--include/libyuv/convert_argb.h1
-rw-r--r--include/libyuv/scale.h19
-rw-r--r--include/libyuv/version.h2
-rw-r--r--source/convert_argb.cc3
-rw-r--r--source/rotate.cc27
-rw-r--r--source/row_neon.cc78
-rw-r--r--source/scale.cc33
-rw-r--r--unit_test/planar_test.cc6
-rw-r--r--unit_test/rotate_test.cc117
-rw-r--r--unit_test/scale_test.cc355
11 files changed, 582 insertions, 61 deletions
diff --git a/README.chromium b/README.chromium
index f00b242a..6f936a17 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1727
+Version: 1730
License: BSD
License File: LICENSE
diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h
index b51f49d1..e8ed1f59 100644
--- a/include/libyuv/convert_argb.h
+++ b/include/libyuv/convert_argb.h
@@ -256,6 +256,7 @@ int NV21ToARGB(const uint8_t* src_y,
int height);
// Convert NV12 to ABGR.
+LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
diff --git a/include/libyuv/scale.h b/include/libyuv/scale.h
index 32464d81..23ba1634 100644
--- a/include/libyuv/scale.h
+++ b/include/libyuv/scale.h
@@ -126,6 +126,25 @@ int I444Scale(const uint8_t* src_y,
int dst_height,
enum FilterMode filtering);
+LIBYUV_API
+int I444Scale_16(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 2b815909..9a35da41 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1727
+#define LIBYUV_VERSION 1730
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index ffca4ea0..54050333 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -1793,8 +1793,9 @@ int NV21ToARGB(const uint8_t* src_y,
}
// Convert NV12 to ABGR.
-// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix.
+// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix.
// To swap the UV use NV12 instead of NV21.LIBYUV_API
+LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
diff --git a/source/rotate.cc b/source/rotate.cc
index c334c40e..d414186a 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -521,28 +521,19 @@ int I444Rotate(const uint8_t* src_y,
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate90:
- RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y,
- width, height);
- RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u,
- width, height);
- RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v,
- width, height);
+ RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate270:
- RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y,
- width, height);
- RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u,
- width, height);
- RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v,
- width, height);
+ RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate180:
- RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y,
- width, height);
- RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u,
- width, height);
- RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v,
- width, height);
+ RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
default:
break;
diff --git a/source/row_neon.cc b/source/row_neon.cc
index baf57495..a12fa790 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -2685,6 +2685,84 @@ void ByteToFloatRow_NEON(const uint8_t* src,
: "cc", "memory", "q1", "q2", "q3");
}
+// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
+void GaussCol_NEON(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
+ int width) {
+ asm volatile(
+ "vmov.u16 d6, #4 \n" // constant 4
+ "vmov.u16 d7, #6 \n" // constant 6
+
+ "1: \n"
+ "vld1.16 {q1}, [%0]! \n" // load 8 samples, 5 rows
+ "vld1.16 {q2}, [%4]! \n"
+ "vaddl.u16 q0, d2, d4 \n" // * 1
+ "vaddl.u16 q1, d3, d5 \n" // * 1
+ "vld1.16 {q2}, [%1]! \n"
+ "vmlal.u16 q0, d4, d6 \n" // * 4
+ "vmlal.u16 q1, d5, d6 \n" // * 4
+ "vld1.16 {q2}, [%2]! \n"
+ "vmlal.u16 q0, d4, d7 \n" // * 6
+ "vmlal.u16 q1, d5, d7 \n" // * 6
+ "vld1.16 {q2}, [%3]! \n"
+ "vmlal.u16 q0, d4, d6 \n" // * 4
+ "vmlal.u16 q1, d5, d6 \n" // * 4
+ "subs %6, %6, #8 \n" // 8 processed per loop
+ "vst1.32 {q0, q1}, [%5]! \n" // store 8 samples
+ "bgt 1b \n"
+ : "+r"(src0), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(src4), // %4
+ "+r"(dst), // %5
+ "+r"(width) // %6
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
+// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
+void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
+ const uint32_t* src1 = src + 1;
+ const uint32_t* src2 = src + 2;
+ const uint32_t* src3 = src + 3;
+ asm volatile(
+ "vmov.u32 q10, #4 \n" // constant 4
+ "vmov.u32 q11, #6 \n" // constant 6
+
+ "1: \n"
+ "vld1.32 {q0, q1}, [%0]! \n" // load 12 source samples
+ "vld1.32 {q2}, [%0] \n"
+ "vadd.u32 q0, q0, q1 \n" // * 1
+ "vadd.u32 q1, q1, q2 \n" // * 1
+ "vld1.32 {q2, q3}, [%2]! \n"
+ "vmla.u32 q0, q2, q11 \n" // * 6
+ "vmla.u32 q1, q3, q11 \n" // * 6
+ "vld1.32 {q2, q3}, [%1]! \n"
+ "vld1.32 {q8, q9}, [%3]! \n"
+ "vadd.u32 q2, q2, q8 \n" // add rows for * 4
+ "vadd.u32 q3, q3, q9 \n"
+ "vmla.u32 q0, q2, q10 \n" // * 4
+ "vmla.u32 q1, q3, q10 \n" // * 4
+ "subs %5, %5, #8 \n" // 8 processed per loop
+ "vqshrn.u32 d0, q0, #8 \n" // round and pack
+ "vqshrn.u32 d1, q1, #8 \n"
+ "vst1.u16 {q0}, [%4]! \n" // store 8 samples
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(dst), // %4
+ "+r"(width) // %5
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+}
+
// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
diff --git a/source/scale.cc b/source/scale.cc
index d0560b8c..ab085496 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -1824,6 +1824,39 @@ int I444Scale(const uint8_t* src_y,
return 0;
}
+LIBYUV_API
+int I444Scale_16(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering) {
+ if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+ src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
+ dst_width <= 0 || dst_height <= 0) {
+ return -1;
+ }
+
+ ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
+ dst_width, dst_height, filtering);
+ ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
+ dst_width, dst_height, filtering);
+ ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
+ dst_width, dst_height, filtering);
+ return 0;
+}
+
// Deprecated api
LIBYUV_API
int Scale(const uint8_t* src_y,
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 3a502906..70f8966e 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -3186,7 +3186,8 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
}
GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
@@ -3239,7 +3240,8 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
&orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
diff --git a/unit_test/rotate_test.cc b/unit_test/rotate_test.cc
index d04b96e9..61941e63 100644
--- a/unit_test/rotate_test.cc
+++ b/unit_test/rotate_test.cc
@@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
benchmark_cpu_info_);
}
+static void I444TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ int src_i444_y_size = src_width * Abs(src_height);
+ int src_i444_uv_size = src_width * Abs(src_height);
+ int src_i444_size = src_i444_y_size + src_i444_uv_size * 2;
+ align_buffer_page_end(src_i444, src_i444_size);
+ for (int i = 0; i < src_i444_size; ++i) {
+ src_i444[i] = fastrand() & 0xff;
+ }
+
+ int dst_i444_y_size = dst_width * dst_height;
+ int dst_i444_uv_size = dst_width * dst_height;
+ int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2;
+ align_buffer_page_end(dst_i444_c, dst_i444_size);
+ align_buffer_page_end(dst_i444_opt, dst_i444_size);
+ memset(dst_i444_c, 2, dst_i444_size);
+ memset(dst_i444_opt, 3, dst_i444_size);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
+ src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
+ dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width,
+ dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width,
+ src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
+ src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
+ dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size,
+ dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size,
+ dst_width, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i444_size; ++i) {
+ EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(dst_i444_c);
+ free_aligned_buffer_page_end(dst_i444_opt);
+ free_aligned_buffer_page_end(src_i444);
+}
+
+TEST_F(LibYUVRotateTest, I444Rotate0_Opt) {
+ I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate0, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I444Rotate90_Opt) {
+ I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate90, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I444Rotate180_Opt) {
+ I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
+ benchmark_height_, kRotate180, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, I444Rotate270_Opt) {
+ I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
+ benchmark_width_, kRotate270, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+// TODO(fbarchard): Remove odd width tests.
+// Odd width tests work but disabled because they use C code and can be
+// tested by passing an odd width command line or environment variable.
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) {
+ I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
+ benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) {
+ I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
+ benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) {
+ I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
+ benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) {
+ I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
+ benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
+ benchmark_iterations_, disable_cpu_flags_,
+ benchmark_cpu_info_);
+}
+
static void NV12TestRotate(int src_width,
int src_height,
int dst_width,
diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc
index d97d54a8..811b2d04 100644
--- a/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -22,14 +22,14 @@
namespace libyuv {
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
-static int TestFilter(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- FilterMode f,
- int benchmark_iterations,
- int disable_cpu_flags,
- int benchmark_cpu_info) {
+static int I420TestFilter(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
@@ -141,14 +141,14 @@ static int TestFilter(int src_width,
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
// 0 = exact.
-static int TestFilter_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- FilterMode f,
- int benchmark_iterations,
- int disable_cpu_flags,
- int benchmark_cpu_info) {
+static int I420TestFilter_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
@@ -256,6 +256,241 @@ static int TestFilter_16(int src_width,
return max_diff;
}
+// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
+static int I444TestFilter(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i, j;
+ int src_width_uv = Abs(src_width);
+ int src_height_uv = Abs(src_height);
+
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
+
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_u, src_uv_plane_size);
+ align_buffer_page_end(src_v, src_uv_plane_size);
+ if (!src_y || !src_u || !src_v) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ MemRandomize(src_y, src_y_plane_size);
+ MemRandomize(src_u, src_uv_plane_size);
+ MemRandomize(src_v, src_uv_plane_size);
+
+ int dst_width_uv = dst_width;
+ int dst_height_uv = dst_height;
+
+ int64_t dst_y_plane_size = (dst_width) * (dst_height);
+ int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv;
+
+ align_buffer_page_end(dst_y_c, dst_y_plane_size);
+ align_buffer_page_end(dst_u_c, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_c, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_opt, dst_y_plane_size);
+ align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
+ if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
+ !dst_v_opt) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ double c_time = get_time();
+ I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
+ dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
+ c_time = (get_time() - c_time);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ double opt_time = get_time();
+ for (i = 0; i < benchmark_iterations; ++i) {
+ I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
+ dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
+ f);
+ }
+ opt_time = (get_time() - opt_time) / benchmark_iterations;
+ // Report performance of C vs OPT.
+ printf("filter %d - %8d us C - %8d us OPT\n", f,
+ static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
+
+ // C version may be a little off from the optimized. Order of
+ // operations may introduce rounding somewhere. So do a difference
+ // of the buffers and look to see that the max difference is not
+ // over 3.
+ int max_diff = 0;
+ for (i = 0; i < (dst_height); ++i) {
+ for (j = 0; j < (dst_width); ++j) {
+ int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
+ dst_y_opt[(i * dst_stride_y) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ }
+
+ for (i = 0; i < (dst_height_uv); ++i) {
+ for (j = 0; j < (dst_width_uv); ++j) {
+ int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
+ dst_u_opt[(i * dst_stride_uv) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
+ dst_v_opt[(i * dst_stride_uv) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_c);
+ free_aligned_buffer_page_end(dst_u_c);
+ free_aligned_buffer_page_end(dst_v_c);
+ free_aligned_buffer_page_end(dst_y_opt);
+ free_aligned_buffer_page_end(dst_u_opt);
+ free_aligned_buffer_page_end(dst_v_opt);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_u);
+ free_aligned_buffer_page_end(src_v);
+
+ return max_diff;
+}
+
+// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
+// 0 = exact.
+static int I444TestFilter_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i;
+ int src_width_uv = Abs(src_width);
+ int src_height_uv = Abs(src_height);
+
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
+
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_u, src_uv_plane_size);
+ align_buffer_page_end(src_v, src_uv_plane_size);
+ align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+ align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
+ align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
+ if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+ uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
+ uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
+
+ MemRandomize(src_y, src_y_plane_size);
+ MemRandomize(src_u, src_uv_plane_size);
+ MemRandomize(src_v, src_uv_plane_size);
+
+ for (i = 0; i < src_y_plane_size; ++i) {
+ p_src_y_16[i] = src_y[i];
+ }
+ for (i = 0; i < src_uv_plane_size; ++i) {
+ p_src_u_16[i] = src_u[i];
+ p_src_v_16[i] = src_v[i];
+ }
+
+ int dst_width_uv = dst_width;
+ int dst_height_uv = dst_height;
+
+ int dst_y_plane_size = (dst_width) * (dst_height);
+ int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv;
+
+ align_buffer_page_end(dst_y_8, dst_y_plane_size);
+ align_buffer_page_end(dst_u_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+ align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
+ align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
+
+ uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
+ uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
+ uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
+ dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (i = 0; i < benchmark_iterations; ++i) {
+ I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
+ p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
+ dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
+ dst_stride_uv, dst_width, dst_height, f);
+ }
+
+ // Expect an exact match.
+ int max_diff = 0;
+ for (i = 0; i < dst_y_plane_size; ++i) {
+ int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ for (i = 0; i < dst_uv_plane_size; ++i) {
+ int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_8);
+ free_aligned_buffer_page_end(dst_u_8);
+ free_aligned_buffer_page_end(dst_v_8);
+ free_aligned_buffer_page_end(dst_y_16);
+ free_aligned_buffer_page_end(dst_u_16);
+ free_aligned_buffer_page_end(dst_v_16);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_u);
+ free_aligned_buffer_page_end(src_v);
+ free_aligned_buffer_page_end(src_y_16);
+ free_aligned_buffer_page_end(src_u_16);
+ free_aligned_buffer_page_end(src_v_16);
+
+ return max_diff;
+}
+
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
@@ -263,16 +498,32 @@ static int TestFilter_16(int src_width,
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
- TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \
- int diff = TestFilter( \
+ TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
+ int diff = I420TestFilter( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
+ int diff = I444TestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) { \
- int diff = TestFilter_16( \
+ TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
+ int diff = I420TestFilter_16( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
+ int diff = I444TestFilter_16( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
@@ -300,30 +551,58 @@ TEST_FACTOR(3, 1, 3, 0)
#undef DX
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
- TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
- int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \
- kFilter##filter, benchmark_iterations_, \
- disable_cpu_flags_, benchmark_cpu_info_); \
+ TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
+ int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
+ int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
+ int diff = I420TestFilter_16( \
+ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
+ int diff = I444TestFilter_16( \
+ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
+ int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
- int diff = TestFilter(width, height, Abs(benchmark_width_), \
- Abs(benchmark_height_), kFilter##filter, \
- benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
+ TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
+ int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) { \
- int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \
- height, kFilter##filter, benchmark_iterations_, \
- disable_cpu_flags_, benchmark_cpu_info_); \
+ TEST_F(LibYUVScaleTest, \
+ I420##name##From##width##x##height##_##filter##_16) { \
+ int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) { \
- int diff = TestFilter_16(width, height, Abs(benchmark_width_), \
- Abs(benchmark_height_), kFilter##filter, \
- benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
+ TEST_F(LibYUVScaleTest, \
+ I444##name##From##width##x##height##_##filter##_16) { \
+ int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}